trsm.hpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief -
6  *
7  * \author O. Krause
8  * \date 2017
9  *
10  *
11  * \par Copyright 1995-2015 Shark Development Team
12  *
13  * <BR><HR>
14  * This file is part of Shark.
15  * <http://image.diku.dk/shark/>
16  *
17  * Shark is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU Lesser General Public License as published
19  * by the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * Shark is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU Lesser General Public License for more details.
26  *
27  * You should have received a copy of the GNU Lesser General Public License
28  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
29  *
30  */
31 //===========================================================================
32 #ifndef REMORA_KERNELS_CLBLAST_TRSM_HPP
33 #define REMORA_KERNELS_CLBLAST_TRSM_HPP
34 
35 #include "../../expression_types.hpp"
36 #include "../../detail/traits.hpp"
37 #include <clblast.h>
38 #include "../../detail/matrix_proxy_classes.hpp"
39 namespace remora{ namespace kernels{
40 
41 
42 template <class Triangular, typename MatA, typename MatB>
43 void trsm_impl(
44  matrix_expression<MatA, gpu_tag> const& A,
45  matrix_expression<MatB, gpu_tag>& B,
46  Triangular,
47  left
48 ){
49  REMORA_SIZE_CHECK(A().size1() == A().size2());
50  REMORA_SIZE_CHECK(A().size2() == B().size1());
51 
52  static_assert(std::is_same<typename MatA::value_type, typename MatB::value_type>::value, "[trsm] Arguments do not have same element type");
53  static_assert(std::is_same<typename MatA::evaluation_category::tag, dense_tag>::value, "[trsm] A is not dense");
54  static_assert(std::is_base_of<dense_tag, typename MatB::storage_type::storage_tag>::value, "[trsm] B does not have dense storage layout");
55 
56  //pre-evaluate A into a temporary if necessary
57  auto const& Aeval = eval_expression(A);
58 
59  using namespace clblast;
60 
61  //obtain geometry information
62  auto transA = std::is_same<typename MatA::orientation,typename MatB::orientation>::value? Transpose::kNo : Transpose::kYes;
63  auto layout = std::is_same<typename MatB::orientation::orientation, row_major>::value? Layout::kRowMajor : Layout::kColMajor;
64  auto diagonal = Triangular::is_unit? Diagonal::kUnit : Diagonal::kNonUnit;
65  auto triangular = Triangular::is_upper? Triangle::kUpper : Triangle::kLower;
66  if(transA == Transpose::kYes){//when we transpose the matrix, we also have to change its Triangular type
67  triangular = Triangular::is_upper? Triangle::kLower : Triangle::kUpper;
68  }
69  std::size_t m = B().size1();
70  std::size_t n = B().size2();
71 
72  //obtain raw storage
73  auto storageA = Aeval.raw_storage();
74  auto storageB = B().raw_storage();
75 
76  cl_event* event = nullptr;//todo: store events for out-of-order queues
77  auto code = Trsm(layout, Side::kLeft, triangular, transA, diagonal,
78  m, n, typename MatB::value_type(1),
79  storageA.buffer.get(), storageA.offset, storageA.leading_dimension,
80  storageB.buffer.get(), storageB.offset, storageB.leading_dimension,
81  &B().queue().get(), event
82  );
83  assert(code == StatusCode::kSuccess);
84 }
85 
86 
87 template <class Triangular, typename MatA, typename MatB>
88 void trsm_impl(
89  matrix_expression<MatA, gpu_tag> const& A,
90  matrix_expression<MatB, gpu_tag>& B,
91  Triangular,
92  right
93 ){
94  matrix_transpose<typename const_expression<MatA>::type> transA(A());
95  matrix_transpose<MatB> transB(B());
96  trsm_impl(transA, transB, typename Triangular::transposed_orientation(), left());
97 }
98 
99 // solve AX = B or XA=B with A being triangular
100 template <class Triangular, class Side, typename MatA, typename MatB>
101 void trsm(
102  matrix_expression<MatA, gpu_tag> const& A,
103  matrix_expression<MatB, gpu_tag>& B
104 ){
105  trsm_impl(A,B,Triangular(),Side());
106 }
107 
108 }}
109 
110 #endif