31 #ifndef REMORA_KERNELS_DEFAULT_TRSM_HPP 32 #define REMORA_KERNELS_DEFAULT_TRSM_HPP 34 #include "../../expression_types.hpp" 35 #include "../../detail/structure.hpp" 37 #include "../gemm.hpp" 39 #include <type_traits> 41 namespace remora{
namespace bindings {
44 template<std::
size_t maxBlockSize1,std::
size_t maxBlockSize2,
bool Unit,
class MatA,
class MatB>
46 matrix_expression<MatA, cpu_tag>
const& A,
47 matrix_expression<MatB, cpu_tag> &B,
51 REMORA_SIZE_CHECK(A().size1() <= maxBlockSize1);
52 typedef typename MatA::value_type value_typeA;
53 typedef typename MatB::value_type value_typeB;
57 std::size_t size = A().size1();
58 value_typeA blockA[maxBlockSize1][maxBlockSize1];
59 for(std::size_t i = 0; i != size; ++i){
60 for(std::size_t j = 0; j <= i; ++j){
61 blockA[i][j] = A()(i,j);
66 value_typeB blockB[maxBlockSize2][maxBlockSize1];
67 std::size_t numBlocks = (B().size2()+maxBlockSize2-1)/maxBlockSize2;
68 for(std::size_t i = 0; i != numBlocks; ++i){
69 std::size_t startB= i*maxBlockSize2;
70 std::size_t curBlockSize2 =std::min(maxBlockSize2, B().size2() - startB);
73 for(std::size_t i = 0; i != size; ++i){
74 for(std::size_t k = 0; k != curBlockSize2; ++k){
75 blockB[k][i] = B()(i,startB+k);
79 for(std::size_t k = 0; k != curBlockSize2; ++k){
80 for (std::size_t i = 0; i != size; ++i) {
81 for (std::size_t j = 0; j != i; ++j) {
82 blockB[k][i] -= blockA[i][j]*blockB[k][j];
85 if(blockA[i][i] == value_typeA())
86 throw std::invalid_argument(
"[TRSM] Matrix is singular!");
87 blockB[k][i] /= blockA[i][i];
92 for(std::size_t i = 0; i != size; ++i){
93 for(std::size_t k = 0; k != curBlockSize2; ++k){
94 B()(i,startB+k) = blockB[k][i];
101 template<std::
size_t maxBlockSize1,std::
size_t maxBlockSize2,
bool Unit,
class MatA,
class MatB>
103 matrix_expression<MatA, cpu_tag>
const& A,
104 matrix_expression<MatB, cpu_tag>& B,
108 typedef typename MatA::value_type value_type;
110 std::size_t size = A().size1();
111 value_type blockA[maxBlockSize1][maxBlockSize1];
112 for(std::size_t i = 0; i != size; ++i){
113 for(std::size_t j = 0; j <= i; ++j){
114 blockA[i][j] = A()(i,j);
119 for(std::size_t k = 0; k != B().size2(); ++k){
120 for (std::size_t i = 0; i != size; ++i) {
121 for (std::size_t j = 0; j != i; ++j) {
122 B()(i,k) -= blockA[i][j] * B()(j,k);
125 if(blockA[i][i] == value_type())
126 throw std::invalid_argument(
"[TRSM] Matrix is singular!");
127 B()(i,k) /= blockA[i][i];
135 template<std::
size_t maxBlockSize1, std::
size_t maxBlockSize2,
bool Unit,
class MatA,
class MatB>
137 matrix_expression<MatA, cpu_tag>
const& A,
138 matrix_expression<MatB, cpu_tag> &B,
142 REMORA_SIZE_CHECK(A().size1() <= maxBlockSize1);
143 typedef typename MatA::value_type value_typeA;
144 typedef typename MatB::value_type value_typeB;
147 std::size_t size = A().size1();
148 value_typeA blockA[maxBlockSize1][maxBlockSize1];
149 for(std::size_t i = 0; i != size; ++i){
150 for(std::size_t j = i; j != size; ++j){
151 blockA[i][j] = A()(i,j);
155 value_typeB blockB[maxBlockSize2][maxBlockSize1];
156 std::size_t numBlocks = (B().size2()+maxBlockSize2-1)/maxBlockSize2;
157 for(std::size_t i = 0; i != numBlocks; ++i){
158 std::size_t startB= i*maxBlockSize2;
159 std::size_t curBlockSize2 =std::min(maxBlockSize2, B().size2() - startB);
162 for(std::size_t i = 0; i != size; ++i){
163 for(std::size_t k = 0; k != curBlockSize2; ++k){
164 blockB[k][i] = B()(i,startB+k);
168 for(std::size_t k = 0; k != curBlockSize2; ++k){
169 for (std::size_t n = 0; n != size; ++n) {
170 std::size_t i = size-n-1;
171 for (std::size_t j = i+1; j != size; ++j) {
172 blockB[k][i] -= blockA[i][j] * blockB[k][j];
175 if(blockA[i][i] == value_typeA())
176 throw std::invalid_argument(
"[TRSM] Matrix is singular!");
177 blockB[k][i] /= blockA[i][i];
182 for(std::size_t i = 0; i != size; ++i){
183 for(std::size_t j = 0; j != curBlockSize2; ++j){
184 B()(i,startB+j) = blockB[j][i];
191 template<std::
size_t maxBlockSize1,std::
size_t maxBlockSize2,
bool Unit,
class MatA,
class MatB>
193 matrix_expression<MatA, cpu_tag>
const& A,
194 matrix_expression<MatB, cpu_tag>& B,
198 typedef typename MatA::value_type value_type;
200 std::size_t size = A().size1();
201 value_type blockA[maxBlockSize1][maxBlockSize1];
202 for(std::size_t i = 0; i != size; ++i){
203 for(std::size_t j = i; j != size; ++j){
204 blockA[i][j] = A()(i,j);
209 for(std::size_t k = 0; k != B().size2(); ++k){
210 for (std::size_t n = 0; n != size; ++n) {
211 std::size_t i = size-n-1;
212 for (std::size_t j = i+1; j != size; ++j) {
213 B()(i,k) -= blockA[i][j] * B()(j,k);
216 if(blockA[i][i] == value_type())
217 throw std::invalid_argument(
"[TRSM] Matrix is singular!");
218 B()(i,k) /= blockA[i][i];
224 template <
typename MatA,
typename MatB,
class Triangular>
226 matrix_expression<MatA, cpu_tag>
const& Afull,
227 matrix_expression<MatB, cpu_tag> & Bfull,
233 static std::size_t
const Block_Size = 32;
234 std::size_t num_rhs = Bfull().size2();
235 auto A = simple_subrange(Afull,start,end,start,end);
236 auto B = simple_subrange(Bfull,start,end,0,num_rhs);
238 if(A.size1() <= Block_Size){
239 trsm_block<Block_Size,16,Triangular::is_unit>(A,B,triangular_tag<Triangular::is_upper,false>(),
typename MatB::orientation());
242 std::size_t size = A.size1();
243 std::size_t numBlocks =(A.size1()+Block_Size-1)/Block_Size;
244 std::size_t split = numBlocks/2*Block_Size;
245 auto Bfront = simple_subrange(B,0,split,0,num_rhs);
246 auto Bback = simple_subrange(B,split,size,0,num_rhs);
249 if(Triangular::is_upper){
250 trsm_recursive(Afull, Bfull,start+split,end, t, l);
251 kernels::gemm(simple_subrange(A,0,split,split,size), Bback, Bfront, -1.0);
252 trsm_recursive(Afull, Bfull,start,start+split, t, l);
254 trsm_recursive(Afull, Bfull,start,start+split, t, l);
255 kernels::gemm(simple_subrange(A,split,size,0,split), Bfront, Bback, -1.0);
256 trsm_recursive(Afull, Bfull,start+split,end, t, l);
260 template <
typename MatA,
typename MatB,
class Triangular>
262 matrix_expression<MatA, cpu_tag>
const& Afull,
263 matrix_expression<MatB, cpu_tag> & Bfull,
269 auto transA = simple_trans(Afull);
270 auto transB = simple_trans(Bfull);
271 trsm_recursive(transA,transB,start,end,
typename Triangular::transposed_orientation(),left());
275 template <
class Triangular,
class S
ide,
typename MatA,
typename MatB>
277 matrix_expression<MatA, cpu_tag>
const& A,
278 matrix_expression<MatB, cpu_tag>& B,
282 bindings::trsm_recursive(A,B,0,A().size1(), Triangular(), Side());