30 #ifndef REMORA_KERNELS_DEFAULT_POTRF_HPP 31 #define REMORA_KERNELS_DEFAULT_POTRF_HPP 34 #include "../trsm.hpp" 35 #include "../syrk.hpp" 36 #include <type_traits> 38 namespace remora{
namespace bindings {
43 std::size_t potrf_block(
44 matrix_expression<MatA, cpu_tag>& A,
47 std::size_t m = A().size1();
48 for(
size_t j = 0; j < m; j++) {
49 for(
size_t i = j; i < m; i++) {
51 for(
size_t k = 0; k < j; k++) {
52 s -= A()(i, k) * A()(j, k);
57 A()(i, j) = std::sqrt(s);
59 A()(i, j) = s / A()(j , j);
68 std::size_t potrf_block(
69 matrix_expression<MatA, cpu_tag>& A,
72 std::size_t m = A().size1();
73 for(
size_t i = 0; i < m; i++) {
74 double& Aii = A()(i, i);
81 for(std::size_t j = i + 1; j < m; ++j) {
85 for(
size_t k = i + 1; k < m; k++) {
86 for(std::size_t j = k; j < m; ++j) {
87 A()(k, j) -= A()(i, k) * A()(i, j);
96 template<
class MatA,
class Triangular>
97 std::size_t potrf_block(
98 matrix_expression<MatA, cpu_tag>& A,
99 column_major, Triangular
101 auto Atrans = simple_trans(A);
102 return potrf_block(Atrans, row_major(),
typename Triangular::transposed_orientation());
106 template <
typename MatA>
107 std::size_t potrf_recursive(
108 matrix_expression<MatA, cpu_tag>& Afull,
113 std::size_t block_size = 32;
114 auto A = simple_subrange(Afull,start,end,start,end);
115 std::size_t size = A.size1();
117 if(size <= block_size){
118 return potrf_block(A,
typename MatA::orientation(), lower());
120 std::size_t numBlocks = (A.size1()+block_size-1)/block_size;
121 std::size_t split = numBlocks/2*block_size;
125 std::size_t result = potrf_recursive(Afull,start,start+split,lower());
126 if(result)
return result;
128 auto Aul = simple_subrange(A,0,split,0,split);
129 auto All = simple_subrange(A,split,size,0,split);
130 auto Alr = simple_subrange(A,split,size,split,size);
131 kernels::trsm<upper,right>(simple_trans(Aul), All );
132 kernels::syrk<false>(All,Alr, -1.0);
133 return potrf_recursive(Afull,start+split,end,lower());
136 template <
typename MatA>
137 std::size_t potrf_recursive(
138 matrix_expression<MatA, cpu_tag>& A,
143 auto Atrans = simple_trans(A);
144 return potrf_recursive(Atrans,start,end,lower());
148 template <
class Triangular,
typename MatA>
150 matrix_container<MatA, cpu_tag>& A,
153 REMORA_SIZE_CHECK(A().size1() == A().size2());
154 return potrf_recursive(A,0,A().size1(), Triangular());