potrf.hpp
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief Implements the default implementation of the POTRF algorithm
5  *
6  * \author O. Krause
7  * \date 2016
8  *
9  *
10  * \par Copyright 1995-2014 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://image.diku.dk/shark/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef REMORA_KERNELS_DEFAULT_POTRF_HPP
31 #define REMORA_KERNELS_DEFAULT_POTRF_HPP
32 
33 #include "simple_proxies.hpp"
34 #include "../trsm.hpp" //trsm kernel
35 #include "../syrk.hpp" //syrk kernel
36 #include <type_traits> //std::false_type marker for unoptimized
37 
38 namespace remora{namespace bindings {
39 
40 //diagonal block kernels
41 //upper potrf(row-major)
42 template<class MatA>
43 std::size_t potrf_block(
44  matrix_expression<MatA, cpu_tag>& A,
45  row_major, lower
46 ) {
47  std::size_t m = A().size1();
48  for(size_t j = 0; j < m; j++) {
49  for(size_t i = j; i < m; i++) {
50  double s = A()(i, j);
51  for(size_t k = 0; k < j; k++) {
52  s -= A()(i, k) * A()(j, k);
53  }
54  if(i == j) {
55  if(s <= 0)
56  return i+1;
57  A()(i, j) = std::sqrt(s);
58  } else {
59  A()(i, j) = s / A()(j , j);
60  }
61  }
62  }
63  return 0;
64 }
65 
66 //lower potrf(row-major)
67 template<class MatA>
68 std::size_t potrf_block(
69  matrix_expression<MatA, cpu_tag>& A,
70  row_major, upper
71 ) {
72  std::size_t m = A().size1();
73  for(size_t i = 0; i < m; i++) {
74  double& Aii = A()(i, i);
75  if(Aii < 0)
76  return i+1;
77  using std::sqrt;
78  Aii = sqrt(Aii);
79  //update row
80 
81  for(std::size_t j = i + 1; j < m; ++j) {
82  A()(i, j) /= Aii;
83  }
84  //rank-one update
85  for(size_t k = i + 1; k < m; k++) {
86  for(std::size_t j = k; j < m; ++j) {
87  A()(k, j) -= A()(i, k) * A()(i, j);
88  }
89  }
90  }
91  return 0;
92 }
93 
94 
95 //dispatcher for column major
96 template<class MatA, class Triangular>
97 std::size_t potrf_block(
98  matrix_expression<MatA, cpu_tag>& A,
99  column_major, Triangular
100 ) {
101  auto Atrans = simple_trans(A);
102  return potrf_block(Atrans, row_major(), typename Triangular::transposed_orientation());
103 }
104 
105 //main kernel for large matrices
106 template <typename MatA>
107 std::size_t potrf_recursive(
108  matrix_expression<MatA, cpu_tag>& Afull,
109  std::size_t start,
110  std::size_t end,
111  lower
112 ){
113  std::size_t block_size = 32;
114  auto A = simple_subrange(Afull,start,end,start,end);
115  std::size_t size = A.size1();
116  //if the matrix is small enough call the computation kernel directly for the block
117  if(size <= block_size){
118  return potrf_block(A,typename MatA::orientation(), lower());
119  }
120  std::size_t numBlocks = (A.size1()+block_size-1)/block_size;
121  std::size_t split = numBlocks/2*block_size;
122 
123 
124  //otherwise run the kernel recursively
125  std::size_t result = potrf_recursive(Afull,start,start+split,lower());
126  if(result) return result;
127 
128  auto Aul = simple_subrange(A,0,split,0,split);
129  auto All = simple_subrange(A,split,size,0,split);
130  auto Alr = simple_subrange(A,split,size,split,size);
131  kernels::trsm<upper,right>(simple_trans(Aul), All );
132  kernels::syrk<false>(All,Alr, -1.0);
133  return potrf_recursive(Afull,start+split,end,lower());
134 }
135 
136 template <typename MatA>
137 std::size_t potrf_recursive(
138  matrix_expression<MatA, cpu_tag>& A,
139  std::size_t start,
140  std::size_t end,
141  upper
142 ){
143  auto Atrans = simple_trans(A);
144  return potrf_recursive(Atrans,start,end,lower());
145 }
146 
147 //dispatcher
148 template <class Triangular, typename MatA>
149 std::size_t potrf(
150  matrix_container<MatA, cpu_tag>& A,
151  std::false_type//unoptimized
152 ){
153  REMORA_SIZE_CHECK(A().size1() == A().size2());
154  return potrf_recursive(A,0,A().size1(), Triangular());
155 }
156 
157 }}
158 #endif