pstrf.hpp
Go to the documentation of this file.
1 /*!
2  * \brief Cholesky Decompositions for a positive semi-definite Matrix A = LL^T
3  *
4  *
5  * \author O. Krause
6  * \date 2016
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 
29 #ifndef REMORA_KERNELS_DEFAULT_PSTRF_HPP
30 #define REMORA_KERNELS_DEFAULT_PSTRF_HPP
31 
32 #include "../gemm.hpp" //gemm kernel
33 #include "../gemv.hpp" //gemv kernel
34 #include "simple_proxies.hpp"
35 #include "../../vector.hpp"//matrix_transpose, matrix_range
36 #include <algorithm>
37 namespace remora{namespace bindings {
38 
39 template<class MatA, class VecP>
40 std::size_t pstrf(
41  matrix_expression<MatA, cpu_tag> &A,
42  vector_expression<VecP, cpu_tag>& P,
43  lower
44 ){
45  //The normal cholesky decomposition works by
46  // partitioning A in 4 blocks.
47  // |A11 | A12
48  //A^(k)=|-----------
49  // |A21 | A^(k+1)
50  // First, the cholesky decomposition of A is computed, after which
51  // bloc A12 is computed by solving a system of equations.
52  // Lastly, the (expensive) operation
53  // A^(k+1) -= A21 A21^T
54  // is performed.
55  // When we suspect that A is rank deficient, this does not work
56  // as we might run in the case that A11 is rank deficient which makes
57  // updating A21 impossible.
58  // instead, in every iteration we first pick the row/column with the
59  // current largest diagonal element, permute the matrix, so that
60  // this row/column is in the blocks A11 and A12 and update everything
61  //step by step until we have a full block, which makes it possible
62  // to perform the expensive
63  // // A^(k+1) -= A21 A21^T
64  // using efficient routines.
65 
66 
67  //todo: experiment a bit with the sizes
68  std::size_t block_size = 20;
69 
70 
71  size_t m = A().size1();
72  //storage for pivot values
73  vector<typename MatA::value_type> pivotValues(m);
74 
75  //stopping criterion
76  double max_diag = A()(0,0);
77  for(std::size_t i = 1; i < m; ++i)
78  max_diag = std::max(max_diag,std::abs(A()(i,i)));
79  double epsilon = m * m * std::numeric_limits<typename MatA::value_type>::epsilon() * max_diag;
80 
81  for(std::size_t k = 0; k < m; k += block_size){
82  std::size_t currentSize = std::min(m-k,block_size);//last iteration is smaller
83  //partition of the matrix
84  auto Ak = simple_subrange(A,k,m,k,m);
85  auto pivots = simple_subrange(pivotValues,k,m);
86 
87  //update current block L11
88  for(std::size_t j = 0; j != currentSize; ++j){
89  //we have to dynamically update the pivot values
90  //we start every block anew to prevent accumulation of rounding errors
91  if(j == 0){
92  for(std::size_t i = 0; i != m-k; ++i)
93  pivots(i) = Ak(i,i);
94  }else{//update pivot values
95  for(std::size_t i = j; i != m-k; ++i)
96  pivots(i) -= Ak(i,j-1) * Ak(i,j-1);
97  }
98  //get current pivot. if it is not equal to the j-th, we swap rows and columns
99  //such that j == pivot and Ak(j,j) = pivots(pivot)
100  std::size_t pivot = std::max_element(pivots.begin()+j,pivots.end())-pivots.begin();
101  if(pivot != j){
102  P()(k+j) = (int)(pivot+k);
103  A().swap_rows(k+j,k+pivot);
104  A().swap_columns(k+j,k+pivot);
105  std::swap(pivots(j),pivots(pivot));
106  }
107 
108  //check whether we are finished
109  auto pivotValue = pivots(j);
110  if(pivotValue < epsilon){
111  //the remaining part is so near 0, we can just ignore it
112  simple_subrange(Ak,j,m-k,j,m-k).clear();
113  return k+j;
114  }
115 
116  //update current column
117  Ak(j,j) = std::sqrt(pivotValue);
118  //the last updates of columns k...k+j-1 did not change
119  //this row, so do it now
120  auto curCol = simple_column(Ak,j);
121  auto colLowerPart = simple_subrange(curCol,j+1,m-k);
122  if(j > 0){
123  //the last updates of columns 0,1,...,j-1 did not change
124  //this column, so do it now
125  auto blockLL = simple_subrange(Ak,j+1,m-k,0,j);
126  auto curRow = simple_row(Ak,j);
127  auto rowLeftPart = simple_subrange(curRow,0,j);
128 
129  //suppose you are the j-th column
130  //than you want to get updated by the last
131  //outer products which are induced by your column friends
132  //Li...Lj-1
133  //so you get the effect of
134  //(L1L1T)^(j)+...+(Lj-1Lj-1)^(j)
135  //=L1*L1j+L2*L2j+L3*L3j...
136  //which is a matrix-vector product
137  kernels::gemv(blockLL,rowLeftPart,colLowerPart,-1);
138  }
139  colLowerPart /= Ak(j,j);
140  //set block L12 to 0
141  subrange(Ak,j,j+1,j+1,Ak.size2()).clear();
142  }
143  if(k+currentSize < m){
144  auto blockLL = simple_subrange(Ak, block_size, m-k, 0, block_size);
145  auto blockLR = simple_subrange(Ak, block_size, m-k, block_size, m-k);
146  kernels::gemm(blockLL,simple_trans(blockLL), blockLR, -1);
147  }
148  }
149  return m;
150 
151 }
152 
153 template<class MatA, class VecP>
154 std::size_t pstrf(
155  matrix_expression<MatA, cpu_tag> &A,
156  vector_expression<VecP, cpu_tag>& P,
157  upper
158 ){
159  auto transA = simple_trans(A);
160  return pstrf(transA,P,lower());
161 }
162 
163 }}
164 #endif