28 #ifndef REMORA_DECOMPOSITIONS_HPP 29 #define REMORA_DECOMPOSITIONS_HPP 45 template<
class D,
class Device>
46 struct solver_expression{
47 typedef Device device_type;
49 D
const& operator()()
const {
50 return *
static_cast<D const*
>(
this);
54 return *
static_cast<D*
>(
this);
71 template<
class MatrixStorage>
72 class cholesky_decomposition:
73 public solver_expression<
74 cholesky_decomposition<MatrixStorage>,
75 typename MatrixStorage::device_type
78 typedef typename MatrixStorage::value_type value_type;
80 typedef typename MatrixStorage::device_type device_type;
81 cholesky_decomposition(){}
83 cholesky_decomposition(matrix_expression<E,device_type>
const& e):m_cholesky(e){
84 kernels::potrf<lower>(m_cholesky);
88 void decompose(matrix_expression<E,device_type>
const& e){
89 m_cholesky.resize(e().size1(), e().size2());
90 noalias(m_cholesky) = e;
91 kernels::potrf<lower>(m_cholesky);
94 MatrixStorage
const& lower_factor()
const{
98 auto upper_factor()const -> decltype(trans(
std::declval<MatrixStorage const&>())){
99 return trans(m_cholesky);
103 void solve(matrix_expression<MatB,device_type>& B, left)
const{
104 kernels::trsm<lower,left >(lower_factor(),B);
105 kernels::trsm<upper,left >(upper_factor(),B);
108 void solve(matrix_expression<MatB,device_type>& B, right)
const{
109 kernels::trsm<upper,right >(upper_factor(),B);
110 kernels::trsm<lower,right >(lower_factor(),B);
112 template<
class VecB,
bool Left>
113 void solve(vector_expression<VecB,device_type>&b, system_tag<Left>)
const{
114 kernels::trsv<lower, left>(lower_factor(),b);
115 kernels::trsv<upper,left>(upper_factor(),b);
134 void update(value_type alpha, value_type
beta, vector_expression<VecV,device_type>
const& v){
136 m_cholesky *= std::sqrt(alpha);
140 std::size_t n = v().size();
141 auto& L = m_cholesky;
142 typename vector_temporary<VecV>::type temp = v;
143 double beta_prime = 1;
144 double a = std::sqrt(alpha);
145 for(std::size_t j=0; j != n; ++j)
147 double Ljj = a * L(j,j);
148 double dj = Ljj * Ljj;
150 double swj2 = beta * wj * wj;
151 double gamma = dj * beta_prime + swj2;
153 double x = dj + swj2/beta_prime;
155 throw std::invalid_argument(
"[cholesky_decomposition::update] update makes matrix indefinite, no update available");
156 double nLjj = std::sqrt(x);
158 beta_prime += swj2/dj;
163 subrange(column(L,j),j+1,n) *= a;
164 noalias(subrange(temp,j+1,n)) -= (wj/Ljj) * subrange(column(L,j),j+1,n);
167 subrange(column(L,j),j+1,n) *= nLjj/Ljj;
168 noalias(subrange(column(L,j),j+1,n))+= (nLjj * beta*wj/gamma)*subrange(temp,j+1,n);
173 template<
typename Archive>
174 void serialize( Archive & ar,
const std::size_t version ) {
178 MatrixStorage m_cholesky;
187 template<
class MatrixStorage>
188 class symm_eigenvalue_decomposition:
189 public solver_expression<
190 symm_eigenvalue_decomposition<MatrixStorage>,
191 typename MatrixStorage::device_type
194 typedef typename MatrixStorage::value_type value_type;
195 typedef typename vector_temporary<MatrixStorage>::type VectorStorage;
197 typedef typename MatrixStorage::device_type device_type;
198 symm_eigenvalue_decomposition(){}
200 symm_eigenvalue_decomposition(matrix_expression<E,device_type>
const& e){
205 void decompose(matrix_expression<E,device_type>
const& e){
206 REMORA_SIZE_CHECK(e().size1() == e().size2());
207 m_eigenvectors.resize(e().size1(),e().size1());
208 m_eigenvalues.resize(e().size1());
209 noalias(m_eigenvectors) = e;
211 kernels::syev(m_eigenvectors,m_eigenvalues);
214 MatrixStorage
const& Q()
const{
215 return m_eigenvectors;
217 VectorStorage
const& D()
const{
218 return m_eigenvalues;
223 void solve(matrix_expression<MatB,device_type>& B, left)
const{
224 B() = Q() % to_diagonal(elem_inv(D()))% trans(Q()) % B;
227 void solve(matrix_expression<MatB,device_type>& B, right)
const{
228 auto transB = trans(B);
229 solve(transB,left());
232 void solve(vector_expression<VecB,device_type>&b, left)
const{
233 b() = Q() % safe_div(trans(Q()) % b,D() ,0.0);
237 void solve(vector_expression<VecB,device_type>&b, right)
const{
241 template<
typename Archive>
242 void serialize( Archive & ar,
const std::size_t version ) {
247 MatrixStorage m_eigenvectors;
248 VectorStorage m_eigenvalues;
254 template<
class MatrixStorage>
255 class pivoting_lu_decomposition:
256 public solver_expression<
257 pivoting_lu_decomposition<MatrixStorage>,
258 typename MatrixStorage::device_type
261 typedef typename MatrixStorage::device_type device_type;
263 pivoting_lu_decomposition(matrix_expression<E,device_type>
const& e)
264 :m_factor(e), m_permutation(e().size1()){
265 kernels::getrf(m_factor,m_permutation);
268 MatrixStorage
const& factor()
const{
272 permutation_matrix
const& permutation()
const{
273 return m_permutation;
277 void solve(matrix_expression<MatB,device_type>& B, left)
const{
278 swap_rows(m_permutation,B);
279 kernels::trsm<unit_lower,left>(m_factor,B);
280 kernels::trsm<upper,left>(m_factor,B);
283 void solve(matrix_expression<MatB,device_type>& B, right)
const{
284 kernels::trsm<upper,right>(m_factor,B);
285 kernels::trsm<unit_lower,right>(m_factor,B);
286 swap_columns_inverted(m_permutation,B);
289 void solve(vector_expression<VecB,device_type>& b, left)
const{
290 swap_rows(m_permutation,b);
291 kernels::trsv<unit_lower,left>(m_factor,b);
292 kernels::trsv<upper,left>(m_factor,b);
295 void solve(vector_expression<VecB,device_type>& b, right)
const{
296 kernels::trsv<upper,right>(m_factor,b);
297 kernels::trsv<unit_lower,right>(m_factor,b);
298 swap_rows_inverted(m_permutation,b);
301 MatrixStorage m_factor;
302 permutation_matrix m_permutation;
326 template<
class MatrixStorage>
327 class symm_pos_semi_definite_solver:
328 public solver_expression<symm_pos_semi_definite_solver<MatrixStorage>, typename MatrixStorage::device_type>{
330 typedef typename MatrixStorage::device_type device_type;
332 symm_pos_semi_definite_solver(matrix_expression<E,device_type>
const& e,
double max_conditioning = 0.0)
333 :m_factor(e), m_permutation(e().size1()){
334 m_rank = kernels::pstrf<lower>(m_factor,m_permutation);
335 if(m_rank == e().size1())
return;
336 if(max_conditioning > 0){
337 while(m_factor(0,0) > max_conditioning * m_factor(m_rank-1,m_rank -1))
341 auto L = columns(m_factor,0,m_rank);
342 m_cholesky.decompose(trans(L) % L);
345 std::size_t rank()
const{
353 void compute_inverse_factor(matrix_expression<Mat,device_type>& C)
const{
354 REMORA_SIZE_CHECK(C().size1() == m_rank);
355 REMORA_SIZE_CHECK(C().size2() == m_factor.size1());
356 if(m_rank == m_factor.size1()){
358 noalias(C) = identity_matrix<double>( m_factor.size1());
359 swap_columns_inverted(m_permutation,C);
360 kernels::trsm<lower,left>(m_factor,C);
362 auto L = columns(m_factor,0,m_rank);
363 noalias(C) = trans(L);
364 m_cholesky.solve(C,left());
365 swap_columns_inverted(m_permutation,C);
370 void solve(matrix_expression<MatB,device_type>& B, left)
const{
371 swap_rows(m_permutation,B);
374 }
else if(m_rank == m_factor.size1()){
375 kernels::trsm<lower,left>(m_factor,B);
376 kernels::trsm<upper,left>(trans(m_factor),B);
378 auto L = columns(m_factor,0,m_rank);
379 auto Z = eval_block(trans(L) % B);
380 m_cholesky.solve(Z,left());
381 m_cholesky.solve(Z,left());
384 swap_rows_inverted(m_permutation,B);
387 void solve(matrix_expression<MatB,device_type>& B, right)
const{
389 auto transB = trans(B);
390 solve(transB, left());
392 template<
class VecB,
bool Left>
393 void solve(vector_expression<VecB,device_type>& b, system_tag<Left>)
const{
394 swap_rows(m_permutation,b);
397 }
else if(m_rank == m_factor.size1()){
398 kernels::trsv<lower,left >(m_factor,b);
399 kernels::trsv<upper,left >(trans(m_factor),b);
401 auto L = columns(m_factor,0,m_rank);
402 auto z = eval_block(prod(trans(L),b));
403 m_cholesky.solve(z,left());
404 m_cholesky.solve(z,left());
405 noalias(b) = prod(L,z);
407 swap_rows_inverted(m_permutation,b);
411 MatrixStorage m_factor;
412 cholesky_decomposition<MatrixStorage> m_cholesky;
413 permutation_matrix m_permutation;
417 class cg_solver:
public solver_expression<cg_solver<MatA>, typename MatA::device_type>{
419 typedef typename MatA::const_closure_type matrix_closure_type;
420 typedef typename MatA::device_type device_type;
421 cg_solver(matrix_closure_type
const& e,
double epsilon = 1.e-10,
unsigned int max_iterations = 0)
422 :m_expression(e), m_epsilon(epsilon), m_max_iterations(max_iterations){}
426 matrix_expression<MatB,device_type>& B, left,
427 double epsilon,
unsigned max_iterations
429 typename matrix_temporary<MatB>::type X = B;
430 cg(m_expression,X, B, epsilon, max_iterations);
435 matrix_expression<MatB,device_type>& B, right,
436 double epsilon,
unsigned max_iterations
438 auto transB = trans(B);
439 typename transposed_matrix_temporary<MatB>::type X = transB;
440 cg(m_expression,X,transB, epsilon, max_iterations);
443 template<
class VecB,
bool Left>
445 vector_expression<VecB,device_type>&b, system_tag<Left>,
446 double epsilon,
unsigned max_iterations
448 typename vector_temporary<VecB>::type x = b;
449 cg(m_expression,x,b,epsilon,max_iterations);
453 template<
class VecB,
bool Left>
454 void solve(vector_expression<VecB,device_type>&b, system_tag<Left>
tag)
const{
455 solve(b, tag, m_epsilon, m_max_iterations);
457 template<
class MatB,
bool Left>
458 void solve(matrix_expression<MatB,device_type>&B, system_tag<Left>
tag)
const{
459 solve(B, tag, m_epsilon, m_max_iterations);
462 template<
class MatT,
class VecB,
class VecX>
464 matrix_expression<MatT, device_type>
const& A,
465 vector_expression<VecX, device_type>& x,
466 vector_expression<VecB, device_type>
const& b,
468 unsigned int max_iterations
470 REMORA_SIZE_CHECK(A().size1() == A().size2());
471 REMORA_SIZE_CHECK(A().size1() == b().size());
472 REMORA_SIZE_CHECK(A().size1() == x().size());
473 typedef typename vector_temporary<VecX>::type vector_type;
475 std::size_t dim = b().size();
478 vector_type residual = b - prod(A,x);
480 if(norm_inf(residual) > norm_inf(b)){
484 vector_type next_residual(dim);
485 vector_type p = residual;
488 for(std::size_t iter = 0;; ++iter){
489 if(max_iterations != 0 && iter >= max_iterations)
break;
490 noalias(Ap) = prod(A,p);
491 double rsqr = norm_sqr(residual);
492 double alpha = rsqr/inner_prod(p,Ap);
493 noalias(x) += alpha * p;
494 noalias(next_residual) = residual - alpha * Ap;
495 if(norm_inf(next_residual) < epsilon)
498 double beta = inner_prod(next_residual,next_residual)/rsqr;
500 noalias(p) += next_residual;
501 swap(residual,next_residual);
505 template<
class MatT,
class MatB,
class MatX>
507 matrix_expression<MatT, device_type>
const& A,
508 matrix_expression<MatX, device_type>& X,
509 matrix_expression<MatB, device_type>
const& B,
511 unsigned int max_iterations
513 REMORA_SIZE_CHECK(A().size1() == A().size2());
514 REMORA_SIZE_CHECK(A().size1() == B().size1());
515 REMORA_SIZE_CHECK(A().size1() == X().size1());
516 REMORA_SIZE_CHECK(B().size2() == X().size2());
517 typedef typename vector_temporary<MatX>::type vector_type;
518 typedef typename matrix_temporary<MatX>::type matrix_type;
520 std::size_t dim = B().size1();
521 std::size_t num_rhs = B().size2();
524 matrix_type residual = B - prod(A,X);
526 for(std::size_t i = 0; i != num_rhs; ++i){
527 if(norm_inf(column(residual,i)) <= norm_inf(column(residual,i))){
529 noalias(column(residual,i)) = column(B,i);
533 vector_type next_residual(dim);
534 matrix_type P = residual;
535 matrix_type AP(dim, num_rhs);
537 for(std::size_t iter = 0;; ++iter){
538 if(max_iterations != 0 && iter >= max_iterations)
break;
540 noalias(AP) = prod(A,P);
542 for(std::size_t i = 0; i != num_rhs; ++i){
543 auto r = column(residual,i);
546 if(norm_inf(r) < epsilon)
continue;
548 auto x = column(X,i);
549 auto p = column(P,i);
550 auto Ap = column(AP,i);
551 double rsqr = norm_sqr(r);
552 double alpha = rsqr/inner_prod(p,Ap);
553 noalias(x) += alpha * p;
554 noalias(next_residual) = r - alpha * Ap;
555 double beta = inner_prod(next_residual,next_residual)/rsqr;
557 noalias(p) += next_residual;
558 noalias(r) = next_residual;
561 if(max(abs(residual)) < epsilon)
566 matrix_closure_type m_expression;
568 unsigned int m_max_iterations;
575 struct symm_pos_def{
typedef symm_pos_def transposed_orientation;};
576 struct symm_semi_pos_def{
typedef symm_semi_pos_def transposed_orientation;};
577 struct indefinite_full_rank{
typedef indefinite_full_rank transposed_orientation;};
578 struct conjugate_gradient{
579 typedef conjugate_gradient transposed_orientation;
581 unsigned max_iterations;
582 conjugate_gradient(
double epsilon = 1.e-10,
unsigned max_iterations = 0)
583 :epsilon(epsilon), max_iterations(max_iterations){}
588 template<
class MatA,
class SolverTag>
589 struct solver_traits;
591 template<
class MatA,
bool Upper,
bool Unit>
592 struct solver_traits<MatA,triangular_tag<Upper,Unit> >{
593 class type:
public solver_expression<type,typename MatA::device_type>{
595 typedef typename MatA::const_closure_type matrix_closure_type;
596 typedef typename MatA::device_type device_type;
597 type(matrix_closure_type
const& e, triangular_tag<Upper,Unit>):m_matrix(e){}
599 template<
class MatB,
bool Left>
600 void solve(matrix_expression<MatB, device_type>& B, system_tag<Left> ){
601 kernels::trsm<triangular_tag<Upper,Unit>,system_tag<Left> >(m_matrix,B);
603 template<
class VecB,
bool Left>
604 void solve(vector_expression<VecB, device_type>& b, system_tag<Left> ){
605 kernels::trsv<triangular_tag<Upper,Unit>,system_tag<Left> >(m_matrix,b);
608 matrix_closure_type m_matrix;
613 struct solver_traits<MatA,symm_pos_def>{
614 struct type :
public cholesky_decomposition<typename matrix_temporary<MatA>::type>{
616 type(M
const& m, symm_pos_def)
617 :cholesky_decomposition<typename matrix_temporary<MatA>::type>(m){}
622 struct solver_traits<MatA,indefinite_full_rank>{
623 struct type :
public pivoting_lu_decomposition<typename matrix_temporary<MatA>::type>{
625 type(M
const& m, indefinite_full_rank)
626 :pivoting_lu_decomposition<typename matrix_temporary<MatA>::type>(m){}
631 struct solver_traits<MatA,symm_semi_pos_def>{
632 struct type :
public symm_pos_semi_definite_solver<typename matrix_temporary<MatA>::type>{
634 type(M
const& m, symm_semi_pos_def)
635 :symm_pos_semi_definite_solver<typename matrix_temporary<MatA>::type>(m){}
640 struct solver_traits<MatA,conjugate_gradient>{
641 struct type :
public cg_solver<MatA>{
643 type(M
const& m, conjugate_gradient t):cg_solver<MatA>(m,t.epsilon,t.max_iterations){}
649 template<
class MatA,
class SolverType>
650 struct solver:
public detail::solver_traits<MatA,SolverType>::type{
652 solver(M
const& m, SolverType t = SolverType()): detail::solver_traits<MatA,SolverType>::type(m,t){}