trmv.hpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  * \brief -
5  *
6  * \author O. Krause
7  * \date 2010
8  *
9  *
10  * \par Copyright 1995-2015 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://image.diku.dk/shark/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 //===========================================================================
31 #ifndef REMORA_KERNELS_DEFAULT_TRMV_HPP
32 #define REMORA_KERNELS_DEFAULT_TRMV_HPP
33 
34 #include "../../expression_types.hpp" //vector/matrix_expression
35 #include "../../detail/matrix_proxy_classes.hpp" //matrix_range, matrix_row
36 #include "../../detail/vector_proxy_classes.hpp" //vector_range, dense_vector_adaptor
37 #include "../gemv.hpp" //gemv kernel
38 #include "../dot.hpp" //dot kernel
39 #include <type_traits> //std::false_type marker for unoptimized
40 namespace remora{ namespace bindings{
41 
42 //Lower triangular(row-major) - vector
43 template<bool Unit, class MatA, class V>
44 void trmv_impl(
45  matrix_expression<MatA, cpu_tag> const& A,
46  vector_expression<V, cpu_tag> &b,
47  lower, row_major
48 ){
49  typedef typename MatA::value_type value_typeA;
50  typedef typename V::value_type value_typeV;
51  std::size_t size = A().size1();
52  std::size_t const blockSize = 128;
53  std::size_t numBlocks = size/blockSize;
54  if(numBlocks*blockSize < size) ++numBlocks;
55 
56  //this implementation partitions A into
57  //a set of panels, where a Panel is a set
58  // of columns. We start with the last panel
59  //and compute the product of it with the part of the vector
60  // and than just add the previous panel on top etc.
61 
62  //tmporary storage for subblocks of b
63  value_typeV valueStorage[blockSize];
64 
65  for(std::size_t bi = 1; bi <= numBlocks; ++bi){
66  std::size_t startbi = blockSize*(numBlocks-bi);
67  std::size_t sizebi = std::min(blockSize,size-startbi);
68  dense_vector_adaptor<value_typeA> values(valueStorage,sizebi);
69 
70  //store and save the values of b we are now changing
71  vector_range<V> bupper(b(),startbi,startbi+sizebi);
72  vector_range<V> blower(b(),startbi+sizebi,size);
73  noalias(values) = bupper;
74 
75  //multiply with triangular part of the block
76  for (std::size_t i = 0; i != sizebi; ++i) {
77  std::size_t posi = startbi+i;
78  b()(posi) = 0;
79  for(std::size_t j = 0; j < i; ++j){
80  b()(posi) += A()(posi,startbi+j)*values(j);
81  }
82  b()(posi) += values(i)*(Unit? value_typeA(1):A()(posi,posi));
83  }
84  //now compute the lower rectangular part of the current block of A
85  matrix_range<typename const_expression<MatA>::type > Alower(A(), startbi+sizebi,size,startbi,startbi+sizebi);
86  kernels::gemv(Alower,values,blower,1);
87  }
88 }
89 
90 //upper triangular(row-major)-vector
91 template<bool Unit, class MatA, class V>
92 void trmv_impl(
93  matrix_expression<MatA, cpu_tag> const& A,
94  vector_expression<V, cpu_tag>& b,
95  upper, row_major
96 ){
97  std::size_t size = A().size1();
98  for (std::size_t i = 0; i < size; ++ i) {
99  if(!Unit){
100  b()(i) *= A()(i,i);
101  }
102  vector_range<V> bblock(b(),i+1,size);
103  typedef typename const_expression<MatA>::type CMatA;
104  matrix_row<CMatA> Arow(A(),i);
105  vector_range<matrix_row<CMatA> > Asubrow(Arow,i+1,size);
106  typename V::value_type result;
107  kernels::dot(Asubrow,bblock,result);
108  b()(i) += result;
109  }
110 }
111 
112 //Lower triangular(column-major) - vector
113 template<bool Unit, class MatA, class V>
114 void trmv_impl(
115  matrix_expression<MatA, cpu_tag> const& A,
116  vector_expression<V, cpu_tag> &b,
117  lower, column_major
118 ){
119 
120  std::size_t size = A().size1();
121  for (std::size_t n = 1; n <= size; ++n) {
122  std::size_t i = size-n;
123  double bi = b()(i);
124  if(!Unit){
125  b()(i) *= A()(i,i);
126  }
127  for(std::size_t k = i+1; k != size; ++k)
128  b()(k) += bi * A()(k,i);
129  }
130 }
131 
132 //upper triangular(column-major)-vector
133 template<bool Unit, class MatA, class V>
134 void trmv_impl(
135  matrix_expression<MatA, cpu_tag> const& A,
136  vector_expression<V, cpu_tag>& b,
137  upper, column_major
138 ){
139  typedef typename MatA::value_type value_typeA;
140  typedef typename V::value_type value_typeV;
141  std::size_t size = A().size1();
142  std::size_t const blockSize = 128;
143  std::size_t numBlocks = size/blockSize;
144  if(numBlocks*blockSize < size) ++numBlocks;
145 
146  //this implementation partitions A into
147  //a set of panels, where a Panel is a set
148  // of rows. We start with the first panel
149  //and compute the product of it with the part of the vector
150  // and than just add the next panel on top etc.
151 
152  //tmporary storage for subblocks of b
153  value_typeV valueStorage[blockSize];
154 
155  for(std::size_t bj = 0; bj != numBlocks; ++bj){
156  std::size_t startbj = blockSize*bj;
157  std::size_t sizebj = std::min(blockSize,size-startbj);
158  dense_vector_adaptor<value_typeA> values(valueStorage,sizebj);
159 
160  //store and save the values of b we are now changing
161  vector_range<V> bupper(b(),startbj,startbj+sizebj);
162  vector_range<V> blower(b(),startbj+sizebj,size);
163  noalias(values) = bupper;
164  bupper.clear();
165  //multiply with triangular element
166  for (std::size_t j = 0; j != sizebj; ++j) {
167  std::size_t posj = startbj+j;
168  for(std::size_t i = 0; i < j; ++i){
169  b()(startbj+i) += A()(startbj+i,posj)*values(j);
170  }
171  b()(posj) += values(j)*(Unit? 1.0:A()(posj,posj));
172  }
173  //now compute the right rectangular part of the current block of A
174  matrix_range<typename const_expression<MatA>::type > Aright(A(), startbj,startbj+sizebj, startbj+sizebj,size);
175  kernels::gemv(Aright,blower,bupper,1);
176  }
177 }
178 
179 //dispatcher
180 template <bool Upper,bool Unit,typename MatA, typename V>
181 void trmv(
182  matrix_expression<MatA, cpu_tag> const& A,
183  vector_expression<V, cpu_tag> & b,
184  std::false_type//unoptimized
185 ){
186  REMORA_SIZE_CHECK(A().size1() == A().size2());
187  REMORA_SIZE_CHECK(A().size2() == b().size());
188  trmv_impl<Unit>(A, b, triangular_tag<Upper,false>(), typename MatA::orientation());
189 }
190 
191 }}
192 #endif