31 #ifndef REMORA_KERNELS_DEFAULT_TRMV_HPP 32 #define REMORA_KERNELS_DEFAULT_TRMV_HPP 34 #include "../../expression_types.hpp" 35 #include "../../detail/matrix_proxy_classes.hpp" 36 #include "../../detail/vector_proxy_classes.hpp" 37 #include "../gemv.hpp" 39 #include <type_traits> 40 namespace remora{
namespace bindings{
43 template<
bool Unit,
class MatA,
class V>
45 matrix_expression<MatA, cpu_tag>
const& A,
46 vector_expression<V, cpu_tag> &b,
49 typedef typename MatA::value_type value_typeA;
50 typedef typename V::value_type value_typeV;
51 std::size_t size = A().size1();
52 std::size_t
const blockSize = 128;
53 std::size_t numBlocks = size/blockSize;
54 if(numBlocks*blockSize < size) ++numBlocks;
63 value_typeV valueStorage[blockSize];
65 for(std::size_t bi = 1; bi <= numBlocks; ++bi){
66 std::size_t startbi = blockSize*(numBlocks-bi);
67 std::size_t sizebi = std::min(blockSize,size-startbi);
68 dense_vector_adaptor<value_typeA> values(valueStorage,sizebi);
71 vector_range<V> bupper(b(),startbi,startbi+sizebi);
72 vector_range<V> blower(b(),startbi+sizebi,size);
73 noalias(values) = bupper;
76 for (std::size_t i = 0; i != sizebi; ++i) {
77 std::size_t posi = startbi+i;
79 for(std::size_t j = 0; j < i; ++j){
80 b()(posi) += A()(posi,startbi+j)*values(j);
82 b()(posi) += values(i)*(Unit? value_typeA(1):A()(posi,posi));
85 matrix_range<typename const_expression<MatA>::type > Alower(A(), startbi+sizebi,size,startbi,startbi+sizebi);
86 kernels::gemv(Alower,values,blower,1);
91 template<
bool Unit,
class MatA,
class V>
93 matrix_expression<MatA, cpu_tag>
const& A,
94 vector_expression<V, cpu_tag>& b,
97 std::size_t size = A().size1();
98 for (std::size_t i = 0; i < size; ++ i) {
102 vector_range<V> bblock(b(),i+1,size);
103 typedef typename const_expression<MatA>::type CMatA;
104 matrix_row<CMatA> Arow(A(),i);
105 vector_range<matrix_row<CMatA> > Asubrow(Arow,i+1,size);
106 typename V::value_type result;
107 kernels::dot(Asubrow,bblock,result);
113 template<
bool Unit,
class MatA,
class V>
115 matrix_expression<MatA, cpu_tag>
const& A,
116 vector_expression<V, cpu_tag> &b,
120 std::size_t size = A().size1();
121 for (std::size_t n = 1; n <= size; ++n) {
122 std::size_t i = size-n;
127 for(std::size_t k = i+1; k != size; ++k)
128 b()(k) += bi * A()(k,i);
133 template<
bool Unit,
class MatA,
class V>
135 matrix_expression<MatA, cpu_tag>
const& A,
136 vector_expression<V, cpu_tag>& b,
139 typedef typename MatA::value_type value_typeA;
140 typedef typename V::value_type value_typeV;
141 std::size_t size = A().size1();
142 std::size_t
const blockSize = 128;
143 std::size_t numBlocks = size/blockSize;
144 if(numBlocks*blockSize < size) ++numBlocks;
153 value_typeV valueStorage[blockSize];
155 for(std::size_t bj = 0; bj != numBlocks; ++bj){
156 std::size_t startbj = blockSize*bj;
157 std::size_t sizebj = std::min(blockSize,size-startbj);
158 dense_vector_adaptor<value_typeA> values(valueStorage,sizebj);
161 vector_range<V> bupper(b(),startbj,startbj+sizebj);
162 vector_range<V> blower(b(),startbj+sizebj,size);
163 noalias(values) = bupper;
166 for (std::size_t j = 0; j != sizebj; ++j) {
167 std::size_t posj = startbj+j;
168 for(std::size_t i = 0; i < j; ++i){
169 b()(startbj+i) += A()(startbj+i,posj)*values(j);
171 b()(posj) += values(j)*(Unit? 1.0:A()(posj,posj));
174 matrix_range<typename const_expression<MatA>::type > Aright(A(), startbj,startbj+sizebj, startbj+sizebj,size);
175 kernels::gemv(Aright,blower,bupper,1);
180 template <
bool Upper,
bool Unit,
typename MatA,
typename V>
182 matrix_expression<MatA, cpu_tag>
const& A,
183 vector_expression<V, cpu_tag> & b,
186 REMORA_SIZE_CHECK(A().size1() == A().size2());
187 REMORA_SIZE_CHECK(A().size2() == b().size());
188 trmv_impl<Unit>(A, b, triangular_tag<Upper,false>(),
typename MatA::orientation());