matrix_assign.hpp
Go to the documentation of this file.
1 /*!
2  * \brief Kernels for matrix-expression assignments
3  *
4  * \author O. Krause
5  * \date 2013
6  *
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef REMORA_KERNELS_DEFAULT_MATRIX_ASSIGN_HPP
29 #define REMORA_KERNELS_DEFAULT_MATRIX_ASSIGN_HPP
30 
31 #include "../vector_assign.hpp"
32 #include "../../detail/traits.hpp"
33 #include <algorithm>
34 #include <vector>
35 namespace remora{namespace bindings{
36 
37 //////////////////////////////////////////////////////
38 ////Scalar Assignment to Matrix
39 /////////////////////////////////////////////////////
40 
41 
42 // Explicitly iterating row major
43 template<class F, class M>
44 void matrix_assign(
45  matrix_expression<M, cpu_tag> &m,
46  typename M::value_type t,
47  row_major
48 ){
49  for(std::size_t i = 0; i != m().size1(); ++i){
50  auto rowM = row(m,i);
51  kernels::assign<F>(rowM,t);
52  }
53 }
54 // Explicitly iterating column major
55 template<class F, class M>
56 void matrix_assign(
57  matrix_expression<M, cpu_tag> &m,
58  typename M::value_type t,
59  column_major
60 ){
61  for(std::size_t j = 0; j != m().size2(); ++j){
62  auto columnM = column(m,j);
63  kernels::assign<F>(columnM,t);
64  }
65 }
66 // Spcial case triangular packed - just calls the first two implementations.
67 template<class F, class M, class Orientation, class Triangular>
68 void matrix_assign(
69  matrix_expression<M, cpu_tag> &m,
70  typename M::value_type t,
71  triangular<Orientation,Triangular>
72 ){
73  matrix_assign<F>(m,t,Orientation());
74 }
75 
76 
77 /////////////////////////////////////////////////////////////////
78 //////Matrix Assignment implementing op=
79 ////////////////////////////////////////////////////////////////
80 
81 //direct assignment without functor
82 //the cases were both arguments have the same orientation can be implemented using assign.
83 template<class M, class E,class TagE, class TagM>
84 void matrix_assign(
85  matrix_expression<M, cpu_tag> &m,
86  matrix_expression<E, cpu_tag> const& e,
87  row_major, row_major,TagE, TagM
88 ) {
89  for(std::size_t i = 0; i != m().size1(); ++i){
90  auto rowM = row(m,i);
91  kernels::assign(rowM,row(e,i));
92  }
93 }
94 
95 //remain the versions where both argumnts to not have the same orientation
96 
97 //dense-dense case
98 template<class M, class E>
99 void matrix_assign(
100  matrix_expression<M, cpu_tag> &m,
101  matrix_expression<E, cpu_tag> const& e,
102  row_major, column_major,dense_tag, dense_tag
103 ) {
104  //compute blockwise and wrelem the transposed block.
105  std::size_t const blockSize = 8;
106  typename M::value_type blockStorage[blockSize][blockSize];
107 
108  typedef typename M::size_type size_type;
109  size_type size1 = m().size1();
110  size_type size2 = m().size2();
111  for (size_type iblock = 0; iblock < size1; iblock += blockSize){
112  for (size_type jblock = 0; jblock < size2; jblock += blockSize){
113  std::size_t blockSizei = std::min(blockSize,size1-iblock);
114  std::size_t blockSizej = std::min(blockSize,size2-jblock);
115 
116  //compute block values
117  for (size_type j = 0; j < blockSizej; ++j){
118  for (size_type i = 0; i < blockSizei; ++i){
119  blockStorage[i][j] = e()(iblock+i,jblock+j);
120  }
121  }
122 
123  //copy block in a different order to m
124  for (size_type i = 0; i < blockSizei; ++i){
125  for (size_type j = 0; j < blockSizej; ++j){
126  m()(iblock+i,jblock+j) = blockStorage[i][j];
127  }
128  }
129  }
130  }
131 }
132 
133 // dense-sparse
134 template<class M, class E>
135 void matrix_assign(
136  matrix_expression<M, cpu_tag> &m,
137  matrix_expression<E, cpu_tag> const& e,
138  row_major, column_major,dense_tag, sparse_tag
139 ) {
140  for(std::size_t j = 0; j != m().size2(); ++j){
141  auto columnM = column(m,j);
142  kernels::assign(columnM,column(e,j));
143  }
144 }
145 
146 
147 //sparse-dense
148 template<class M, class E>
149 void matrix_assign(
150  matrix_expression<M, cpu_tag> &m,
151  matrix_expression<E, cpu_tag> const& e,
152  row_major, column_major, sparse_tag, dense_tag
153 ) {
154  for(std::size_t i = 0; i != m().size1(); ++i){
155  auto rowM = row(m,i);
156  kernels::assign(rowM,row(e,i));
157  }
158 }
159 
160 //sparse-sparse
161 template<class M, class E>
162 void matrix_assign(
163  matrix_expression<M, cpu_tag> &m,
164  matrix_expression<E, cpu_tag> const& e,
165  row_major, column_major,sparse_tag,sparse_tag
166 ) {
167  //apply the transposition of e()
168  //first evaluate e and fill the values into a vector which is then sorted by row_major order
169  //this gives this algorithm a run time of O(eval(e)+k*log(k))
170  //where eval(e) is the time to evaluate and k*log(k) the number of non-zero elements
171  typedef typename M::value_type value_type;
172  typedef typename M::size_type size_type;
173  typedef row_major::sparse_element<value_type> Element;
174  std::vector<Element> elements;
175 
176  size_type size2 = m().size2();
177  size_type size1 = m().size1();
178  for(size_type j = 0; j != size2; ++j){
179  typename E::const_column_iterator pos_e = e().column_begin(j);
180  typename E::const_column_iterator end_e = e().column_end(j);
181  for(; pos_e != end_e; ++pos_e){
182  Element element;
183  element.i = pos_e.index();
184  element.j = j;
185  element.value = *pos_e;
186  elements.push_back(element);
187  }
188  }
189  std::sort(elements.begin(),elements.end());
190  //fill m with the contents
191  m().clear();
192  m().reserve(elements.size());//reserve a bit of space
193  for(size_type current = 0; current != elements.size();){
194  //count elements in row and reserve enough space for it
195  size_type row = elements[current].i;
196  size_type row_end = current;
197  while(row_end != elements.size() && elements[row_end].i == row)
198  ++ row_end;
199  m().reserve_row(row,row_end - current);
200 
201  //copy elements
202  typename M::row_iterator row_pos = m().row_begin(row);
203  for(; current != row_end; ++current){
204  row_pos = m().set_element(row_pos,elements[current].j,elements[current].value);
205  ++row_pos;
206  }
207  }
208 }
209 
210 //packed row_major,row_major
211 template<class M, class E,class Triangular>
212 void matrix_assign(
213  matrix_expression<M, cpu_tag> &m,
214  matrix_expression<E, cpu_tag> const& e,
215  triangular<row_major,Triangular>, triangular<row_major,Triangular>,
216  packed_tag, packed_tag
217 ) {
218  typedef typename M::row_iterator MIter;
219  typedef typename E::const_row_iterator EIter;
220 
221  for(std::size_t i = 0; i != m().size1(); ++i){
222  MIter mpos = m().row_begin(i);
223  EIter epos = e().row_begin(i);
224  MIter mend = m().row_end(i);
225  REMORA_SIZE_CHECK(mpos.index() == epos.index());
226  for(; mpos!=mend; ++mpos,++epos){
227  *mpos = *epos;
228  }
229  }
230 }
231 
232 ////packed row_major,column_major
233 //todo: this is suboptimal as we do strided access!!!!
234 template<class M, class E,class Triangular>
235 void matrix_assign(
236  matrix_expression<M, cpu_tag> &m,
237  matrix_expression<E, cpu_tag> const& e,
238  triangular<row_major,Triangular>, triangular<column_major,Triangular>,
239  packed_tag, packed_tag
240 ) {
241  typedef typename M::row_iterator MIter;
242 
243  for(std::size_t i = 0; i != m().size1(); ++i){
244  MIter mpos = m().row_begin(i);
245  MIter mend = m().row_end(i);
246  for(; mpos!=mend; ++mpos){
247  *mpos = e()(i,mpos.index());
248  }
249  }
250 }
251 
252 ///////////////////////////////////////////////////////////////////////////////////////////
253 //////Matrix Assignment With Functor implementing +=,-=...
254 ///////////////////////////////////////////////////////////////////////////////////////////
255 
256 //when both are row-major we can map to vector case
257 //this is not necessarily efficient if m is sparse.
258 template<class F, class M, class E, class TagE, class TagM>
259 void matrix_assign_functor(
260  matrix_expression<M, cpu_tag> &m,
261  matrix_expression<E, cpu_tag> const& e,
262  F f,
263  row_major, row_major,TagM, TagE
264 ) {
265  for(std::size_t i = 0; i != m().size1(); ++i){
266  auto rowM = row(m,i);
267  kernels::assign(rowM,row(e,i),f);
268  }
269 }
270 
271 //we only need to implement the remaining versions for column major second argument
272 
273 //dense-dense case
274 template<class F,class M, class E>
275 void matrix_assign_functor(
276  matrix_expression<M, cpu_tag> &m,
277  matrix_expression<E, cpu_tag> const& e,
278  F f,
279  row_major, column_major,dense_tag, dense_tag
280 ) {
281  //compute blockwise and wrelem the transposed block.
282  std::size_t const blockSize = 16;
283  typename M::value_type blockStorage[blockSize][blockSize];
284 
285  typedef typename M::size_type size_type;
286  size_type size1 = m().size1();
287  size_type size2 = m().size2();
288  for (size_type iblock = 0; iblock < size1; iblock += blockSize){
289  for (size_type jblock = 0; jblock < size2; jblock += blockSize){
290  std::size_t blockSizei = std::min(blockSize,size1-iblock);
291  std::size_t blockSizej = std::min(blockSize,size2-jblock);
292 
293  //fill the block with the values of e
294  for (size_type j = 0; j < blockSizej; ++j){
295  for (size_type i = 0; i < blockSizei; ++i){
296  blockStorage[i][j] = e()(iblock+i,jblock+j);
297  }
298  }
299 
300  //compute block values and store in m
301  for (size_type i = 0; i < blockSizei; ++i){
302  for (size_type j = 0; j < blockSizej; ++j){
303  m()(iblock+i,jblock+j) = f(m()(iblock+i,jblock+j), blockStorage[i][j]);
304  }
305  }
306  }
307  }
308 }
309 
310 //dense-sparse case
311 template<class F,class M, class E>
312 void matrix_assign_functor(
313  matrix_expression<M, cpu_tag> &m,
314  matrix_expression<E, cpu_tag> const& e,
315  F f,
316  row_major, column_major,dense_tag, sparse_tag
317 ) {
318  for(std::size_t j = 0; j != m().size2(); ++j){
319  auto columnM = column(m,j);
320  kernels::assign(columnM,column(e,j),f);
321  }
322 }
323 
324 //sparse-dense case
325 template<class F,class M, class E>
326 void matrix_assign_functor(
327  matrix_expression<M, cpu_tag> &m,
328  matrix_expression<E, cpu_tag> const& e,
329  F f,
330  row_major, column_major, sparse_tag, dense_tag
331 ) {
332  for(std::size_t i = 0; i != m().size1(); ++i){
333  auto rowM = row(m,i);
334  kernels::assign(rowM,row(e,i),f);
335  }
336 }
337 
338 //sparse-sparse
339 template<class F,class M, class E>
340 void matrix_assign_functor(
341  matrix_expression<M, cpu_tag> &m,
342  matrix_expression<E, cpu_tag> const& e,
343  F f,
344  row_major, column_major,sparse_tag t,sparse_tag
345 ) {
346  typename matrix_temporary<M>::type eTrans = e;//explicit calculation of the transpose for now
347  matrix_assign_functor(m,eTrans,f,row_major(),row_major(),t,t);
348  //~ F<typename M::iterator::reference, typename E::value_type> f;
349  //~ //first evaluate e and fill the values togethe into a vector which
350  //~ //is then sorted by row_major order
351  //~ //this gives this algorithm a run time of O(eval(e)+k*log(k))
352  //~ //where eval(e) is the time to evaluate and k*log(k) the number of non-zero elements
353  //~ typedef typename M::value_type value_type;
354  //~ typedef typename M::size_type size_type;
355  //~ typedef row_major::sparse_element<value_type> Element;
356  //~ std::vector<Element> elements;
357 
358  //~ size_type size2 = m().size2();
359  //~ size_type size1 = m().size1();
360  //~ for(size_type j = 0; j != size2; ++j){
361  //~ typename E::const_column_iterator pos_e = e().column_begin(j);
362  //~ typename E::const_column_iterator end_e = e().column_end(j);
363  //~ for(; pos_e != end_e; ++pos_e){
364  //~ Element element;
365  //~ element.i = pos_e.index();
366  //~ element.j = j;
367  //~ element.value = *pos_e;
368  //~ elements.push_back(element);
369  //~ }
370  //~ }
371  //~ std::sort(elements.begin(),elements.end());
372 
373 
374  //~ //assign the contents to m, applying the functor every time
375  //~ //that is we assign it for every element for e and m
376  //~ m().reserve(elements.size());//reserve a bit of space, we might need more, though.
377  //~ std::vector<Element>::const_iterator elem = elements.begin();
378  //~ std::vector<Element>::const_iterator elem_end = elements.end();
379  //~ value_type zero = value_type();
380  //~ for(size_type row = 0; row != m().size2(); ++row){
381  //~ //todo pre-reserve enough space in the row of m()
382  //~ //merge both rows with f as functor
383  //~ typename M::row_iterator it = m().row_begin(row);
384  //~ while(it != m().row_end(row) && elem != elem_end && elem->i == row) {
385  //~ size_type it_index = it.index();
386  //~ size_type elem_index = elem->j;
387  //~ if (it_index == elem_index) {
388  //~ f(*it, *elem);
389  //~ ++ elem;
390  //~ } else if (it_index < elem_index) {
391  //~ f(*it, zero);
392  //~ } else{//it_index > elem_index so insert new element in v()
393  //~ it = m().set_element(it,elem_index,zero);
394  //~ f(*it, *elem);
395  //~ ++elem;
396  //~ }
397  //~ ++it;
398  //~ }
399  //~ //apply f to remaining elemms in the row
400  //~ for(;it != v().end();++it) {
401  //~ f(*it, zero);
402  //~ }
403  //~ //add missing elements
404  //~ for(;elem != elem_end;++it,++elem) {
405  //~ it = m().set_element(it,elem.index(),zero);
406  //~ f(*it, zero);
407  //~ }
408  //~ }
409 }
410 
411 
412 //kernels for packed
413 template<class F, class M, class E, class Triangular>
414 void matrix_assign_functor(
415  matrix_expression<M, cpu_tag> &m,
416  matrix_expression<E, cpu_tag> const& e,
417  F f,
418  triangular<row_major,Triangular>, triangular<row_major,Triangular>
419 ) {
420  typedef typename M::row_iterator MIter;
421  typedef typename E::const_row_iterator EIter;
422  //there is nothing we can do if F does not leave the non-stored elements 0
423  //this is the case for all current assignment functors, but you never know :)
424  static_assert(F::left_zero_identity || F::right_zero_identity, "cannot handle the given packed matrix assignment function");
425 
426  for(std::size_t i = 0; i != m().size1(); ++i){
427  MIter mpos = m().row_begin(i);
428  EIter epos = e().row_begin(i);
429  MIter mend = m().row_end(i);
430  REMORA_SIZE_CHECK(mpos.index() == epos.index());
431  for(; mpos!=mend; ++mpos,++epos){
432  *mpos = f(*mpos,*epos);
433  }
434  }
435 }
436 
437 //todo: this is suboptimal as we do strided access!!!!
438 template<class F, class M, class E, class Triangular>
439 void matrix_assign_functor(
440  matrix_expression<M, cpu_tag> &m,
441  matrix_expression<E, cpu_tag> const& e,
442  F f,
443  triangular<row_major,Triangular>, triangular<column_major,Triangular>
444 ) {
445  typedef typename M::row_iterator MIter;
446  //there is nothing we can do, if F does not leave the non-stored elements 0
447  static_assert(F::left_zero_identity, "cannot handle the given packed matrix assignment function");
448 
449  for(std::size_t i = 0; i != m().size1(); ++i){
450  MIter mpos = m().row_begin(i);
451  MIter mend = m().row_end(i);
452  for(; mpos!=mend; ++mpos){
453  *mpos = f(*mpos,e()(i,mpos.index()));
454  }
455  }
456 }
457 
458 
459 }}
460 
461 #endif