matrix.hpp
Go to the documentation of this file.
1 /*!
2  * \brief Implements the dense matrix class for the gpu
3  *
4  * \author O. Krause
5  * \date 2016
6  *
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef REMORA_GPU_MATRIX_HPP
29 #define REMORA_GPU_MATRIX_HPP
30 
31 #include "../detail/traits.hpp"
32 #include "../assignment.hpp"
33 #include "../detail/matrix_proxy_classes.hpp"
34 #include <boost/compute/container/vector.hpp>
35 #include <boost/compute/iterator/strided_iterator.hpp>
36 
37 namespace remora{
38 
39 namespace detail{
40 template<class Arg1, class Arg2, class T>
41 struct induced_matrix_element{
42  typedef T result_type;
43  Arg1 arg1;
44  Arg2 arg2;
45  std::size_t leading_dimension;
46  boost::compute::buffer const& buffer;
47 };
48 
49 template<class Arg1, class Arg2,class T>
50 boost::compute::detail::meta_kernel& operator<< (
51  boost::compute::detail::meta_kernel& k,
52  induced_matrix_element<Arg1, Arg2, T> const& e
53 ){
54  return k<< k.get_buffer_identifier<T>(e.buffer, boost::compute::memory_object::global_memory)
55  <<'['<<e.arg1 <<'*'<<e.leading_dimension<<'+'<< e.arg2<<']';
56 }
57 }
58 
59 /// \brief A dense matrix of values of type \c T stored on the gpu
60 ///
61 /// For a \f$(m \times n)\f$-dimensional matrix and \f$ 0 \leq i < m, 0 \leq j < n\f$, every element \f$ m_{i,j} \f$ is mapped to
62 /// the \f$(i.n + j)\f$-th element of the container for row major orientation or the \f$ (i + j.m) \f$-th element of
63 /// the container for column major orientation. In a dense matrix all elements are represented in memory in a
64 /// contiguous chunk of memory by definition.
65 ///
66 /// Orientation can also be specified, otherwise a \c row_major is used.
67 ///
68 /// \tparam T the type of object stored in the matrix (like double, float, complex, etc...)
69 /// \tparam L the storage organization. It can be either \c row_major or \c column_major. Default is \c row_major
70 template<class T, class L>
71 class matrix<T,L, gpu_tag> : public matrix_container<matrix<T,L, gpu_tag>, gpu_tag > {
72 private:
73  template<class IndexExpr1, class IndexExpr2>
74  detail::induced_matrix_element<IndexExpr1, IndexExpr2, T> get_element(
75  IndexExpr1 const& expr1,IndexExpr2 const& expr2,
76  row_major
77  )const{
78  return {expr1, expr2,orientation::index_m(m_size1,m_size2), m_storage.get_buffer()};
79  }
80  template<class IndexExpr1, class IndexExpr2>
81  detail::induced_matrix_element<IndexExpr2, IndexExpr1,T> get_element(
82  IndexExpr1 const& expr1,IndexExpr2 const& expr2,
83  column_major
84  )const{
85  return {expr2, expr1,orientation::index_m(m_size1,m_size2), m_storage.get_buffer()};
86  }
87 public:
88  typedef T value_type;
89  typedef value_type const_reference;
90  typedef value_type reference;
91  typedef std::size_t size_type;
92 
93  typedef matrix_reference<matrix const> const_closure_type;
94  typedef matrix_reference<matrix> closure_type;
95  typedef gpu::dense_matrix_storage<T, continuous_dense_tag> storage_type;
96  typedef gpu::dense_matrix_storage<T const, continuous_dense_tag> const_storage_type;
97  typedef elementwise<dense_tag> evaluation_category;
98  typedef L orientation;
99 
100  // Construction and destruction
101 
102  /// \brief Constructor of a matrix with a default queue
103  ///
104  ///note that for all operations for which matrix is on the left hand side,
105  ///the kernels are enqueued on the supplied queue in case of a multi-queue setup.
106  matrix(boost::compute::command_queue& queue = boost::compute::system::default_queue())
107  : m_storage(queue.get_context())
108  , m_queue(&queue),m_size1(0), m_size2(0){}
109 
110  /// \brief Constructor of a matrix with a predefined size
111  /// By default, its elements are uninitialized
112  /// \param size1 number of rows
113  /// \param size2 number of columns
114  /// \param queue the opencl queue to use by this matrix
115  explicit matrix(size_type size1, size_type size2, boost::compute::command_queue& queue = boost::compute::system::default_queue())
116  : m_storage(size1 * size2, queue.get_context())
117  , m_queue(&queue)
118  , m_size1(size1)
119  , m_size2(size2){}
120 
121  /// \brief Constructor of a matrix with a predefined size initialized to a value
122  /// \param size1 number of rows
123  /// \param size2 number of columns
124  /// \param init value to assign to each element of the matrix
125  /// \param queue the opencl queue to use by this matrix
126  matrix(size_type size1, size_type size2, value_type const& init, boost::compute::command_queue& queue = boost::compute::system::default_queue())
127  : m_storage(size1 * size2, init, queue)
128  , m_queue(&queue)
129  , m_size1(size1)
130  , m_size2(size2){}
131 
132  /// \brief Move-constructor of a matrix
133  /// \param m is the matrix to be moved
134  matrix(matrix && m)
135  : m_storage(std::move(m.m_storage))
136  , m_queue(&m.queue())
137  , m_size1(m.size1())
138  , m_size2(m.size2()){}
139 
140  /// \brief Copy-constructor of a matrix
141  /// \param m is the matrix to be duplicated
142  matrix(matrix const& m)
143  : m_storage(m.m_storage)
144  , m_queue(&m.queue())
145  , m_size1(m.size1())
146  , m_size2(m.size2()){}
147 
148  /// \brief Copy-constructor of a matrix from a matrix_expression
149  /// \param e the matrix_expression whose values will be duplicated into the matrix
150  template<class E>
151  matrix(matrix_expression<E, gpu_tag> const& e)
152  : m_storage(e().size1() * e().size2(), e().queue().get_context())
153  , m_queue(&e().queue())
154  , m_size1(e().size1())
155  , m_size2(e().size2()){
156  assign(*this, e);
157  }
158 
159  // -------------------
160  // Assignment operators
161  // -------------------
162 
163  /// \brief Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
164  /// Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix). This method does not create any temporary.
165  /// \param m is the source matrix container
166  /// \return a reference to a matrix (i.e. the destination matrix)
167  matrix& operator = (matrix const& m){
168  resize(m.size1(),m.size2());
169  return assign(*this, m);
170  }
171 
172  /// \brief Move-Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
173  /// \param m is the source matrix container
174  /// \return a reference to a matrix (i.e. the destination matrix)
175  matrix& operator = (matrix && m){
176  m_storage = std::move(m.m_storage);
177  m_queue = m.m_queue;
178  m_size1 = m.m_size1;
179  m_size2 = m.m_size2;
180  return *this;
181  }
182 
183  /// \brief Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix)
184  /// Assign a full matrix (\e RHS-matrix) to the current matrix (\e LHS-matrix). This method does not create any temporary.
185  /// \param m is the source matrix container
186  /// \return a reference to a matrix (i.e. the destination matrix)
187  template<class C> // Container assignment without temporary
188  matrix& operator = (matrix_container<C, gpu_tag> const& m) {
189  resize(m().size1(), m().size2());
190  return assign(*this, m);
191  }
192 
193  /// \brief Assign the result of a matrix_expression to the matrix
194  ///
195  /// \param e is a const reference to the matrix_expression
196  /// \return a reference to the resulting matrix
197  template<class E>
198  matrix& operator = (matrix_expression<E, gpu_tag> const& e) {
199  matrix temporary(e);
200  swap(*this,temporary);
201  return *this;
202  }
203 
204  // ---------
205  // Storage interface
206  // ---------
207 
208  ///\brief Returns the number of rows of the matrix.
209  size_type size1() const {
210  return m_size1;
211  }
212  ///\brief Returns the number of columns of the matrix.
213  size_type size2() const {
214  return m_size2;
215  }
216 
217  boost::compute::command_queue& queue() const{
218  return *m_queue;
219  }
220  ///\brief Returns the underlying storage structure for low level access
221  const_storage_type raw_storage() const{
222  return {m_storage.get_buffer(),0,orientation::index_m(m_size1,m_size2)};
223  }
224 
225  ///\brief Returns the underlying storage structure for low level access
226  storage_type raw_storage(){
227  return {m_storage.get_buffer(),0,orientation::index_m(m_size1,m_size2)};
228  }
229 
230  // Element access
231  template <class IndexExpr1, class IndexExpr2>
232  detail::induced_matrix_element<IndexExpr1,IndexExpr2,T> operator()(IndexExpr1 const& i, IndexExpr2 const& j) const{
233  return this->get_element(i,j,orientation());
234  }
235 
236 
237  /// \brief Resize the matrix
238  ///
239  /// This might erase all data stored in the matrix
240  ///
241  /// \param size1 new number of rows
242  /// \param size2 new number of columns
243  void resize(size_type size1, size_type size2) {
244  if(size1 * size2 < m_storage.size())
245  m_storage.resize(size1 * size2);
246  else
247  m_storage = boost::compute::vector<T>(size1 * size2, queue().get_context());
248  m_size1 = size1;
249  m_size2 = size2;
250  }
251 
252 
253  /// \brief Resize the matrix
254  ///
255  /// This will erase all data stored in the matrix and reinitialize it with the supplied value of init
256  ///
257  /// \param size1 new number of rows
258  /// \param size2 new number of columns
259  /// \param init the value of all elements
260  void resize(size_type size1, size_type size2, value_type init) {
261  resize(size1,size2);
262  boost::compute::fill(m_storage.begin(),m_storage.end(), init, queue());
263  }
264 
265  void clear(){
266  boost::compute::fill(m_storage.begin(),m_storage.end(), value_type/*zero*/(), queue());
267  }
268 
269  // Iterator types
270  typedef boost::compute::strided_iterator<typename boost::compute::vector<T>::iterator > row_iterator;
271  typedef boost::compute::strided_iterator<typename boost::compute::vector<T>::iterator > column_iterator;
272  typedef boost::compute::strided_iterator<typename boost::compute::vector<T>::const_iterator > const_row_iterator;
273  typedef boost::compute::strided_iterator<typename boost::compute::vector<T>::const_iterator > const_column_iterator;
274 
275  const_row_iterator row_begin(size_type i) const {
276  return {m_storage.begin() + i * stride1(), stride2()};
277  }
278  const_row_iterator row_end(size_type i) const {
279  return {m_storage.begin() + i * stride1()+size2()*stride2(), stride2()};
280  }
281 
282  const_row_iterator column_begin(size_type j) const {
283  return {m_storage.begin() + j * stride2(), stride1()};
284  }
285  const_column_iterator column_end(size_type j) const {
286  return {m_storage.begin() + j * stride2()+size1()*stride1(), stride1()};
287  }
288 
289  row_iterator row_begin(size_type i){
290  return {m_storage.begin() + i * stride1(), stride2()};
291  }
292  row_iterator row_end(size_type i){
293  return {m_storage.begin() + i * stride1()+size2()*stride2(), stride2()};
294  }
295 
296  row_iterator column_begin(size_type j){
297  return {m_storage.begin() + j * stride2(), stride1()};
298  }
299  column_iterator column_end(size_type j){
300  return {m_storage.begin() + j * stride2()+size1()*stride1(), stride1()};
301  }
302 
303 
304  /// \brief Swap the content of two matrixs
305  /// \param m1 is the first matrix. It takes values from m2
306  /// \param m2 is the second matrix It takes values from m1
307  friend void swap(matrix& m1, matrix& m2) {
308  using std::swap;
309  swap(m1.m_storage,m2.m_storage);
310  std::swap(m1.m_queue,m2.m_queue);
311  std::swap(m1.m_size1,m2.m_size1);
312  std::swap(m1.m_size2,m2.m_size2);
313  }
314 
315 
316 private:
317  std::ptrdiff_t stride1() const {
318  return (std::ptrdiff_t) orientation::stride1(m_size1, m_size2);
319  }
320  std::ptrdiff_t stride2() const {
321  return (std::ptrdiff_t) orientation::stride2(m_size1, m_size2);
322  }
323 
324  boost::compute::vector<T> m_storage;
325  boost::compute::command_queue* m_queue;
326  size_type m_size1;
327  size_type m_size2;
328 };
329 
330 }
331 
332 #endif