traits.hpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Traits of gpu expressions
6  *
7  * \author O. Krause
8  * \date 2016
9  *
10  *
11  * \par Copyright 1995-2015 Shark Development Team
12  *
13  * <BR><HR>
14  * This file is part of Shark.
15  * <http://image.diku.dk/shark/>
16  *
17  * Shark is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU Lesser General Public License as published
19  * by the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * Shark is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU Lesser General Public License for more details.
26  *
27  * You should have received a copy of the GNU Lesser General Public License
28  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
29  *
30  */
31 //===========================================================================
32 
33 #ifndef REMORA_GPU_TRAITS_HPP
34 #define REMORA_GPU_TRAITS_HPP
35 
36 #include <boost/compute/command_queue.hpp>
37 #include <boost/compute/core.hpp>
38 #include <boost/compute/functional/detail/unpack.hpp>
39 #include <boost/compute/container/vector.hpp>
40 #include <boost/compute/functional/operator.hpp>
41 #include <boost/compute/iterator/zip_iterator.hpp>
42 #include <boost/compute/iterator/constant_iterator.hpp>
43 #include <boost/compute/iterator/transform_iterator.hpp>
44 #include <boost/compute/functional.hpp>
45 #include <tuple>
46 
47 namespace remora{namespace gpu{
48 
49 template<class Device>
50 struct device_traits;
51 
52 template<class T, class Tag>
53 struct dense_vector_storage{
54  typedef Tag storage_tag;
55 
56  boost::compute::buffer buffer;
57  std::size_t offset;
58  std::size_t stride;
59 
60  dense_vector_storage(){}
61  dense_vector_storage(boost::compute::buffer const& buffer, std::size_t offset, std::size_t stride)
62  :buffer(buffer), offset(offset), stride(stride){}
63  template<class U, class Tag2>
64  dense_vector_storage(dense_vector_storage<U, Tag2> const& storage):
65  buffer(storage.buffer), offset(storage.offset), stride(storage.stride){
66  static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value), "Trying to assign dense to continuous dense storage");
67  }
68 
69  dense_vector_storage<T,Tag> sub_region(std::size_t offset){
70  return {buffer, this->offset+offset * stride, stride};
71  }
72 };
73 
74 template<class T, class Tag>
75 struct dense_matrix_storage{
76  typedef Tag storage_tag;
77  template<class O>
78  struct row_storage: public std::conditional<
79  std::is_same<O,row_major>::value,
80  dense_vector_storage<T, Tag>,
81  dense_vector_storage<T, dense_tag>
82  >{};
83 
84  typedef dense_vector_storage<T,Tag> diag_storage;
85  typedef dense_matrix_storage<T,dense_tag> sub_region_storage;
86 
87  boost::compute::buffer buffer;
88  std::size_t offset;
89  std::size_t leading_dimension;
90 
91  dense_matrix_storage(){}
92  dense_matrix_storage(boost::compute::buffer const& buffer, std::size_t offset, std::size_t leading_dimension)
93  :buffer(buffer), offset(offset), leading_dimension(leading_dimension){}
94  template<class U, class Tag2>
95  dense_matrix_storage(dense_matrix_storage<U, Tag2> const& storage):
96  buffer(storage.buffer), offset(storage.offset), leading_dimension(storage.leading_dimension){
97  static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value), "Trying to assign dense to continuous dense storage");
98  }
99 
100  template<class Orientation>
101  sub_region_storage sub_region(std::size_t offset1, std::size_t offset2, Orientation){
102  std::size_t offset_major = Orientation::index_M(offset1,offset2);
103  std::size_t offset_minor = Orientation::index_m(offset1,offset2);
104  return {buffer, offset + offset_major*leading_dimension+offset_minor, leading_dimension};
105  }
106 
107  template<class Orientation>
108  typename row_storage<Orientation>::type row(std::size_t i, Orientation){
109  return {buffer, offset + i * Orientation::index_M(leading_dimension,std::size_t(1)), Orientation::index_m(leading_dimension,std::size_t(1))};
110  }
111  diag_storage diag(){
112  return {buffer, offset, leading_dimension+1};
113  }
114 };
115 
116 
117 namespace detail{
118 
119 template<class Arg1, class T>
120 struct invoked_multiply_scalar{
121  typedef T result_type;
122  Arg1 arg1;
123  T m_scalar;
124 };
125 
126 template<class Arg1, class Arg2, class T>
127 struct invoked_multiply_and_add{
128  typedef T result_type;
129  Arg1 arg1;
130  Arg2 arg2;
131  T m_scalar;
132 };
133 
134 template<class Arg1, class T>
135 struct invoked_soft_plus{
136  typedef T result_type;
137  Arg1 arg1;
138 };
139 template<class Arg1, class T>
140 struct invoked_sigmoid{
141  typedef T result_type;
142  Arg1 arg1;
143 };
144 
145 template<class Arg1, class T>
146 struct invoked_sqr{
147  typedef T result_type;
148  Arg1 arg1;
149 };
150 
151 template<class Arg1, class T>
152 struct invoked_inv{
153  typedef T result_type;
154  Arg1 arg1;
155 };
156 
157 template<class Arg1, class Arg2, class T>
158 struct invoked_safe_div{
159  typedef T result_type;
160  Arg1 arg1;
161  Arg2 arg2;
162  T default_value;
163 };
164 
165 
166 template<class Arg1, class T>
167 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_multiply_scalar<Arg1,T> const& e){
168  return k << '('<<e.m_scalar << '*'<< e.arg1<<')';
169 }
170 template<class Arg1, class Arg2, class T>
171 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_multiply_and_add<Arg1,Arg2,T> const& e){
172  return k << '('<<e.arg1<<'+'<<e.m_scalar << '*'<< e.arg2<<')';
173 }
174 template<class Arg1, class T>
175 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_soft_plus<Arg1,T> const& e){
176  return k << "(log(1+exp("<< e.arg1<<")))";
177 }
178 template<class Arg1, class T>
179 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sigmoid<Arg1,T> const& e){
180  return k << "(1/(1+exp(-"<< e.arg1<<")))";
181 }
182 template<class Arg1, class T>
183 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sqr<Arg1,T> const& e){
184  return k << '('<<e.arg1<<'*'<<e.arg1<<')';
185 }
186 template<class Arg1, class T>
187 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_inv<Arg1,T> const& e){
188  return k << "1/("<<e.arg1<<')';
189 }
190 
191 template<class Arg1, class Arg2, class T>
192 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_safe_div<Arg1,Arg2,T> const& e){
193  return k << "(("<<e.arg2<<"!=0)?"<<e.arg1<<'/'<<e.arg2<<':'<<e.default_value<<')';
194 }
195 
196 template<class M, class Index, class Orientation>
197 struct invoked_linearized_matrix{
198  typedef typename M::value_type result_type;
199  M mat_closure;
200  Index index;
201 };
202 
203 template<class M, class Index>
204 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_linearized_matrix<M, Index, row_major> const& e){
205  std::string index_str = k.expr_to_string(e.index);
206  std::string i1 = "(("+index_str+")/"+std::to_string(e.mat_closure.size2())+")";
207  std::string i2 = "(("+index_str+")%"+std::to_string(e.mat_closure.size2())+")";
208  return k << e.mat_closure(k.expr<std::size_t>(i1),k.expr<std::size_t>(i2));
209 }
210 
211 template<class M, class Index>
212 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_linearized_matrix<M, Index, column_major> const& e){
213  std::string index_str = k.expr_to_string(e.index);
214  std::string i1 = "(("+index_str+")/"+std::to_string(e.mat_closure.size1())+")";
215  std::string i2 = "(("+index_str+")%"+std::to_string(e.mat_closure.size1())+")";
216  return k << '('<<e.mat_closure(k.expr<std::size_t>(i2),k.expr<std::size_t>(i1)) << ')';
217 }
218 
219 template<class Iterator1, class Iterator2, class Functor>
220 struct binary_transform_iterator
221 : public boost::compute::transform_iterator<
222  boost::compute::zip_iterator<boost::tuple<Iterator1, Iterator2> >,
223  boost::compute::detail::unpacked<Functor>
224 >{
225  typedef boost::compute::transform_iterator<
226  boost::compute::zip_iterator<boost::tuple<Iterator1, Iterator2> >,
227  boost::compute::detail::unpacked<Functor>
228  > self_type;
229  binary_transform_iterator(){}
230  binary_transform_iterator(
231  Functor const& f,
232  Iterator1 const& iter1, Iterator1 const& iter1_end,
233  Iterator2 const& iter2, Iterator2 const& iter2_end
234  ): self_type(boost::compute::make_zip_iterator(boost::make_tuple(iter1,iter2)), boost::compute::detail::unpack(f)){}
235 };
236 
237 template<class Closure>
238 class indexed_iterator : public boost::iterator_facade<
239  indexed_iterator<Closure>,
240  typename Closure::value_type,
241  std::random_access_iterator_tag,
242  typename Closure::value_type
243 >{
244 public:
245  indexed_iterator() = default;
246  indexed_iterator(Closure const& closure, std::size_t index)
247  : m_closure(closure)
248  , m_index(index){}
249 
250  template<class C>
251  indexed_iterator(indexed_iterator<C> const& other)
252  : m_closure (other.m_closure)
253  , m_index(other.m_index){}
254 
255  template<class C>
256  indexed_iterator& operator=(indexed_iterator<C> const& other){
257  m_closure = other.m_closure;
258  m_index = other.m_index;
259  return *this;
260  }
261 
262  size_t get_index() const{
263  return m_index;
264  }
265 
266  /// \internal_
267  template<class Expr>
268  auto operator[](Expr const& expr) const-> decltype(std::declval<Closure>()(expr)){
269  return m_closure(expr);
270  }
271 
272 private:
273  friend class ::boost::iterator_core_access;
274 
275  /// \internal_
276  typename Closure::value_type dereference() const
277  {
278  return typename Closure::value_type();
279  }
280 
281  /// \internal_
282  template<class C>
283  bool equal(indexed_iterator<C> const& other) const
284  {
285  return m_index == other.m_index;
286  }
287 
288  /// \internal_
289  void increment()
290  {
291  m_index++;
292  }
293 
294  /// \internal_
295  void decrement()
296  {
297  m_index--;
298  }
299 
300  /// \internal_
301  void advance(std::ptrdiff_t n)
302  {
303  m_index = static_cast<size_t>(static_cast<std::ptrdiff_t>(m_index) + n);
304  }
305 
306  /// \internal_
307  template<class C>
308  std::ptrdiff_t distance_to(indexed_iterator<C> const& other) const
309  {
310  return static_cast<std::ptrdiff_t>(other.m_index - m_index);
311  }
312 
313 private:
314  Closure m_closure;
315  std::size_t m_index;
316  template<class> friend class indexed_iterator;
317 };
318 
319 
320 template<class Iterator>
321 class subrange_iterator : public boost::iterator_facade<
322  subrange_iterator<Iterator>,
323  typename Iterator::value_type,
324  std::random_access_iterator_tag,
325  typename Iterator::value_type
326 >{
327 public:
328  subrange_iterator() = default;
329  subrange_iterator(Iterator const &it, Iterator const& /*end*/, std::size_t startIterIndex,std::size_t /*startIndex*/)
330  : m_iterator(it+startIterIndex){}
331 
332  template<class I>
333  subrange_iterator(subrange_iterator<I> other):m_iterator(other.m_iterator){}
334 
335  template<class I>
336  subrange_iterator& operator=(subrange_iterator<I> const& other){
337  m_iterator = other.m_iterator;
338  return *this;
339  }
340 
341  size_t get_index() const{
342  return m_iterator.index();
343  }
344 
345  /// \internal_
346  template<class Expr>
347  auto operator[](Expr const& expr) const-> decltype(std::declval<Iterator>()[expr]){
348  return m_iterator[expr];
349  }
350 
351 private:
352  friend class ::boost::iterator_core_access;
353 
354  /// \internal_
355  typename Iterator::value_type dereference() const
356  {
357  return typename Iterator::value_type();
358  }
359 
360  /// \internal_
361  template<class I>
362  bool equal(subrange_iterator<I> const& other) const
363  {
364  return m_iterator == other.m_iterator;
365  }
366 
367  /// \internal_
368  void increment()
369  {
370  ++m_iterator;
371  }
372 
373  /// \internal_
374  void decrement()
375  {
376  --m_iterator;
377  }
378 
379  /// \internal_
380  void advance(std::ptrdiff_t n)
381  {
382  m_iterator +=n;
383  }
384 
385  /// \internal_
386  template<class I>
387  std::ptrdiff_t distance_to(subrange_iterator<I> const& other) const
388  {
389  return static_cast<std::ptrdiff_t>(other.m_iterator - m_iterator);
390  }
391 
392 private:
393  Iterator m_iterator;
394  template<class> friend class subrange_iterator;
395 };
396 
397 }//End namespace detail
398 }//End namespace gpu
399 
400 template<>
401 struct device_traits<gpu_tag>{
402  typedef boost::compute::command_queue queue_type;
403 
404  static queue_type& default_queue(){
405  return boost::compute::system::default_queue();
406  }
407 
408  //adding of indices
409  template<class Expr1, class Expr2>
410  static auto index_add(Expr1 const& expr1, Expr2 const& expr2) -> decltype(boost::compute::plus<std::size_t>()(expr1,expr2)){
411  return boost::compute::plus<std::size_t>()(expr1,expr2);
412  }
413 
414  template<class E, class Index>
415  static gpu::detail::invoked_linearized_matrix<typename E::const_closure_type,Index, typename E::orientation>
416  linearized_matrix_element(matrix_expression<E, gpu_tag> const& e, Index const& index){
417  return {e(),index};
418  }
419 
420 
421  template <class Iterator, class Functor>
422  struct transform_iterator{
423  typedef boost::compute::transform_iterator<Iterator, Functor> type;
424  };
425 
426  template <class Iterator>
427  struct subrange_iterator{
428  typedef gpu::detail::subrange_iterator<Iterator> type;
429  };
430 
431  template <class Iterator1, class Iterator2, class Functor>
432  struct binary_transform_iterator{
433  typedef gpu::detail::binary_transform_iterator<Iterator1,Iterator2, Functor> type;
434  };
435 
436  template<class T>
437  struct constant_iterator{
438  typedef boost::compute::constant_iterator<T> type;
439  };
440 
441  template<class T>
442  struct one_hot_iterator{
443  typedef iterators::one_hot_iterator<T> type;
444  };
445 
446  template<class Closure>
447  struct indexed_iterator{
448  typedef gpu::detail::indexed_iterator<Closure> type;
449  };
450 
451  //functors
452 
453  //basic arithmetic
454  template<class T>
455  using add = boost::compute::plus<T>;
456  template<class T>
457  using subtract = boost::compute::minus<T>;
458  template<class T>
459  using multiply = boost::compute::multiplies<T>;
460  template<class T>
461  using divide = boost::compute::divides<T>;
462  template<class T>
463  using pow = boost::compute::pow<T>;
464  template<class T>
465  struct safe_divide : public boost::compute::function<T (T, T)>{
466  typedef T result_type;
467  safe_divide(T default_value) : boost::compute::function<T (T, T)>("safe_divide"), default_value(default_value) { }
468 
469  template<class Arg1, class Arg2>
470  gpu::detail::invoked_safe_div<Arg1,Arg2, T> operator()(const Arg1 &x, const Arg2& y) const
471  {
472  return {x,y,default_value};
473  }
474  T default_value;
475  };
476  template<class T>
477  struct multiply_and_add : public boost::compute::function<T (T,T)>{
478  typedef T result_type;
479  multiply_and_add(T scalar) : boost::compute::function<T (T,T)>("multiply_and_add"), m_scalar(scalar) { }
480 
481  template<class Arg1, class Arg2>
482  gpu::detail::invoked_multiply_and_add<Arg1,Arg2,T> operator()(const Arg1 &x, const Arg2& y) const
483  {
484  return {x,y, m_scalar};
485  }
486  private:
487  T m_scalar;
488  };
489  template<class T>
490  struct multiply_scalar : public boost::compute::function<T (T)>{
491  typedef T result_type;
492  multiply_scalar(T scalar) : boost::compute::function<T (T)>("multiply_scalar"), m_scalar(scalar) { }
493 
494  template<class Arg1>
495  gpu::detail::invoked_multiply_scalar<Arg1,T> operator()(const Arg1 &x) const
496  {
497  return {x, m_scalar};
498  }
499  private:
500  T m_scalar;
501  };
502 
503  template<class T>
504  struct multiply_assign : public boost::compute::function<T (T,T)>{
505  typedef T result_type;
506  multiply_assign(T scalar) : boost::compute::function<T (T,T)>("multiply_assign"), m_scalar(scalar) { }
507 
508  template<class Arg1, class Arg2>
509  gpu::detail::invoked_multiply_scalar<Arg2,T> operator()(const Arg1&, const Arg2& y) const
510  {
511  return {y, m_scalar};
512  }
513  private:
514  T m_scalar;
515  };
516 
517  //math unary functions
518  template<class T>
519  using log = boost::compute::log<T>;
520  template<class T>
521  using exp = boost::compute::exp<T>;
522  template<class T>
523  using sin = boost::compute::sin<T>;
524  template<class T>
525  using cos = boost::compute::cos<T>;
526  template<class T>
527  using tan = boost::compute::tan<T>;
528  template<class T>
529  using asin = boost::compute::asin<T>;
530  template<class T>
531  using acos = boost::compute::acos<T>;
532  template<class T>
533  using atan = boost::compute::atan<T>;
534  template<class T>
535  using tanh = boost::compute::tanh<T>;
536  template<class T>
537  using sqrt = boost::compute::sqrt<T>;
538  template<class T>
539  using cbrt = boost::compute::cbrt<T>;
540  template<class T>
541  using abs = boost::compute::fabs<T>;
542 
543  template<class T>
544  using erf = boost::compute::erf<T>;
545  template<class T>
546  using erfc = boost::compute::erfc<T>;
547 
548  template<class T>
549  struct sqr : public boost::compute::function<T (T)>{
550  typedef T result_type;
551  sqr() : boost::compute::function<T (T)>("sqr") { }
552 
553  template<class Arg1>
554  gpu::detail::invoked_sqr<Arg1,T> operator()(const Arg1 &x) const
555  {
556  return {x};
557  }
558  };
559  template<class T>
560  struct soft_plus : public boost::compute::function<T (T)>{
561  typedef T result_type;
562  soft_plus() : boost::compute::function<T (T)>("soft_plus") { }
563 
564  template<class Arg1>
565  gpu::detail::invoked_soft_plus<Arg1,T> operator()(const Arg1 &x) const
566  {
567  return {x};
568  }
569  };
570  template<class T>
571  struct sigmoid : public boost::compute::function<T (T)>{
572  typedef T result_type;
573  sigmoid() : boost::compute::function<T (T)>("sigmoid") { }
574 
575  template<class Arg1>
576  gpu::detail::invoked_sigmoid<Arg1,T> operator()(const Arg1 &x) const
577  {
578  return {x};
579  }
580  };
581  template<class T>
582  struct inv : public boost::compute::function<T (T)>{
583  typedef T result_type;
584  inv() : boost::compute::function<T (T)>("inv") { }
585 
586  template<class Arg1>
587  gpu::detail::invoked_inv<Arg1,T> operator()(const Arg1 &x) const
588  {
589  return {x};
590  }
591  };
592 
593  //min/max
594  template<class T>
595  using min = boost::compute::fmin<T>;
596  template<class T>
597  using max = boost::compute::fmax<T>;
598 
599  //comparison
600  template<class T>
601  using less = boost::compute::less<T>;
602  template<class T>
603  using less_equal = boost::compute::less_equal<T>;
604  template<class T>
605  using greater = boost::compute::greater<T>;
606  template<class T>
607  using greater_equal = boost::compute::greater_equal<T>;
608  template<class T>
609  using equal = boost::compute::equal_to<T>;
610  template<class T>
611  using not_equal = boost::compute::not_equal_to<T>;
612 
613 };
614 
615 }
616 
617 namespace boost{namespace compute{
618 template<class I1, class I2, class F>
619 struct is_device_iterator<remora::gpu::detail::binary_transform_iterator<I1,I2, F> > : boost::true_type {};
620 template<class Closure>
621 struct is_device_iterator<remora::gpu::detail::indexed_iterator<Closure> > : boost::true_type {};
622 template<class Iterator>
623 struct is_device_iterator<remora::gpu::detail::subrange_iterator<Iterator> > : boost::true_type {};
624 }}
625 
626 #endif