33 #ifndef REMORA_GPU_TRAITS_HPP 34 #define REMORA_GPU_TRAITS_HPP 36 #include <boost/compute/command_queue.hpp> 37 #include <boost/compute/core.hpp> 38 #include <boost/compute/functional/detail/unpack.hpp> 39 #include <boost/compute/container/vector.hpp> 40 #include <boost/compute/functional/operator.hpp> 41 #include <boost/compute/iterator/zip_iterator.hpp> 42 #include <boost/compute/iterator/constant_iterator.hpp> 43 #include <boost/compute/iterator/transform_iterator.hpp> 44 #include <boost/compute/functional.hpp> 47 namespace remora{
namespace gpu{
49 template<
class Device>
52 template<
class T,
class Tag>
53 struct dense_vector_storage{
54 typedef Tag storage_tag;
56 boost::compute::buffer buffer;
60 dense_vector_storage(){}
61 dense_vector_storage(boost::compute::buffer
const& buffer, std::size_t offset, std::size_t stride)
62 :buffer(buffer), offset(offset), stride(stride){}
63 template<
class U,
class Tag2>
64 dense_vector_storage(dense_vector_storage<U, Tag2>
const& storage):
65 buffer(storage.buffer), offset(storage.offset), stride(storage.stride){
66 static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value),
"Trying to assign dense to continuous dense storage");
69 dense_vector_storage<T,Tag> sub_region(std::size_t offset){
70 return {buffer, this->offset+offset * stride, stride};
74 template<
class T,
class Tag>
75 struct dense_matrix_storage{
76 typedef Tag storage_tag;
78 struct row_storage:
public std::conditional<
79 std::is_same<O,row_major>::value,
80 dense_vector_storage<T, Tag>,
81 dense_vector_storage<T, dense_tag>
84 typedef dense_vector_storage<T,Tag> diag_storage;
85 typedef dense_matrix_storage<T,dense_tag> sub_region_storage;
87 boost::compute::buffer buffer;
89 std::size_t leading_dimension;
91 dense_matrix_storage(){}
92 dense_matrix_storage(boost::compute::buffer
const& buffer, std::size_t offset, std::size_t leading_dimension)
93 :buffer(buffer), offset(offset), leading_dimension(leading_dimension){}
94 template<
class U,
class Tag2>
95 dense_matrix_storage(dense_matrix_storage<U, Tag2>
const& storage):
96 buffer(storage.buffer), offset(storage.offset), leading_dimension(storage.leading_dimension){
97 static_assert(!(std::is_same<Tag,continuous_dense_tag>::value && std::is_same<Tag2,dense_tag>::value),
"Trying to assign dense to continuous dense storage");
100 template<
class Orientation>
101 sub_region_storage sub_region(std::size_t offset1, std::size_t offset2, Orientation){
102 std::size_t offset_major = Orientation::index_M(offset1,offset2);
103 std::size_t offset_minor = Orientation::index_m(offset1,offset2);
104 return {buffer, offset + offset_major*leading_dimension+offset_minor, leading_dimension};
107 template<
class Orientation>
108 typename row_storage<Orientation>::type row(std::size_t i, Orientation){
109 return {buffer, offset + i * Orientation::index_M(leading_dimension,std::size_t(1)), Orientation::index_m(leading_dimension,std::size_t(1))};
112 return {buffer, offset, leading_dimension+1};
119 template<
class Arg1,
class T>
120 struct invoked_multiply_scalar{
121 typedef T result_type;
126 template<
class Arg1,
class Arg2,
class T>
127 struct invoked_multiply_and_add{
128 typedef T result_type;
134 template<
class Arg1,
class T>
135 struct invoked_soft_plus{
136 typedef T result_type;
139 template<
class Arg1,
class T>
140 struct invoked_sigmoid{
141 typedef T result_type;
145 template<
class Arg1,
class T>
147 typedef T result_type;
151 template<
class Arg1,
class T>
153 typedef T result_type;
157 template<
class Arg1,
class Arg2,
class T>
158 struct invoked_safe_div{
159 typedef T result_type;
166 template<
class Arg1,
class T>
167 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_multiply_scalar<Arg1,T>
const& e){
168 return k <<
'('<<e.m_scalar <<
'*'<< e.arg1<<
')';
170 template<
class Arg1,
class Arg2,
class T>
171 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_multiply_and_add<Arg1,Arg2,T>
const& e){
172 return k <<
'('<<e.arg1<<
'+'<<e.m_scalar <<
'*'<< e.arg2<<
')';
174 template<
class Arg1,
class T>
175 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_soft_plus<Arg1,T>
const& e){
176 return k <<
"(log(1+exp("<< e.arg1<<
")))";
178 template<
class Arg1,
class T>
179 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sigmoid<Arg1,T>
const& e){
180 return k <<
"(1/(1+exp(-"<< e.arg1<<
")))";
182 template<
class Arg1,
class T>
183 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_sqr<Arg1,T>
const& e){
184 return k <<
'('<<e.arg1<<
'*'<<e.arg1<<
')';
186 template<
class Arg1,
class T>
187 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_inv<Arg1,T>
const& e){
188 return k <<
"1/("<<e.arg1<<
')';
191 template<
class Arg1,
class Arg2,
class T>
192 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_safe_div<Arg1,Arg2,T>
const& e){
193 return k <<
"(("<<e.arg2<<
"!=0)?"<<e.arg1<<
'/'<<e.arg2<<
':'<<e.default_value<<
')';
196 template<
class M,
class Index,
class Orientation>
197 struct invoked_linearized_matrix{
198 typedef typename M::value_type result_type;
203 template<
class M,
class Index>
204 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_linearized_matrix<M, Index, row_major>
const& e){
205 std::string index_str = k.expr_to_string(e.index);
206 std::string i1 =
"(("+index_str+
")/"+std::to_string(e.mat_closure.size2())+
")";
207 std::string i2 =
"(("+index_str+
")%"+std::to_string(e.mat_closure.size2())+
")";
208 return k << e.mat_closure(k.expr<std::size_t>(i1),k.expr<std::size_t>(i2));
211 template<
class M,
class Index>
212 boost::compute::detail::meta_kernel& operator<<(boost::compute::detail::meta_kernel& k, invoked_linearized_matrix<M, Index, column_major>
const& e){
213 std::string index_str = k.expr_to_string(e.index);
214 std::string i1 =
"(("+index_str+
")/"+std::to_string(e.mat_closure.size1())+
")";
215 std::string i2 =
"(("+index_str+
")%"+std::to_string(e.mat_closure.size1())+
")";
216 return k <<
'('<<e.mat_closure(k.expr<std::size_t>(i2),k.expr<std::size_t>(i1)) <<
')';
219 template<
class Iterator1,
class Iterator2,
class Functor>
220 struct binary_transform_iterator
221 :
public boost::compute::transform_iterator<
222 boost::compute::zip_iterator<boost::tuple<Iterator1, Iterator2> >,
223 boost::compute::detail::unpacked<Functor>
225 typedef boost::compute::transform_iterator<
226 boost::compute::zip_iterator<boost::tuple<Iterator1, Iterator2> >,
227 boost::compute::detail::unpacked<Functor>
229 binary_transform_iterator(){}
230 binary_transform_iterator(
232 Iterator1
const& iter1, Iterator1
const& iter1_end,
233 Iterator2
const& iter2, Iterator2
const& iter2_end
234 ): self_type(
boost::compute::make_zip_iterator(
boost::make_tuple(iter1,iter2)),
boost::compute::detail::unpack(f)){}
237 template<
class Closure>
238 class indexed_iterator :
public boost::iterator_facade<
239 indexed_iterator<Closure>,
240 typename Closure::value_type,
241 std::random_access_iterator_tag,
242 typename Closure::value_type
245 indexed_iterator() =
default;
246 indexed_iterator(Closure
const& closure, std::size_t index)
251 indexed_iterator(indexed_iterator<C>
const& other)
252 : m_closure (other.m_closure)
253 , m_index(other.m_index){}
256 indexed_iterator& operator=(indexed_iterator<C>
const& other){
257 m_closure = other.m_closure;
258 m_index = other.m_index;
262 size_t get_index()
const{
268 auto operator[](Expr
const& expr)
const-> decltype(std::declval<Closure>()(expr)){
269 return m_closure(expr);
273 friend class ::boost::iterator_core_access;
276 typename Closure::value_type dereference()
const 278 return typename Closure::value_type();
283 bool equal(indexed_iterator<C>
const& other)
const 285 return m_index == other.m_index;
301 void advance(std::ptrdiff_t n)
303 m_index =
static_cast<size_t>(
static_cast<std::ptrdiff_t
>(m_index) + n);
308 std::ptrdiff_t distance_to(indexed_iterator<C>
const& other)
const 310 return static_cast<std::ptrdiff_t
>(other.m_index - m_index);
316 template<
class>
friend class indexed_iterator;
320 template<
class Iterator>
321 class subrange_iterator :
public boost::iterator_facade<
322 subrange_iterator<Iterator>,
323 typename Iterator::value_type,
324 std::random_access_iterator_tag,
325 typename Iterator::value_type
328 subrange_iterator() =
default;
329 subrange_iterator(Iterator
const &it, Iterator
const& , std::size_t startIterIndex,std::size_t )
330 : m_iterator(it+startIterIndex){}
333 subrange_iterator(subrange_iterator<I> other):m_iterator(other.m_iterator){}
336 subrange_iterator& operator=(subrange_iterator<I>
const& other){
337 m_iterator = other.m_iterator;
341 size_t get_index()
const{
342 return m_iterator.index();
347 auto operator[](Expr
const& expr)
const-> decltype(std::declval<Iterator>()[expr]){
348 return m_iterator[expr];
352 friend class ::boost::iterator_core_access;
355 typename Iterator::value_type dereference()
const 357 return typename Iterator::value_type();
362 bool equal(subrange_iterator<I>
const& other)
const 364 return m_iterator == other.m_iterator;
380 void advance(std::ptrdiff_t n)
387 std::ptrdiff_t distance_to(subrange_iterator<I>
const& other)
const 389 return static_cast<std::ptrdiff_t
>(other.m_iterator - m_iterator);
394 template<
class>
friend class subrange_iterator;
401 struct device_traits<gpu_tag>{
402 typedef boost::compute::command_queue queue_type;
404 static queue_type& default_queue(){
405 return boost::compute::system::default_queue();
409 template<
class Expr1,
class Expr2>
410 static auto index_add(Expr1
const& expr1, Expr2
const& expr2) -> decltype(boost::compute::plus<std::size_t>()(expr1,expr2)){
411 return boost::compute::plus<std::size_t>()(expr1,expr2);
414 template<
class E,
class Index>
415 static gpu::detail::invoked_linearized_matrix<typename E::const_closure_type,Index, typename E::orientation>
416 linearized_matrix_element(matrix_expression<E, gpu_tag>
const& e, Index
const& index){
421 template <
class Iterator,
class Functor>
422 struct transform_iterator{
423 typedef boost::compute::transform_iterator<Iterator, Functor> type;
426 template <
class Iterator>
427 struct subrange_iterator{
428 typedef gpu::detail::subrange_iterator<Iterator> type;
431 template <
class Iterator1,
class Iterator2,
class Functor>
432 struct binary_transform_iterator{
433 typedef gpu::detail::binary_transform_iterator<Iterator1,Iterator2, Functor> type;
437 struct constant_iterator{
438 typedef boost::compute::constant_iterator<T> type;
442 struct one_hot_iterator{
443 typedef iterators::one_hot_iterator<T> type;
446 template<
class Closure>
447 struct indexed_iterator{
448 typedef gpu::detail::indexed_iterator<Closure> type;
455 using add = boost::compute::plus<T>;
457 using subtract = boost::compute::minus<T>;
459 using multiply = boost::compute::multiplies<T>;
461 using divide = boost::compute::divides<T>;
463 using pow = boost::compute::pow<T>;
465 struct safe_divide :
public boost::compute::function<T (T, T)>{
466 typedef T result_type;
467 safe_divide(T default_value) :
boost::compute::function<T (T, T)>(
"safe_divide"), default_value(default_value) { }
469 template<
class Arg1,
class Arg2>
470 gpu::detail::invoked_safe_div<Arg1,Arg2, T> operator()(
const Arg1 &x,
const Arg2& y)
const 472 return {x,y,default_value};
477 struct multiply_and_add :
public boost::compute::function<T (T,T)>{
478 typedef T result_type;
479 multiply_and_add(T scalar) :
boost::compute::function<T (T,T)>(
"multiply_and_add"), m_scalar(scalar) { }
481 template<
class Arg1,
class Arg2>
482 gpu::detail::invoked_multiply_and_add<Arg1,Arg2,T> operator()(
const Arg1 &x,
const Arg2& y)
const 484 return {x,y, m_scalar};
490 struct multiply_scalar :
public boost::compute::function<T (T)>{
491 typedef T result_type;
492 multiply_scalar(T scalar) :
boost::compute::function<T (T)>(
"multiply_scalar"), m_scalar(scalar) { }
495 gpu::detail::invoked_multiply_scalar<Arg1,T> operator()(
const Arg1 &x)
const 497 return {x, m_scalar};
504 struct multiply_assign :
public boost::compute::function<T (T,T)>{
505 typedef T result_type;
506 multiply_assign(T scalar) :
boost::compute::function<T (T,T)>(
"multiply_assign"), m_scalar(scalar) { }
508 template<
class Arg1,
class Arg2>
509 gpu::detail::invoked_multiply_scalar<Arg2,T> operator()(
const Arg1&,
const Arg2& y)
const 511 return {y, m_scalar};
519 using log = boost::compute::log<T>;
521 using exp = boost::compute::exp<T>;
523 using sin = boost::compute::sin<T>;
525 using cos = boost::compute::cos<T>;
527 using tan = boost::compute::tan<T>;
529 using asin = boost::compute::asin<T>;
531 using acos = boost::compute::acos<T>;
533 using atan = boost::compute::atan<T>;
535 using tanh = boost::compute::tanh<T>;
537 using sqrt = boost::compute::sqrt<T>;
539 using cbrt = boost::compute::cbrt<T>;
541 using abs = boost::compute::fabs<T>;
544 using erf = boost::compute::erf<T>;
546 using erfc = boost::compute::erfc<T>;
549 struct sqr :
public boost::compute::function<T (T)>{
550 typedef T result_type;
551 sqr() :
boost::compute::function<T (T)>(
"sqr") { }
554 gpu::detail::invoked_sqr<Arg1,T> operator()(
const Arg1 &x)
const 560 struct soft_plus :
public boost::compute::function<T (T)>{
561 typedef T result_type;
562 soft_plus() :
boost::compute::function<T (T)>(
"soft_plus") { }
565 gpu::detail::invoked_soft_plus<Arg1,T> operator()(
const Arg1 &x)
const 571 struct sigmoid :
public boost::compute::function<T (T)>{
572 typedef T result_type;
573 sigmoid() :
boost::compute::function<T (T)>(
"sigmoid") { }
576 gpu::detail::invoked_sigmoid<Arg1,T> operator()(
const Arg1 &x)
const 582 struct inv :
public boost::compute::function<T (T)>{
583 typedef T result_type;
584 inv() :
boost::compute::function<T (T)>(
"inv") { }
587 gpu::detail::invoked_inv<Arg1,T> operator()(
const Arg1 &x)
const 595 using min = boost::compute::fmin<T>;
597 using max = boost::compute::fmax<T>;
601 using less = boost::compute::less<T>;
603 using less_equal = boost::compute::less_equal<T>;
605 using greater = boost::compute::greater<T>;
607 using greater_equal = boost::compute::greater_equal<T>;
609 using equal = boost::compute::equal_to<T>;
611 using not_equal = boost::compute::not_equal_to<T>;
618 template<
class I1,
class I2,
class F>
619 struct is_device_iterator<remora::gpu::detail::binary_transform_iterator<I1,I2, F> > : boost::true_type {};
620 template<
class Closure>
621 struct is_device_iterator<remora::gpu::detail::indexed_iterator<Closure> > : boost::true_type {};
622 template<
class Iterator>
623 struct is_device_iterator<remora::gpu::detail::subrange_iterator<Iterator> > : boost::true_type {};