28 #ifndef REMORA_GPU_COPY_HPP 29 #define REMORA_GPU_COPY_HPP 31 #include "../detail/traits.hpp" 33 #include "../detail/vector_proxy_classes.hpp" 34 #include "../detail/vector_expression_classes.hpp" 35 #include "../detail/matrix_proxy_classes.hpp" 36 #include "../detail/matrix_expression_classes.hpp" 45 class vector_transport_to_cpu:
// Expression that exposes the GPU vector expression E on the CPU side
// (the vector_expression base below is tagged cpu_tag).
public vector_expression<vector_transport_to_cpu<E>, cpu_tag>{
// Closure type under which the wrapped expression is stored.
47 typedef typename E::const_closure_type expression_closure_type;
// Value and size types are taken over from E; both reference types are const.
49 typedef typename E::value_type value_type;
50 typedef typename E::size_type size_type;
51 typedef value_type
const& const_reference;
52 typedef const_reference reference;
// The expression serves as its own closure type.
54 typedef vector_transport_to_cpu const_closure_type;
55 typedef vector_transport_to_cpu closure_type;
// Storage is declared as unknown_storage.
56 typedef unknown_storage storage_type;
57 typedef unknown_storage const_storage_type;
// Evaluation category: blockwise, parameterised on E's category tag.
58 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
// Iterator typedefs are forwarded from E.
62 typedef typename E::const_iterator const_iterator;
63 typedef const_iterator iterator;
// Constructor: stores the given expression closure in m_expression.
// Declared explicit, so no implicit conversion from the closure type takes place.
66 explicit vector_transport_to_cpu(
67 expression_closure_type
const& expression
68 ):m_expression(expression){}
// Returns the size reported by the wrapped expression.
70 size_type size()
const {
71 return m_expression.size();
// Accessor returning a const reference of the closure type.
// NOTE(review): the body is not visible in this listing; presumably it returns
// m_expression — confirm.
74 expression_closure_type
const& expression()
const {
// Returns the command queue of the wrapped expression.
77 boost::compute::command_queue& queue()
const{
78 return m_expression.queue();
// Dispatcher: forwards to the four-argument assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing VecX is not visible in this listing.
83 void assign_to(vector_expression<VecX, cpu_tag>& x, value_type
const& alpha = value_type(1) )
const{
84 assign_to(x, m_expression, alpha,
typename E::storage_type::storage_tag());
// Dispatcher: forwards to the four-argument plus_assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing VecX is not visible in this listing.
87 void plus_assign_to(vector_expression<VecX, cpu_tag>& x, value_type
const& alpha = value_type(1) )
const{
88 plus_assign_to(x, m_expression, alpha,
typename E::storage_type::storage_tag());
// Implemented by calling plus_assign_to with the negated factor -alpha.
// alpha is taken by value here and defaults to value_type(1).
92 void minus_assign_to(vector_expression<VecX, cpu_tag>& x, value_type alpha = value_type(1) )
const{
93 plus_assign_to(x,-alpha);
// Kernel overload selected by dense_tag: reads the data of e through a host mapping
// of its buffer and assigns it, scaled by alpha, to x.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// assign(), this is presumably the assign_to overload used by the dispatcher — confirm.
98 template<
class VecX,
class VecE>
100 vector_expression<VecX, cpu_tag>& x, vector_expression<VecE, gpu_tag>
const& e,
101 value_type
const& alpha, dense_tag
// Raw storage description of e and the buffer it refers to.
103 auto storageE = e().raw_storage();
104 auto& buffer = storageE.buffer;
// Map the buffer from offset 0 over buffer.size() for reading, using e's queue.
106 typename VecE::value_type* p = (
typename VecE::value_type*) e().queue().enqueue_map_buffer(
107 buffer, CL_MAP_READ, 0, buffer.size()
// Wrap the mapped memory, starting storageE.offset elements past p, with stride storageE.stride.
110 typedef dense_vector_adaptor<typename VecE::value_type> AdaptE;
111 AdaptE adaptE(p + storageE.offset,size(), storageE.stride);
// Assign the adaptor, wrapped in vector_scalar_multiply with alpha, to x.
112 assign(x, vector_scalar_multiply<AdaptE >( adaptE, alpha));
// Release the mapping.
115 e().queue().enqueue_unmap_buffer(buffer,p);
// Kernel overload selected by dense_tag: reads the data of e through a host mapping
// of its buffer and plus_assigns it, scaled by alpha, to x.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// plus_assign(), this is presumably the plus_assign_to overload used by the dispatcher — confirm.
118 template<
class VecX,
class VecE>
120 vector_expression<VecX, cpu_tag>& x, vector_expression<VecE, gpu_tag>
const& e,
121 value_type
const& alpha, dense_tag
// Raw storage description of e and the buffer it refers to.
123 auto storageE = e().raw_storage();
124 auto& buffer = storageE.buffer;
// Map the buffer from offset 0 over buffer.size() for reading, using e's queue.
126 typename VecE::value_type* p = (
typename VecE::value_type*) e().queue().enqueue_map_buffer(
127 buffer, CL_MAP_READ, 0, buffer.size()
// Wrap the mapped memory, starting storageE.offset elements past p, with stride storageE.stride.
130 typedef dense_vector_adaptor<typename VecE::value_type> AdaptE;
131 AdaptE adaptE(p + storageE.offset,size(), storageE.stride);
// plus_assign the adaptor, wrapped in vector_scalar_multiply with alpha, to x.
133 plus_assign(x,vector_scalar_multiply<AdaptE >( adaptE, alpha));
// Release the mapping.
136 e().queue().enqueue_unmap_buffer(buffer,p);
// Kernel overload selected by unknown_tag: first evaluates m_expression into a temporary
// of type vector_temporary<E>::type, then re-dispatches on the temporary's storage tag.
// NOTE(review): the function-name line is missing from this listing; presumably assign_to — confirm.
140 template<
class VecX,
class VecE>
142 vector_expression<VecX, cpu_tag>& x, vector_expression<VecE, gpu_tag>
const& e,
143 value_type
const& alpha, unknown_tag
// The temporary is initialised from the stored expression, not from the parameter e.
146 typedef typename vector_temporary<E>::type result_type;
147 result_type result = m_expression;
149 assign_to(x, result, alpha,
typename result_type::storage_type::storage_tag());
// Kernel overload selected by unknown_tag: first evaluates m_expression into a temporary
// of type vector_temporary<E>::type, then re-dispatches on the temporary's storage tag.
// NOTE(review): the function-name line is missing from this listing; presumably
// plus_assign_to — confirm.
152 template<
class VecX,
class VecE>
154 vector_expression<VecX, cpu_tag>& x, vector_expression<VecE, gpu_tag>
const& e,
155 value_type
const& alpha, unknown_tag
// The temporary is initialised from the stored expression, not from the parameter e.
158 typedef typename vector_temporary<E>::type result_type;
159 result_type result = m_expression;
161 plus_assign_to(x, result, alpha,
typename result_type::storage_type::storage_tag());
// The wrapped expression, held as its closure type.
163 expression_closure_type m_expression;
// Expression that exposes the CPU vector expression E on the GPU side
// (the vector_expression base is tagged gpu_tag).
// NOTE(review): the template header introducing E is not visible in this listing.
167 class vector_transport_to_gpu:
public vector_expression<vector_transport_to_gpu<E>, gpu_tag>{
// Closure type under which the wrapped expression is stored.
169 typedef typename E::const_closure_type expression_closure_type;
// Value and size types are taken over from E; both reference types are const.
171 typedef typename E::value_type value_type;
172 typedef typename E::size_type size_type;
173 typedef value_type
const& const_reference;
174 typedef const_reference reference;
// The expression serves as its own closure type.
176 typedef vector_transport_to_gpu const_closure_type;
177 typedef vector_transport_to_gpu closure_type;
// Storage is declared as unknown_storage.
178 typedef unknown_storage storage_type;
179 typedef unknown_storage const_storage_type;
// Evaluation category: blockwise, parameterised on E's category tag.
180 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
// Iterator typedefs are forwarded from E.
184 typedef typename E::const_iterator const_iterator;
185 typedef const_iterator iterator;
// Constructor: stores the expression closure and the address of the given command queue.
// Only a pointer to the queue is kept, so the queue object must outlive this expression.
188 explicit vector_transport_to_gpu(
189 expression_closure_type
const& expression,
190 boost::compute::command_queue& queue
191 ):m_expression(expression), m_queue(&queue){}
// Returns the size reported by the wrapped expression.
193 size_type size()
const {
194 return m_expression.size();
// Accessor returning a const reference of the closure type.
// NOTE(review): the body is not visible in this listing; presumably it returns
// m_expression — confirm.
196 expression_closure_type
const& expression()
const {
// Accessor returning a command queue reference.
// NOTE(review): the body is not visible in this listing; presumably it dereferences the
// stored m_queue pointer — confirm.
199 boost::compute::command_queue& queue()
const{
// Dispatcher: forwards to the four-argument assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing VecX is not visible in this listing.
205 void assign_to(vector_expression<VecX, gpu_tag>& x, value_type
const& alpha = value_type(1) )
const{
206 assign_to(x, m_expression, alpha,
typename E::storage_type::storage_tag());
// Dispatcher: forwards to the four-argument plus_assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing VecX is not visible in this listing.
209 void plus_assign_to(vector_expression<VecX, gpu_tag>& x, value_type
const& alpha = value_type(1) )
const{
210 plus_assign_to(x, m_expression, alpha,
typename E::storage_type::storage_tag());
// Implemented by calling plus_assign_to with the negated factor -alpha.
// alpha is taken by value here and defaults to value_type(1).
214 void minus_assign_to(vector_expression<VecX, gpu_tag>& x, value_type alpha = value_type(1) )
const{
215 plus_assign_to(x,-alpha);
// Kernel overload selected by dense_tag: maps the buffer of the target x into host memory
// and assigns the stored expression, scaled by alpha, through a dense_vector_adaptor.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// assign(), this is presumably the assign_to overload used by the dispatcher — confirm.
220 template<
class VecX,
class VecE>
222 vector_expression<VecX, gpu_tag>& x, vector_expression<VecE, cpu_tag>
const& e,
223 value_type
const& alpha, dense_tag
// Raw storage description of x and the buffer it refers to.
225 auto storagex = x().raw_storage();
226 auto& buffer = storagex.buffer;
// Map the buffer from offset 0 over buffer.size() for writing, using x's queue.
228 typename VecX::value_type* p = (
typename VecX::value_type*) x().queue().enqueue_map_buffer(
229 buffer, CL_MAP_WRITE, 0, buffer.size()
// Wrap the mapped memory, starting storagex.offset elements past p, with stride storagex.stride.
232 dense_vector_adaptor<typename VecX::value_type> adaptX(p + storagex.offset,size(), storagex.stride);
// The source is the stored m_expression; the parameter e is not used in the visible body.
233 assign(adaptX,vector_scalar_multiply<expression_closure_type>(m_expression,alpha));
// Release the mapping.
236 x().queue().enqueue_unmap_buffer(buffer,p);
239 template<
class VecX,
class VecE>
241 vector_expression<VecX, gpu_tag>& x, vector_expression<VecE, cpu_tag>
const& e,
242 value_type
const& alpha, dense_tag
244 auto storagex = x().raw_storage();
245 auto& buffer = storagex.buffer;
247 typename VecX::value_type* p = (
typename VecX::value_type*) x().queue().enqueue_map_buffer(
248 buffer, CL_MAP_WRITE, storagex.offset, buffer.size() - storagex.offset
251 dense_vector_adaptor<typename VecX::value_type> adaptX(p,size(), storagex.stride);
252 plus_assign(adaptX,vector_scalar_multiply<expression_closure_type>(m_expression,alpha));
255 x().queue().enqueue_unmap_buffer(buffer,p);
// The wrapped expression, held as its closure type.
258 expression_closure_type m_expression;
// Pointer to the command queue handed to the constructor (not owned).
259 boost::compute::command_queue* m_queue;
// Expression that exposes the GPU matrix expression E on the CPU side
// (the matrix_expression base is tagged cpu_tag).
// NOTE(review): the template header introducing E is not visible in this listing.
268 class matrix_transport_to_cpu:
public matrix_expression<matrix_transport_to_cpu<E>, cpu_tag>{
// Closure type under which the wrapped expression is stored.
270 typedef typename E::const_closure_type expression_closure_type;
// Value and size types are taken over from E; both reference types are const.
272 typedef typename E::value_type value_type;
273 typedef typename E::size_type size_type;
274 typedef value_type
const& const_reference;
275 typedef const_reference reference;
// The expression serves as its own closure type.
277 typedef matrix_transport_to_cpu const_closure_type;
278 typedef matrix_transport_to_cpu closure_type;
// Storage is declared as unknown_storage.
279 typedef unknown_storage storage_type;
280 typedef unknown_storage const_storage_type;
// Evaluation category is blockwise on E's category tag; orientation is E's orientation.
281 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
282 typedef typename E::orientation orientation;
// Row and column iterator typedefs are forwarded from E.
285 typedef typename E::const_row_iterator const_row_iterator;
286 typedef typename E::const_column_iterator const_column_iterator;
287 typedef const_row_iterator row_iterator;
288 typedef const_column_iterator column_iterator;
// Constructor: stores the given expression closure in m_expression.
// Declared explicit, so no implicit conversion from the closure type takes place.
291 explicit matrix_transport_to_cpu(
292 expression_closure_type
const& expression
293 ):m_expression(expression){}
// Returns size1() of the wrapped expression.
295 size_type size1()
const {
296 return m_expression.size1();
// Returns size2() of the wrapped expression.
298 size_type size2()
const {
299 return m_expression.size2();
// Accessor returning a const reference of the closure type.
// NOTE(review): the body is not visible in this listing; presumably it returns
// m_expression — confirm.
301 expression_closure_type
const& expression()
const {
// Returns the command queue of the wrapped expression.
304 boost::compute::command_queue& queue()
const{
305 return m_expression.queue();
// Dispatcher: forwards to the four-argument assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing MatX is not visible in this listing.
310 void assign_to(matrix_expression<MatX, cpu_tag>& X, value_type
const& alpha = value_type(1) )
const{
311 assign_to(X, m_expression, alpha,
typename E::storage_type::storage_tag());
// Dispatcher: forwards to the four-argument plus_assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing MatX is not visible in this listing.
314 void plus_assign_to(matrix_expression<MatX, cpu_tag>& X, value_type
const& alpha = value_type(1) )
const{
315 plus_assign_to(X, m_expression, alpha,
typename E::storage_type::storage_tag());
// Implemented by calling plus_assign_to with the negated factor -alpha.
// alpha is taken by value here and defaults to value_type(1).
319 void minus_assign_to(matrix_expression<MatX, cpu_tag>& X, value_type alpha = value_type(1) )
const{
320 plus_assign_to(X,-alpha);
// Kernel overload selected by dense_tag: reads the data of e through a host mapping
// of its buffer and assigns it, scaled by alpha, to X.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// assign(), this is presumably the assign_to overload used by the dispatcher — confirm.
325 template<
class MatX,
class MatE>
327 matrix_expression<MatX, cpu_tag>& X, matrix_expression<MatE, gpu_tag>
const& e,
328 value_type
const& alpha, dense_tag
// Raw storage description of e and the buffer it refers to.
330 auto storageE = e().raw_storage();
331 auto& buffer = storageE.buffer;
// Map the buffer from offset 0 over buffer.size() for reading, using e's queue.
333 typename MatE::value_type* p = (
typename MatE::value_type*) e().queue().enqueue_map_buffer(
334 buffer, CL_MAP_READ, 0, buffer.size()
// Both strides are derived from the leading dimension and 1 via e's orientation.
337 typedef typename MatE::orientation EOrientation;
338 std::size_t stride1 = EOrientation::index_M(storageE.leading_dimension,1);
339 std::size_t stride2 = EOrientation::index_m(storageE.leading_dimension,1);
// Wrap the mapped memory, starting storageE.offset elements past p.
340 typedef dense_matrix_adaptor<typename MatE::value_type, EOrientation> AdaptE;
341 AdaptE adaptE(p + storageE.offset,size1(), size2(), stride1,stride2);
// Assign the adaptor, wrapped in matrix_scalar_multiply with alpha, to X.
343 assign(X, matrix_scalar_multiply<AdaptE >( adaptE, alpha));
// Release the mapping.
346 e().queue().enqueue_unmap_buffer(buffer,p);
// Kernel overload selected by dense_tag: reads the data of e through a host mapping
// of its buffer and plus_assigns it, scaled by alpha, to X.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// plus_assign(), this is presumably the plus_assign_to overload used by the dispatcher — confirm.
349 template<
class MatX,
class MatE>
351 matrix_expression<MatX, cpu_tag>& X, matrix_expression<MatE, gpu_tag>
const& e,
352 value_type
const& alpha, dense_tag
// Raw storage description of e and the buffer it refers to.
354 auto storageE = e().raw_storage();
355 auto& buffer = storageE.buffer;
// Map the buffer from offset 0 over buffer.size() for reading, using e's queue.
357 typename MatE::value_type* p = (
typename MatE::value_type*) e().queue().enqueue_map_buffer(
358 buffer, CL_MAP_READ, 0, buffer.size()
// Both strides are derived from the leading dimension and 1 via e's orientation.
361 typedef typename MatE::orientation EOrientation;
362 std::size_t stride1 = EOrientation::index_M(storageE.leading_dimension,1);
363 std::size_t stride2 = EOrientation::index_m(storageE.leading_dimension,1);
// Wrap the mapped memory, starting storageE.offset elements past p.
364 typedef dense_matrix_adaptor<typename MatE::value_type, EOrientation> AdaptE;
365 AdaptE adaptE(p + storageE.offset, size1(), size2(), stride1,stride2);
// plus_assign the adaptor, wrapped in matrix_scalar_multiply with alpha, to X.
367 plus_assign(X,matrix_scalar_multiply<AdaptE >( adaptE, alpha));
// Release the mapping.
370 e().queue().enqueue_unmap_buffer(buffer,p);
// Kernel overload selected by unknown_tag: first evaluates m_expression into a temporary
// of type matrix_temporary<E>::type, then re-dispatches on the temporary's storage tag.
// NOTE(review): the function-name line is missing from this listing; presumably assign_to — confirm.
374 template<
class MatX,
class MatE>
376 matrix_expression<MatX, cpu_tag>& X, matrix_expression<MatE, gpu_tag>
const& e,
377 value_type
const& alpha, unknown_tag
// The temporary is initialised from the stored expression, not from the parameter e.
380 typedef typename matrix_temporary<E>::type result_type;
381 result_type result = m_expression;
383 assign_to(X, result, alpha,
typename result_type::storage_type::storage_tag());
// Kernel overload selected by unknown_tag: first evaluates m_expression into a temporary
// of type matrix_temporary<E>::type, then re-dispatches on the temporary's storage tag.
// NOTE(review): the function-name line is missing from this listing; presumably
// plus_assign_to — confirm.
386 template<
class MatX,
class MatE>
388 matrix_expression<MatX, cpu_tag>& X, matrix_expression<MatE, gpu_tag>
const& e,
389 value_type
const& alpha, unknown_tag
// The temporary is initialised from the stored expression, not from the parameter e.
392 typedef typename matrix_temporary<E>::type result_type;
393 result_type result = m_expression;
395 plus_assign_to(X, result, alpha,
typename result_type::storage_type::storage_tag());
// The wrapped expression, held as its closure type.
398 expression_closure_type m_expression;
// Expression that exposes the CPU matrix expression E on the GPU side
// (the matrix_expression base is tagged gpu_tag).
// NOTE(review): the template header introducing E is not visible in this listing.
402 class matrix_transport_to_gpu:
public matrix_expression<matrix_transport_to_gpu<E>, gpu_tag>{
// Closure type under which the wrapped expression is stored.
404 typedef typename E::const_closure_type expression_closure_type;
// Value and size types are taken over from E; both reference types are const.
406 typedef typename E::value_type value_type;
407 typedef typename E::size_type size_type;
408 typedef value_type
const& const_reference;
409 typedef const_reference reference;
// The expression serves as its own closure type.
411 typedef matrix_transport_to_gpu const_closure_type;
412 typedef matrix_transport_to_gpu closure_type;
// Storage is declared as unknown_storage.
413 typedef unknown_storage storage_type;
414 typedef unknown_storage const_storage_type;
// Evaluation category is blockwise on E's category tag; orientation is E's orientation.
415 typedef blockwise<typename E::evaluation_category::tag> evaluation_category;
416 typedef typename E::orientation orientation;
// Row and column iterator typedefs are forwarded from E.
419 typedef typename E::const_row_iterator const_row_iterator;
420 typedef typename E::const_column_iterator const_column_iterator;
421 typedef const_row_iterator row_iterator;
422 typedef const_column_iterator column_iterator;
// Constructor: stores the expression closure and the address of the given command queue.
// Only a pointer to the queue is kept, so the queue object must outlive this expression.
425 explicit matrix_transport_to_gpu(
426 expression_closure_type
const& expression,
427 boost::compute::command_queue& queue
428 ):m_expression(expression), m_queue(&queue){}
// Returns size1() of the wrapped expression.
430 size_type size1()
const {
431 return m_expression.size1();
// Returns size2() of the wrapped expression.
433 size_type size2()
const {
434 return m_expression.size2();
// Accessor returning a const reference of the closure type.
// NOTE(review): the body is not visible in this listing; presumably it returns
// m_expression — confirm.
436 expression_closure_type
const& expression()
const {
// Accessor returning a command queue reference.
// NOTE(review): the body is not visible in this listing; presumably it dereferences the
// stored m_queue pointer — confirm.
439 boost::compute::command_queue& queue()
const{
// Dispatcher: forwards to the four-argument assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing MatX is not visible in this listing.
445 void assign_to(matrix_expression<MatX, gpu_tag>& X, value_type
const& alpha = value_type(1) )
const{
446 assign_to(X, m_expression, alpha,
typename E::storage_type::storage_tag());
// Dispatcher: forwards to the four-argument plus_assign_to overload, passing the wrapped
// expression and a tag object of type E::storage_type::storage_tag that selects the kernel.
// alpha defaults to value_type(1).
// NOTE(review): the template header introducing MatX is not visible in this listing.
449 void plus_assign_to(matrix_expression<MatX, gpu_tag>& X, value_type
const& alpha = value_type(1) )
const{
450 plus_assign_to(X, m_expression, alpha,
typename E::storage_type::storage_tag());
454 void minus_assign_to(matrix_expression<MatX, cpu_tag>& X, value_type alpha = value_type(1) )
const{
455 plus_assign_to(X,-alpha);
460 template<
class MatX,
class MatE>
462 matrix_expression<MatX, gpu_tag>& X, matrix_expression<MatE, cpu_tag>
const& e,
463 value_type
const& alpha, dense_tag
465 auto storageX = X().raw_storage();
466 auto& buffer = storageX.buffer;
468 typename MatX::value_type* p = (
typename MatX::value_type*) X().queue().enqueue_map_buffer(
469 buffer, CL_MAP_WRITE, 0, buffer.size()
472 typedef typename MatX::orientation XOrientation;
473 std::size_t stride1 = XOrientation::index_M(storageX.leading_dimension, 1);
474 std::size_t stride2 = XOrientation::index_m(storageX.leading_dimension, 1);
475 dense_matrix_adaptor<typename MatX::value_type, XOrientation> adaptX(p, size1(), size2(), stride1, stride2);
476 assign(adaptX,matrix_scalar_multiply<MatE>(e(),alpha));
479 X().queue().enqueue_unmap_buffer(buffer,p);
// Kernel overload selected by dense_tag: maps the buffer of the target X into host memory
// and plus_assigns e, scaled by alpha, through a dense_matrix_adaptor.
// NOTE(review): the function-name line is missing from this listing; since the body calls
// plus_assign(), this is presumably the plus_assign_to overload used by the dispatcher — confirm.
482 template<
class MatX,
class MatE>
484 matrix_expression<MatX, gpu_tag>& X, matrix_expression<MatE, cpu_tag>
const& e,
485 value_type
const& alpha, dense_tag
// Raw storage description of X and the buffer it refers to.
487 auto storageX = X().raw_storage();
488 auto& buffer = storageX.buffer;
// Map the buffer from offset 0 over buffer.size() for writing, using X's queue.
// NOTE(review): plus_assign presumably also reads the mapped values while only
// CL_MAP_WRITE is requested — confirm this is intended.
490 typename MatX::value_type* p = (
typename MatX::value_type*) X().queue().enqueue_map_buffer(
491 buffer, CL_MAP_WRITE, 0, buffer.size()
// Both strides are derived from the leading dimension and 1 via X's orientation.
494 typedef typename MatX::orientation XOrientation;
495 std::size_t stride1 = XOrientation::index_M(storageX.leading_dimension, 1);
496 std::size_t stride2 = XOrientation::index_m(storageX.leading_dimension, 1);
// Wrap the mapped memory, starting storageX.offset elements past p.
497 typedef dense_matrix_adaptor<typename MatX::value_type, XOrientation> AdaptX;
498 AdaptX adaptX(p + storageX.offset, size1(), size2(), stride1, stride2);
// plus_assign e, wrapped in matrix_scalar_multiply with alpha, to the adaptor.
500 plus_assign(adaptX,matrix_scalar_multiply<MatE >( e(), alpha));
// Release the mapping.
503 X().queue().enqueue_unmap_buffer(buffer,p);
// The wrapped expression, held as its closure type.
506 expression_closure_type m_expression;
// Pointer to the command queue handed to the constructor (not owned).
507 boost::compute::command_queue* m_queue;
// Wraps the GPU vector expression e in a vector_transport_to_cpu<E> and returns it by value.
// NOTE(review): the template header introducing E is not visible in this listing.
516 vector_transport_to_cpu<E> copy_to_cpu(vector_expression<E, gpu_tag>
const& e){
517 return vector_transport_to_cpu<E>(e());
// Wraps the GPU matrix expression e in a matrix_transport_to_cpu<E> and returns it by value.
// NOTE(review): the template header introducing E is not visible in this listing.
521 matrix_transport_to_cpu<E> copy_to_cpu(matrix_expression<E, gpu_tag>
const& e){
522 return matrix_transport_to_cpu<E>(e());
// Wraps the CPU vector expression e in a vector_transport_to_gpu<E> bound to the given queue.
// queue defaults to boost::compute::system::default_queue().
// NOTE(review): the template header introducing E is not visible in this listing.
525 vector_transport_to_gpu<E> copy_to_gpu(
526 vector_expression<E, cpu_tag>
const& e,
527 boost::compute::command_queue& queue = boost::compute::system::default_queue()
529 return vector_transport_to_gpu<E>(e(), queue);
// Wraps the CPU matrix expression e in a matrix_transport_to_gpu<E> bound to the given queue.
// queue defaults to boost::compute::system::default_queue().
// NOTE(review): the template header introducing E is not visible in this listing.
533 matrix_transport_to_gpu<E> copy_to_gpu(
534 matrix_expression<E, cpu_tag>
const& e,
535 boost::compute::command_queue& queue = boost::compute::system::default_queue()
537 return matrix_transport_to_gpu<E>(e(),queue);
// Overload for vector expressions that already carry gpu_tag; returns E const&.
// queue defaults to boost::compute::system::default_queue().
// NOTE(review): the body is not visible in this listing; presumably it returns e()
// unchanged and ignores queue — confirm.
543 E
const& copy_to_gpu(
544 vector_expression<E, gpu_tag>
const& e,
545 boost::compute::command_queue& queue = boost::compute::system::default_queue()
551 E
const& copy_to_gpu(
552 matrix_expression<E, gpu_tag>
const& e,
553 boost::compute::command_queue& queue = boost::compute::system::default_queue()