32 #ifndef REMORA_KERNELS_CBLAS_GEMV_HPP 33 #define REMORA_KERNELS_CBLAS_GEMV_HPP 36 #include <type_traits> 38 namespace remora{
namespace bindings {
40 inline void gemv(CBLAS_ORDER
const Order,
41 CBLAS_TRANSPOSE
const TransA,
int const M,
int const N,
42 double alpha,
float const *A,
int const lda,
43 float const *X,
int const incX,
44 double beta,
float *Y,
int const incY
46 cblas_sgemv(Order, TransA, M, N, alpha, A, lda,
51 inline void gemv(CBLAS_ORDER
const Order,
52 CBLAS_TRANSPOSE
const TransA,
int const M,
int const N,
53 double alpha,
double const *A,
int const lda,
54 double const *X,
int const incX,
55 double beta,
double *Y,
int const incY
57 cblas_dgemv(Order, TransA, M, N, alpha, A, lda,
62 inline void gemv(CBLAS_ORDER
const Order,
63 CBLAS_TRANSPOSE
const TransA,
int const M,
int const N,
65 std::complex<float>
const *A,
int const lda,
66 std::complex<float>
const *X,
int const incX,
68 std::complex<float> *Y,
int const incY
70 std::complex<float> alphaArg(alpha,0);
71 std::complex<float> betaArg(beta,0);
72 cblas_cgemv(Order, TransA, M, N,
73 reinterpret_cast<cblas_float_complex_type const *>(&alphaArg),
74 reinterpret_cast<cblas_float_complex_type const *>(A), lda,
75 reinterpret_cast<cblas_float_complex_type const *>(X), incX,
76 reinterpret_cast<cblas_float_complex_type const *>(&betaArg),
77 reinterpret_cast<cblas_float_complex_type *>(Y), incY);
80 inline void gemv(CBLAS_ORDER
const Order,
81 CBLAS_TRANSPOSE
const TransA,
int const M,
int const N,
83 std::complex<double>
const *A,
int const lda,
84 std::complex<double>
const *X,
int const incX,
86 std::complex<double> *Y,
int const incY
88 std::complex<double> alphaArg(alpha,0);
89 std::complex<double> betaArg(beta,0);
90 cblas_zgemv(Order, TransA, M, N,
91 reinterpret_cast<cblas_double_complex_type const *>(&alphaArg),
92 reinterpret_cast<cblas_double_complex_type const *>(A), lda,
93 reinterpret_cast<cblas_double_complex_type const *>(X), incX,
94 reinterpret_cast<cblas_double_complex_type const *>(&betaArg),
95 reinterpret_cast<cblas_double_complex_type *>(Y), incY);
// Expression-level entry point: takes a CPU matrix expression A, CPU vector
// expressions x and y and a scalar alpha, and hands their raw storage to one
// of the pointer-based gemv overloads.
//
// NOTE(review): this block is a numbered listing with lines dropped (the
// embedded numbering jumps 101->103, 106->109, 120->122->125 and stops at
// 125). The return type and function name, the final parameter, the body's
// opening brace, several call arguments and the end of the function are not
// visible here. The code below is left exactly as received; restore the
// missing lines from the pristine header before building.
101 template <
typename MatA,
typename VectorX,
typename VectorY>
103 matrix_expression<MatA, cpu_tag>
const &A,
104 vector_expression<VectorX, cpu_tag>
const &x,
105 vector_expression<VectorY, cpu_tag> &y,
106 typename VectorY::value_type alpha,
// Dimensions of A as reported by the expression: size1() -> m, size2() -> n.
109 std::size_t m = A().size1();
110 std::size_t n = A().size2();
// Shape checks: x must have size2() entries and y must have size1() entries.
112 REMORA_SIZE_CHECK(x().size() == A().size2());
113 REMORA_SIZE_CHECK(y().size() == A().size1());
// The CBLAS storage order is taken from the matrix orientation's
// storage_order<...>::value, converted with a C-style cast.
115 CBLAS_ORDER
const stor_ord= (CBLAS_ORDER)storage_order<typename MatA::orientation>::value;
// Raw storage handles for A, x and y; their members feed the call below.
117 auto storageA = A().raw_storage();
118 auto storagex = x().raw_storage();
119 auto storagey = y().raw_storage();
// A is passed untransposed (CblasNoTrans); m and n are narrowed from
// std::size_t to int with C-style casts and no range check.
// NOTE(review): the value_type(1) argument sits where the pointer overloads
// take beta, assuming the dropped lines are the data pointers and strides --
// confirm against the pristine header.
120 gemv(stor_ord, CblasNoTrans, (
int)m, (
int)n, alpha,
122 storageA.leading_dimension,
125 typename VectorY::value_type(1),
131 template<
class M,
class V1,
class V2>
132 struct has_optimized_gemv: std::integral_constant<bool,
133 allowed_cblas_type<typename M::value_type>::type::value
134 && std::is_same<typename M::value_type, typename V1::value_type>::value
135 && std::is_same<typename V1::value_type, typename V2::value_type>::value
136 && std::is_base_of<dense_tag, typename M::storage_type::storage_tag>::value
137 && std::is_base_of<dense_tag, typename V1::storage_type::storage_tag>::value
138 && std::is_base_of<dense_tag, typename V2::storage_type::storage_tag>::value