remora::kernels Namespace Reference

Namespaces

 detail
 

Functions

template<typename MatA , typename MatB , typename MatC >
void gemm (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatB, gpu_tag > const &B, matrix_expression< MatC, gpu_tag > &C, typename MatC::value_type const &alpha)
 
template<typename MatA , typename VecX , typename VecV >
void gemv (matrix_expression< MatA, gpu_tag > const &A, vector_expression< VecX, gpu_tag > const &x, vector_expression< VecV, gpu_tag > &v, typename VecV::value_type const &alpha)
 
template<bool Upper, typename MatA , typename MatC >
void syrk (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatC, gpu_tag > &C, typename MatC::value_type const &alpha)
 
template<bool Upper, bool Unit, typename MatA , typename MatC >
void trmm (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatC, gpu_tag > &C)
 
template<bool Upper, bool Unit, typename MatA , typename VecV >
void trmv (matrix_expression< MatA, gpu_tag > const &A, vector_expression< VecV, gpu_tag > &v)
 
template<class Triangular , typename MatA , typename MatB >
void trsm_impl (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatB, gpu_tag > &B, Triangular, left)
 
template<class Triangular , typename MatA , typename MatB >
void trsm_impl (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatB, gpu_tag > &B, Triangular, right)
 
template<class Triangular , class Side , typename MatA , typename MatB >
void trsm (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatB, gpu_tag > &B)
 
template<class Triangular , class Side , typename MatA , typename VecB >
void trsv (matrix_expression< MatA, gpu_tag > const &A, vector_expression< VecB, gpu_tag > &b)
 
template<class E1 , class E2 , class M >
void conv2d (matrix_expression< E1, cpu_tag > const &image, matrix_expression< E2, cpu_tag > const &filter, matrix_expression< M, cpu_tag > &output, std::size_t num_channels, std::size_t num_filters)
 Computes the convolution of a multi-channel image with a set of filters. More...
 
template<class E1 , class E2 , class result_type , class Device >
void dot (vector_expression< E1, Device > const &e1, vector_expression< E2, Device > const &e2, result_type &result)
 Well known dot-product r=<e1,e2>=sum_i e1_i*e2_i. More...
 
template<class M , class E1 , class E2 >
void gemm (matrix_expression< E1, cpu_tag > const &e1, matrix_expression< E2, cpu_tag > const &e2, matrix_expression< M, cpu_tag > &m, typename M::value_type alpha)
 Well known GEneral Matrix-Matrix product kernel M+=alpha*E1*E2. More...
 
template<class M , class E1 , class E2 >
void gemv (matrix_expression< E1, cpu_tag > const &e1, vector_expression< E2, cpu_tag > const &e2, vector_expression< M, cpu_tag > &m, typename M::value_type alpha)
 Well known GEneral Matrix-Vector product kernel M+=alpha*E1*e2. More...
 
template<typename MatA , typename VecP >
void getrf (matrix_expression< MatA, cpu_tag > &A, vector_expression< VecP, cpu_tag > &P)
 Implements the GEneral TRiangular matrix Factorisation GETRF. More...
 
template<bool Upper, bool Unit, typename MatA , typename MatB >
void trmm (matrix_expression< MatA, gpu_tag > const &A, matrix_expression< MatB, gpu_tag > &B)
 
template<class F , class M , class Device >
void assign (matrix_expression< M, Device > &m, typename M::value_type t)
 
template<class M , class E , class Device >
void assign (matrix_expression< M, Device > &m, matrix_expression< E, Device > const &e)
 
template<class F , class M , class E , class Device >
void assign (matrix_expression< M, Device > &m, const matrix_expression< E, Device > &e, F f=F())
 
template<class F , class M , class Device >
void matrix_fold (matrix_expression< M, Device > const &m, typename F::result_type &value)
 Applies F in any order to the elements of m and a given initial value. More...
 
template<class Triangular , typename MatA >
std::size_t potrf (matrix_container< MatA, cpu_tag > &A)
 Implements the POsitive TRiangular matrix Factorisation POTRF. More...
 
template<class Triangular , class MatA , class VecP >
std::size_t pstrf (matrix_expression< MatA, cpu_tag > &A, vector_expression< VecP, cpu_tag > &P)
 Cholesky decomposition with full pivoting performed in place. More...
 
template<class M , class V , class Device >
void sum_rows (matrix_expression< M, Device > const &A, vector_expression< V, Device > &b, typename V::value_type alpha)
 Sums the rows of a row-major or column major matrix. More...
 
template<typename MatA , typename VectorB >
void syev (matrix_expression< MatA, cpu_tag > &matA, vector_expression< VectorB, cpu_tag > &eigenValues)
 Well known SYmmetric EigenValue function (SYEV). More...
 
template<bool Upper, class M , class E >
void syrk (matrix_expression< E, cpu_tag > const &e, matrix_expression< M, cpu_tag > &m, typename M::value_type alpha)
 Well known SYmmetric Rank-K update kernel M+=alpha*A*A^T. More...
 
template<typename MatA , typename VecB >
void tpmv (matrix_expression< MatA, cpu_tag > const &A, vector_expression< VecB, cpu_tag > &b)
 Implements the Triangular Packed Matrix-Vector multiplication (TPMV). More...
 
template<bool Upper, bool Unit, typename MatA , typename MatB >
void trmm (matrix_expression< MatA, cpu_tag > const &A, matrix_expression< MatB, cpu_tag > &B)
 Implements the TRiangular Matrix Matrix multiply. More...
 
template<bool Upper, bool Unit, typename MatA , typename VecB >
void trmv (matrix_expression< MatA, cpu_tag > const &A, vector_expression< VecB, cpu_tag > &b)
 Implements the TRiangular Matrix-Vector multiply. More...
 
template<class Triangular , class Side , typename MatA , typename MatB >
void trsm (matrix_expression< MatA, cpu_tag > const &A, matrix_expression< MatB, cpu_tag > &B)
 Implements the TRiangular Solver for Matrices. More...
 
template<class Triangular , class Side , typename MatA , typename V >
void trsv (matrix_expression< MatA, cpu_tag > const &A, vector_expression< V, cpu_tag > &b)
 Implements the TRiangular Solver for Vectors. More...
 
template<class F , class V , class Device >
void assign (vector_expression< V, Device > &v, typename V::value_type t)
 
template<class V , class E , class Device >
void assign (vector_expression< V, Device > &v, const vector_expression< E, Device > &e)
 
template<class F , class V , class E , class Device >
void assign (vector_expression< V, Device > &v, const vector_expression< E, Device > &e, F f=F())
 
template<class F , class V , class Device >
void vector_fold (vector_expression< V, Device > const &v, typename F::result_type &value)
 Applies F in any order to the elements of v and a given initial value. More...
 
template<class E , class Device >
std::size_t vector_max (vector_expression< E, Device > const &e)
 Computes the index of the maximum element of a vector. More...
 

Function Documentation

◆ assign() [1/6]

template<class F , class V , class Device >
void remora::kernels::assign ( vector_expression< V, Device > &  v,
typename V::value_type  t 
)

Definition at line 40 of file vector_assign.hpp.

◆ assign() [2/6]

template<class F , class M , class Device >
void remora::kernels::assign ( matrix_expression< M, Device > &  m,
typename M::value_type  t 
)

◆ assign() [3/6]

template<class V , class E , class Device >
void remora::kernels::assign ( vector_expression< V, Device > &  v,
const vector_expression< E, Device > &  e 
)

Definition at line 50 of file vector_assign.hpp.

References remora::bindings::vector_assign().

◆ assign() [4/6]

template<class F , class V , class E , class Device >
void remora::kernels::assign ( vector_expression< V, Device > &  v,
const vector_expression< E, Device > &  e,
f = F() 
)

Definition at line 64 of file vector_assign.hpp.

References remora::bindings::vector_assign_functor().

◆ assign() [5/6]

template<class M , class E , class Device >
void remora::kernels::assign ( matrix_expression< M, Device > &  m,
matrix_expression< E, Device > const &  e 
)

Definition at line 94 of file matrix_assign.hpp.

References remora::kernels::detail::matrix_assign().

◆ assign() [6/6]

template<class F , class M , class E , class Device >
void remora::kernels::assign ( matrix_expression< M, Device > &  m,
const matrix_expression< E, Device > &  e,
f = F() 
)

◆ conv2d()

template<class E1 , class E2 , class M >
void remora::kernels::conv2d ( matrix_expression< E1, cpu_tag > const &  image,
matrix_expression< E2, cpu_tag > const &  filter,
matrix_expression< M, cpu_tag > &  output,
std::size_t  num_channels,
std::size_t  num_filters 
)

Computes the convolution of a multi-channel image with a set of filters.

Computes the result of applying k filters to an image, where filters and image are allowed to have multiple channels (some would call this a 3d or even 4d convolution, but we refrain from this, as for two dimensions the filter dimensions and image dimensions must agree; e.g. it does not behave like convolving a volume). The base for the convolution is the upper left corner and there is no boundary handling, i.e. only pixels within the image area are computed.

The image is stored block-row-wise, i.e. an image of size n x m with k channels is stored as an (n*k) x m matrix where n consecutive rows form the rows of one channel of the image. Filters are stored similarly, only that in their case we have the format (n1*k*l) x m1 for a set of l filters of size n1 x m1 with k channels each. The n1 rows form a channel, k*n1 rows form a filter. The output is stored in the same way as the image, just with size (l* (m-m1+1))x(n-n1+1). The caller must ensure that enough memory is allocated.

Definition at line 56 of file conv2d.hpp.

References remora::bindings::conv2d().

Referenced by benchmark().

◆ dot()

template<class E1 , class E2 , class result_type , class Device >
void remora::kernels::dot ( vector_expression< E1, Device > const &  e1,
vector_expression< E2, Device > const &  e2,
result_type &  result 
)

Well known dot-product r=<e1,e2>=sum_i e1_i*e2_i.

If bindings are included and the vector combination allows for a specific binding to be applied, the binding is called automatically from {binding}/dot.h; otherwise default/dot.h is used, which is fully implemented for all dense/sparse combinations. If a combination is optimized, bindings::has_optimized_dot<E1,E2,R>::type evaluates to std::true_type. The kernels themselves are implemented in bindings::dot.

Definition at line 48 of file dot.hpp.

References remora::bindings::dot().

Referenced by remora::bindings::gemv_impl(), remora::inner_prod(), remora::bindings::trmv_impl(), and remora::bindings::trsv_impl().

◆ gemm() [1/2]

template<typename MatA , typename MatB , typename MatC >
void remora::kernels::gemm ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatB, gpu_tag > const &  B,
matrix_expression< MatC, gpu_tag > &  C,
typename MatC::value_type const &  alpha 
)

◆ gemm() [2/2]

template<class M , class E1 , class E2 >
void remora::kernels::gemm ( matrix_expression< E1, cpu_tag > const &  e1,
matrix_expression< E2, cpu_tag > const &  e2,
matrix_expression< M, cpu_tag > &  m,
typename M::value_type  alpha 
)

Well known GEneral Matrix-Matrix product kernel M+=alpha*E1*E2.

If bindings are included and the matrix combination allows for a specific binding to be applied, the binding is called automatically from {binding}/gemm.h; otherwise default/gemm.h is used, which is fully implemented for all dense/sparse combinations. If a combination is optimized, bindings::has_optimized_gemm<M,E1,E2>::type evaluates to std::true_type. The kernels themselves are implemented in bindings::gemm.

Definition at line 88 of file gemm.hpp.

References remora::bindings::gemm().

◆ gemv() [1/2]

template<typename MatA , typename VecX , typename VecV >
void remora::kernels::gemv ( matrix_expression< MatA, gpu_tag > const &  A,
vector_expression< VecX, gpu_tag > const &  x,
vector_expression< VecV, gpu_tag > &  v,
typename VecV::value_type const &  alpha 
)

◆ gemv() [2/2]

template<class M , class E1 , class E2 >
void remora::kernels::gemv ( matrix_expression< E1, cpu_tag > const &  e1,
vector_expression< E2, cpu_tag > const &  e2,
vector_expression< M, cpu_tag > &  m,
typename M::value_type  alpha 
)

Well known GEneral Matrix-Vector product kernel M+=alpha*E1*e2.

If bindings are included and the matrix/vector combination allows for a specific binding to be applied, the binding is called automatically from {binding}/gemv.h; otherwise default/gemv.h is used, which is fully implemented for all dense/sparse combinations. If a combination is optimized, bindings::has_optimized_gemv<M,E1,E2>::type evaluates to std::true_type. The kernels themselves are implemented in bindings::gemv.

Definition at line 59 of file gemv.hpp.

References remora::bindings::gemv().

◆ getrf()

template<typename MatA , typename VecP >
void remora::kernels::getrf ( matrix_expression< MatA, cpu_tag > &  A,
vector_expression< VecP, cpu_tag > &  P 
)

Implements the GEneral TRiangular matrix Factorisation GETRF.

It is better known as the LU decomposition with partial row-pivoting for dense matrices. The algorithm works in place and does not require additional memory.

The algorithm computes A = P * L * U

where L is lower unit-triangular and U upper triangular.

The unit diagonal part of L is not stored explicitly. P is a permutation matrix where P(i) stores the index of the row that row i is swapped with.

Definition at line 52 of file getrf.hpp.

References remora::bindings::getrf().

Referenced by remora::pivoting_lu_decomposition< MatrixStorage >::pivoting_lu_decomposition().

◆ matrix_fold()

template<class F , class M , class Device >
void remora::kernels::matrix_fold ( matrix_expression< M, Device > const &  m,
typename F::result_type &  value 
)

Applies F in any order to the elements of m and a given initial value.

result is the same as value =f(v_1,f(v_2,...f(v_n,value))) assuming f is commutative and associative.

Definition at line 45 of file matrix_fold.hpp.

◆ potrf()

template<class Triangular , typename MatA >
std::size_t remora::kernels::potrf ( matrix_container< MatA, cpu_tag > &  A)

Implements the POsitive TRiangular matrix Factorisation POTRF.

It is better known as the Cholesky decomposition for dense matrices. The algorithm works in place and does not require additional memory.

Definition at line 55 of file potrf.hpp.

◆ pstrf()

template<class Triangular , class MatA , class VecP >
std::size_t remora::kernels::pstrf ( matrix_expression< MatA, cpu_tag > &  A,
vector_expression< VecP, cpu_tag > &  P 
)

Cholesky decomposition with full pivoting performed in place.

Given an \( m \times m \) symmetric positive semi-definite matrix \(A\), compute the matrix \(L\) and permutation matrix \(P\) such that \(P^TAP = LL^T \). If matrix A has rank(A) = k, the first k columns of A hold the full decomposition, while the rest of the matrix is zero. This method is slower than the Cholesky decomposition without pivoting but numerically more stable. The diagonal elements are ordered such that i > j => L(i,i) >= L(j,j).

The implementation used here is described in the working paper "LAPACK-Style Codes for Level 2 and 3 Pivoted Cholesky Factorizations" http://www.netlib.org/lapack/lawnspdf/lawn161.pdf

The computation is carried out in place; this means A is destroyed and replaced by L.

Parameters
A: \( m \times m \) matrix, which must be symmetric and positive definite. It is replaced by L in the end.
P: The pivoting matrix of dimension \( m \)
Returns
The rank of the matrix A

Definition at line 61 of file pstrf.hpp.

References remora::bindings::pstrf().

◆ sum_rows()

template<class M , class V , class Device >
void remora::kernels::sum_rows ( matrix_expression< M, Device > const &  A,
vector_expression< V, Device > &  b,
typename V::value_type  alpha 
)

Sums the rows of a row-major or column major matrix.

This is equivalent to the operation v=1^TA where 1 is the vector of all-ones

Definition at line 57 of file sum_rows.hpp.

References remora::bindings::sum_rows().

◆ syev()

template<typename MatA , typename VectorB >
void remora::kernels::syev ( matrix_expression< MatA, cpu_tag > &  matA,
vector_expression< VectorB, cpu_tag > &  eigenValues 
)

Well known SYmmetric EigenValue function (SYEV).

A given matrix A is decomposed as A=QDQ^T where Q is an orthogonal (or unitary) matrix with QQ^T=Q^TQ=I and D is the diagonal matrix of the eigenvalues of A. As A is symmetric, only the lower part of it is accessed for reading. The whole matrix will in the end contain the eigenvectors of A and thus A is replaced by Q. Additionally the eigenvalues are stored in the second argument.

Definition at line 52 of file syev.hpp.

References remora::bindings::syev().

Referenced by remora::symm_eigenvalue_decomposition< RealMatrix >::decompose().

◆ syrk() [1/2]

template<bool Upper, typename MatA , typename MatC >
void remora::kernels::syrk ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatC, gpu_tag > &  C,
typename MatC::value_type const &  alpha 
)

Definition at line 42 of file syrk.hpp.

References remora::eval_expression().

◆ syrk() [2/2]

template<bool Upper, class M , class E >
void remora::kernels::syrk ( matrix_expression< E, cpu_tag > const &  e,
matrix_expression< M, cpu_tag > &  m,
typename M::value_type  alpha 
)

Well known SYmmetric Rank-K update kernel M+=alpha*A*A^T.

Note that it assumes M to be symmetric and it will only touch the upper or lower triangular area. If bindings are included and the matrix combination allows for a specific binding to be applied, the binding is called automatically from {binding}/syrk.h; otherwise default/syrk.h is used.

Definition at line 56 of file syrk.hpp.

◆ tpmv()

template<typename MatA , typename VecB >
void remora::kernels::tpmv ( matrix_expression< MatA, cpu_tag > const &  A,
vector_expression< VecB, cpu_tag > &  b 
)

Implements the Triangular Packed Matrix-Vector multiplication (TPMV).

It computes b=A*b where A is a lower or upper packed triangular matrix.

Definition at line 54 of file tpmv.hpp.

References remora::bindings::tpmv().

◆ trmm() [1/3]

template<bool Upper, bool Unit, typename MatA , typename MatC >
void remora::kernels::trmm ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatC, gpu_tag > &  C 
)

Definition at line 42 of file trmm.hpp.

References remora::eval_expression().

◆ trmm() [2/3]

template<bool Upper, bool Unit, typename MatA , typename MatB >
void remora::kernels::trmm ( matrix_expression< MatA, cpu_tag > const &  A,
matrix_expression< MatB, cpu_tag > &  B 
)

Implements the TRiangular Matrix Matrix multiply.

It computes B=A*B in place, where A is a triangular matrix and B a dense matrix

Definition at line 54 of file trmm.hpp.

◆ trmm() [3/3]

template<bool Upper, bool Unit, typename MatA , typename MatB >
void remora::kernels::trmm ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatB, gpu_tag > &  B 
)

◆ trmv() [1/2]

template<bool Upper, bool Unit, typename MatA , typename VecV >
void remora::kernels::trmv ( matrix_expression< MatA, gpu_tag > const &  A,
vector_expression< VecV, gpu_tag > &  v 
)

Definition at line 42 of file trmv.hpp.

References remora::eval_expression().

◆ trmv() [2/2]

template<bool Upper, bool Unit, typename MatA , typename VecB >
void remora::kernels::trmv ( matrix_expression< MatA, cpu_tag > const &  A,
vector_expression< VecB, cpu_tag > &  b 
)

Implements the TRiangular Matrix-Vector multiply.

It computes b=A*b in place, where A is a square lower or upper triangular matrix. It can optionally assume that the diagonal is 1 and won't access the diagonal elements.

Definition at line 55 of file trmv.hpp.

◆ trsm() [1/2]

template<class Triangular , class Side , typename MatA , typename MatB >
void remora::kernels::trsm ( matrix_expression< MatA, cpu_tag > const &  A,
matrix_expression< MatB, cpu_tag > &  B 
)

Implements the TRiangular Solver for Matrices.

It solves systems of the form AX = B or XA = B (depending on Side), where A is a square lower or upper triangular matrix. It can optionally assume that the diagonal is 1 and won't access the diagonal elements.

Definition at line 56 of file trsm.hpp.

◆ trsm() [2/2]

template<class Triangular , class Side , typename MatA , typename MatB >
void remora::kernels::trsm ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatB, gpu_tag > &  B 
)

Definition at line 101 of file trsm.hpp.

References trsm_impl().

◆ trsm_impl() [1/2]

template<class Triangular , typename MatA , typename MatB >
void remora::kernels::trsm_impl ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatB, gpu_tag > &  B,
Triangular  ,
left   
)

Definition at line 43 of file trsm.hpp.

References remora::eval_expression().

Referenced by trsm(), and trsm_impl().

◆ trsm_impl() [2/2]

template<class Triangular , typename MatA , typename MatB >
void remora::kernels::trsm_impl ( matrix_expression< MatA, gpu_tag > const &  A,
matrix_expression< MatB, gpu_tag > &  B,
Triangular  ,
right   
)

Definition at line 88 of file trsm.hpp.

References trsm_impl().

◆ trsv() [1/2]

template<class Triangular , class Side , typename MatA , typename VecB >
void remora::kernels::trsv ( matrix_expression< MatA, gpu_tag > const &  A,
vector_expression< VecB, gpu_tag > &  b 
)

Definition at line 42 of file trsv.hpp.

References remora::eval_expression().

◆ trsv() [2/2]

template<class Triangular , class Side , typename MatA , typename V >
void remora::kernels::trsv ( matrix_expression< MatA, cpu_tag > const &  A,
vector_expression< V, cpu_tag > &  b 
)

Implements the TRiangular Solver for Vectors.

It solves systems of the form Ax = b where A is a square lower or upper triangular matrix. It can optionally assume that the diagonal is 1 and won't access the diagonal elements.

Definition at line 56 of file trsv.hpp.

◆ vector_fold()

template<class F , class V , class Device >
void remora::kernels::vector_fold ( vector_expression< V, Device > const &  v,
typename F::result_type &  value 
)

Appliuees F in any order to the elements of v and a given initial value.

result is the same as value = f(v_1,f(v_2,...f(v_n,value))) assuming f is commutative and associative.

Definition at line 45 of file vector_fold.hpp.

◆ vector_max()

template<class E , class Device >
std::size_t remora::kernels::vector_max ( vector_expression< E, Device > const &  e)

Computes the index of the maximum element of a vector.

Definition at line 42 of file vector_max.hpp.

References remora::bindings::vector_max().

Referenced by remora::arg_max().