2 #include <shark/LinAlg/BLAS/blas.h> 8 template<
class AMat,
class BMat,
class CMat>
10 blas::matrix_expression<AMat, blas::cpu_tag>
const& A,
11 blas::matrix_expression<BMat, blas::cpu_tag>
const& B,
12 blas::matrix_expression<CMat, blas::cpu_tag> & C
14 double minTime = std::numeric_limits<double>::max();
15 for(std::size_t i = 0; i != 10; ++i){
17 noalias(C) += prod(A,B);
18 minTime = min(minTime,time.
stop());
20 return (A().size1()*A().size2()*B().size2())/minTime/1024/1024;
23 int main(
int argc,
char **argv) {
24 std::size_t size = 100;
25 std::cout<<
"Flops"<<std::endl;
26 for(std::size_t iter = 0; iter != 5; ++iter){
27 std::size_t middle = size;
28 blas::matrix<double,blas::row_major> Arow(size,middle);
29 for(std::size_t i = 0; i != size; ++i){
30 for(std::size_t k = 0; k != middle; ++k){
31 Arow(i,k) = 0.1/size*i+0.1/size*k;
35 blas::matrix<double,blas::row_major> Brow(middle,size);
36 for(std::size_t k = 0; k != middle; ++k){
37 for(std::size_t j = 0; j != size; ++j){
38 Brow(k,j) = 0.1/size*j+0.1/size*k;
41 blas::matrix<double,blas::column_major> Acol = Arow;
42 blas::matrix<double,blas::column_major> Bcol = Brow;
44 blas::matrix<double,blas::row_major> Crow(size,size,0.0);
45 blas::matrix<double,blas::column_major> Ccol(size,size,0.0);
46 std::cout<<size<<
"\t row major results\t"<<
benchmark(Arow,Brow,Crow)<<
"\t"<<
benchmark(Acol,Brow,Crow)
48 std::cout<<size<<
"\t column major results\t"<<
benchmark(Arow,Brow,Ccol)<<
"\t"<<
benchmark(Acol,Brow,Ccol)