-
Notifications
You must be signed in to change notification settings - Fork 0
/
blas.cxx
129 lines (108 loc) · 3.54 KB
/
blas.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <iostream>
#include <stdlib.h>
#include <mpi.h>
#include <chrono>
#include <memory>
#include <string>
#include <vector>
#include "utils.h"
#include "dgemm.h"
/**
 * Benchmark driver that times repeated dgemm_ calls and reports flop rates.
 *
 * Command-line options read below:
 *   --no <size_t>   first problem dimension (No)
 *   --nv <size_t>   second problem dimension (Nv)
 *   -i <size_t>     timed iterations per averaging round (default 1)
 *   --avg <size_t>  number of averaging rounds (default 1)
 *   --warmup        run one untimed dgemm_ before every round (default true);
 *                   the historical spelling "-/warmup" is still honoured
 *   --vector        back the matrices with std::vector instead of new[]
 *   --holes         use the (No*No) x No x No shape instead of No x (No*No) x Nv
 *   --init          zero-fill the matrices before timing
 *
 * @return 0 on success, EXIT_FAILURE when a dimension does not fit in int.
 */
int main(int argc, char ** argv){
  // NOTE(review): `rank` is not read directly in this function; the LOG macro
  // from utils.h presumably depends on it -- confirm before removing.
  int rank, np;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &np);

  // The option was originally spelled "-/warmup", which looks like a typo for
  // "--warmup". The legacy spelling feeds the default of the corrected one so
  // existing command lines keep working.
  // Assumes hauta::option returns the supplied default when the flag is
  // absent, as the other call sites suggest -- TODO confirm.
  const bool legacyWarmup = hauta::option<bool>(argc, argv, "-/warmup", true);
  const bool warmup(hauta::option<bool>(argc, argv, "--warmup", legacyWarmup))
           , vector(hauta::option<bool>(argc, argv, "--vector", false))
           , holes(hauta::option<bool>(argc, argv, "--holes", false))
           , initialize(hauta::option<bool>(argc, argv, "--init", false))
           ;
  const std::string container = vector ? "std::vector" : "double[]";

  const
  size_t No = hauta::option<size_t>(argc, argv, "--no")
       , Nv = hauta::option<size_t>(argc, argv, "--nv")
       , iterations = hauta::option<size_t>(argc, argv, "-i", 1)
       , avg = hauta::option<size_t>(argc, argv, "--avg", 1)
       ;

  // Dimensions are first computed in size_t and only then narrowed to the
  // int values that are handed to dgemm_ by address.
  const size_t mWide = holes ? No*No : No
             , nWide = holes ? No : No*No
             , kWide = holes ? No : Nv
             ;
  int m = static_cast<int>(mWide)
    , n = static_cast<int>(nWide)
    , k = static_cast<int>(kWide)
    ;

  // Refuse to run if narrowing changed a value: the round trip back to
  // size_t must reproduce the wide dimension exactly.
  const auto fitsInt = [](size_t wide, int narrow) {
    return narrow >= 0 && static_cast<size_t>(narrow) == wide;
  };
  if (!fitsInt(mWide, m) || !fitsInt(nWide, n) || !fitsInt(kWide, k)) {
    std::cerr << "error: matrix dimensions do not fit in int"
              << " (m=" << mWide << ", n=" << nWide << ", k=" << kWide << ")\n";
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  // Element counts in size_t; the int products m*k, k*n, m*n can overflow.
  const size_t sizeA = mWide * kWide
             , sizeB = kWide * nWide
             , sizeC = mWide * nWide
             ;

  Timings chrono;
  Averages averages;

  // Owners of the storage; A, B and C are the non-owning views given to BLAS.
  std::unique_ptr<double[]> rawA, rawB, rawC;
  std::vector<double> vA, vB, vC;
  double *A, *B, *C;

  if (!vector) {
    // Plain new[] leaves the doubles uninitialized unless --init is given;
    // make_unique is avoided because it would value-initialize them.
    rawA.reset(new double[sizeA]);
    rawB.reset(new double[sizeB]);
    rawC.reset(new double[sizeC]);
    A = rawA.get();
    B = rawB.get();
    C = rawC.get();
    if (initialize) {
      for (size_t i = 0; i < sizeA; ++i) A[i] = 0.0;
      for (size_t i = 0; i < sizeB; ++i) B[i] = 0.0;
      for (size_t i = 0; i < sizeC; ++i) C[i] = 0.0;
    }
  } else {
    vA.reserve(sizeA);
    vB.reserve(sizeB);
    vC.reserve(sizeC);
    if (initialize) {
      vA.resize(sizeA);
      vB.resize(sizeB);
      vC.resize(sizeC);
    }
    // NOTE(review): without --init the vectors stay empty and dgemm_ works on
    // reserved-but-unconstructed capacity, exactly as the original code did.
    // This is kept to preserve the benchmark's behaviour -- confirm intent.
    A = vA.data();
    B = vB.data();
    C = vC.data();
  }

  double one(1.0);

  // Number of dgemm_ calls issued per timed iteration; shared by the flop
  // count and the timing loop so the two cannot drift apart.
  const int callsPerIteration = 6;

  // 2*n*m*k per call, evaluated in double to avoid int overflow; unit is 1e9 flops.
  const double flopCount
    = 2.0 * double(n) * double(m) * double(k)
    * callsPerIteration * double(iterations) / 1e9;

  LOG << "======= BLAS ======\n";
  LOG << SHOW_VAR(container) << "\n";
  LOG << SHOW_VAR(initialize) << "\n";
  LOG << SHOW_VAR(np) << "\n";
  LOG << SHOW_VAR(No) << "\n";
  LOG << SHOW_VAR(Nv) << "\n";
  LOG << SHOW_VAR(iterations) << "\n";
  LOG << SHOW_VAR(flopCount) << "\n";
  LOG << SHOW_VAR(holes) << "\n";
#if defined(BLIS_ARCH)
  LOG << SHOW_MACRO(BLIS_ARCH) << "\n";
#endif
  LOG << SHOW_MACRO(GIT_COMMIT) << "\n";
  LOG << SHOW_MACRO(DATE) << "\n";
  LOG << SHOW_MACRO(CONFIG) << "\n";
  LOG << SHOW_MACRO(COMPILER_VERSION) << "\n";
  LOG << SHOW_VAR(m) << " " << SHOW_VAR(n) << " " << SHOW_VAR(k) << "\n";

  // `round` replaces the former `__avg`; identifiers containing a double
  // underscore are reserved for the implementation.
  for (size_t round = 1; round <= avg; ++round) {

    if (warmup) {
      LOG << "Warming up \n";
      dgemm_("N", "N", &m, &n, &k, &one, A, &m, B, &k, &one, C, &m);
    }

    chrono["main"].start();
    for (size_t it = 0; it < iterations; ++it) {
      for (int call = 0; call < callsPerIteration; ++call) {
        dgemm_("N", "N", &m, &n, &k, &one, A, &m, B, &k, &one, C, &m);
      }
    }
    chrono["main"].stop();

    // NOTE(review): scaling by `round` presumes chrono["main"].count() is the
    // time accumulated over all rounds so far -- confirm against Timings.
    averages["flops:main"].push(round * flopCount / chrono["main"].count());

  }

  LOG << "main: " << chrono["main"].count() << std::endl;
  LOG << (holes ? "holes" : "particles") << ":flops: "
      << avg * flopCount / chrono["main"].count()
      << "\n"
      ;

  for (auto const& a: averages) {
    LOG << (holes ? "holes" : "particles") << ":"
        << a.first << " "
        << a.second.count() << " ± " << a.second.sigma()
        << " :avg " << a.second.size()
        << "\n"
        ;
  }

  for (auto const& f: averages["flops:main"].values) {
    LOG << f << ", ";
  }
  LOG << "\n";

  MPI_Finalize();

  return 0;

}