-
Notifications
You must be signed in to change notification settings - Fork 0
/
mul0.py
26 lines (24 loc) · 835 Bytes
/
mul0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import numpy as np
import time
# Side length of the square matrices multiplied by the benchmark.
N = 4096


def matmul_flop(n):
    """Return the floating-point operation count of an (n, n) @ (n, n) matmul.

    Each of the n * n output cells is the dot product of two length-n
    vectors. A dot product costs n multiplies plus n adds, so every cell
    accounts for 2n operations and the whole product for 2 * n**3.
    """
    return n * n * 2 * n


def best_matmul_seconds(a, b, repeats=5):
    """Return the fastest observed wall-clock time, in seconds, of ``a @ b``.

    One untimed warm-up product is executed first so that one-time start-up
    costs do not leak into the measurement. The minimum over ``repeats``
    timed runs is reported because it is the sample least disturbed by
    other activity on the machine.

    Raises:
        ValueError: if ``repeats`` is smaller than 1.
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    _ = a @ b  # warm-up, deliberately not timed

    best = float("inf")
    for _ in range(repeats):
        # perf_counter is the clock intended for measuring short durations.
        start = time.perf_counter()
        _ = a @ b
        elapsed = time.perf_counter() - start
        best = min(best, elapsed)
    return best


if __name__ == "__main__":
    # randn produces float64; cast down to float32 for the benchmark.
    A = np.random.randn(N, N).astype(np.float32)
    B = np.random.randn(N, N).astype(np.float32)

    flop = matmul_flop(N)
    seconds = best_matmul_seconds(A, B)

    # Operations per second, scaled to units of 1e9.
    print((flop * 1e-9) / seconds, "GFLOPs")

    # Observations recorded by the author:
    #   M1 Pro Mac, float32 - 0.5 TFLOPs
    #   float64 is half as fast, and we don't need that precision
    #   10 cores (8 performance and 2 efficiency)
    #   16GB memory
    # Open question: what is the theoretical peak TFLOPs of this machine?