-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix_multiply.wat
134 lines (124 loc) · 3.28 KB
/
matrix_multiply.wat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
;;4x4 matrix multiplication using SIMD instruction set
;;Created by Otto-Ville Lamminpää
;;ottoville.lamminpaa@gmail.com
;; LICENSE
;; This code can be freely used, as long as author is mentioned in credits section of the software
;; If this code, or any part of it, is included in software or set viewable to public audience, this LICENSE section must be included with the code
(module
(import "env" "memory" (memory $mem 1 2))
(func $row
  ;; Weighted sum of four f32x4 vectors. Each lane of $weights is broadcast
  ;; to all four lanes and multiplied lane-wise with the matching vector:
  ;;
  ;;   result = ($weights[0] * $x0 + $weights[1] * $x1)
  ;;          + ($weights[2] * $x2 + $weights[3] * $x3)
  ;;
  ;; The two inner sums are formed first and then added together; this
  ;; grouping is kept exactly because f32 addition is not associative.
  (param $x0 v128)
  (param $x1 v128)
  (param $x2 v128)
  (param $x3 v128)
  (param $weights v128)
  (result v128)

  ;; $weights[0] * $x0
  local.get $weights
  f32x4.extract_lane 0
  f32x4.splat
  local.get $x0
  f32x4.mul

  ;; $weights[1] * $x1
  local.get $weights
  f32x4.extract_lane 1
  f32x4.splat
  local.get $x1
  f32x4.mul

  ;; first partial sum: terms 0 and 1
  f32x4.add

  ;; $weights[2] * $x2
  local.get $weights
  f32x4.extract_lane 2
  f32x4.splat
  local.get $x2
  f32x4.mul

  ;; $weights[3] * $x3
  local.get $weights
  f32x4.extract_lane 3
  f32x4.splat
  local.get $x3
  f32x4.mul

  ;; second partial sum: terms 2 and 3
  f32x4.add

  ;; combine both partial sums into the returned vector
  f32x4.add
)
(func $multiply
  ;; Combines two 64-byte operands in linear memory and writes a 64-byte
  ;; result. Each operand is accessed as four 16-byte vectors at byte
  ;; offsets 0, 16, 32 and 48.
  ;;
  ;;   $a   - address of the operand whose vectors supply the weights
  ;;   $b   - address of the operand whose four vectors are combined
  ;;   $out - address that receives the four result vectors
  ;;
  ;; For i in 0..3:
  ;;   out[i] = $row(b[0], b[1], b[2], b[3], a[i])
  ;;
  ;; NOTE(review): presumably both operands are row-major 4x4 f32 matrices,
  ;; which makes the result a * b - confirm the layout with the caller.
  ;;
  ;; All of $b is read before the first store, and a[i] is read before
  ;; out[i] is written, so $out may equal $a or $b exactly.
  (param $a i32)
  (param $b i32)
  (param $out i32)
  (local $b0 v128)
  (local $b1 v128)
  (local $b2 v128)
  (local $b3 v128)

  ;; Keep the four vectors of $b in locals; every result vector reuses them.
  (local.set $b0 (v128.load           (local.get $b)))
  (local.set $b1 (v128.load offset=16 (local.get $b)))
  (local.set $b2 (v128.load offset=32 (local.get $b)))
  (local.set $b3 (v128.load offset=48 (local.get $b)))

  ;; out[0]
  (v128.store
    (local.get $out)
    (call $row
      (local.get $b0) (local.get $b1) (local.get $b2) (local.get $b3)
      (v128.load (local.get $a))))

  ;; out[1]
  (v128.store offset=16
    (local.get $out)
    (call $row
      (local.get $b0) (local.get $b1) (local.get $b2) (local.get $b3)
      (v128.load offset=16 (local.get $a))))

  ;; out[2]
  (v128.store offset=32
    (local.get $out)
    (call $row
      (local.get $b0) (local.get $b1) (local.get $b2) (local.get $b3)
      (v128.load offset=32 (local.get $a))))

  ;; out[3]
  (v128.store offset=48
    (local.get $out)
    (call $row
      (local.get $b0) (local.get $b1) (local.get $b2) (local.get $b3)
      (v128.load offset=48 (local.get $a))))
)
(export "multiply" (func $multiply))
)