forked from minio/sha256-simd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sha256blockIntelSha_amd64.s
194 lines (190 loc) · 11.4 KB
/
sha256blockIntelSha_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
// LCDATA1 is the 272-byte constant table that blockIntelSha addresses through BP.
//
//   0x000-0x00f  vpshufb control mask: reverses the four bytes inside every
//                32-bit lane of a 16-byte message block
//   0x010-0x10f  the 64 SHA-256 round constants K[0..63], one 32-bit word each,
//                so K[i] lives at offset 0x010 + 4*i
//
// Each entry below is a little-endian 32-bit word.

// Byte-swap shuffle mask.
DATA LCDATA1<>+0x000(SB)/4, $0x00010203
DATA LCDATA1<>+0x004(SB)/4, $0x04050607
DATA LCDATA1<>+0x008(SB)/4, $0x08090a0b
DATA LCDATA1<>+0x00c(SB)/4, $0x0c0d0e0f

// K[0..3]
DATA LCDATA1<>+0x010(SB)/4, $0x428a2f98
DATA LCDATA1<>+0x014(SB)/4, $0x71374491
DATA LCDATA1<>+0x018(SB)/4, $0xb5c0fbcf
DATA LCDATA1<>+0x01c(SB)/4, $0xe9b5dba5

// K[4..7]
DATA LCDATA1<>+0x020(SB)/4, $0x3956c25b
DATA LCDATA1<>+0x024(SB)/4, $0x59f111f1
DATA LCDATA1<>+0x028(SB)/4, $0x923f82a4
DATA LCDATA1<>+0x02c(SB)/4, $0xab1c5ed5

// K[8..11]
DATA LCDATA1<>+0x030(SB)/4, $0xd807aa98
DATA LCDATA1<>+0x034(SB)/4, $0x12835b01
DATA LCDATA1<>+0x038(SB)/4, $0x243185be
DATA LCDATA1<>+0x03c(SB)/4, $0x550c7dc3

// K[12..15]
DATA LCDATA1<>+0x040(SB)/4, $0x72be5d74
DATA LCDATA1<>+0x044(SB)/4, $0x80deb1fe
DATA LCDATA1<>+0x048(SB)/4, $0x9bdc06a7
DATA LCDATA1<>+0x04c(SB)/4, $0xc19bf174

// K[16..19]
DATA LCDATA1<>+0x050(SB)/4, $0xe49b69c1
DATA LCDATA1<>+0x054(SB)/4, $0xefbe4786
DATA LCDATA1<>+0x058(SB)/4, $0x0fc19dc6
DATA LCDATA1<>+0x05c(SB)/4, $0x240ca1cc

// K[20..23]
DATA LCDATA1<>+0x060(SB)/4, $0x2de92c6f
DATA LCDATA1<>+0x064(SB)/4, $0x4a7484aa
DATA LCDATA1<>+0x068(SB)/4, $0x5cb0a9dc
DATA LCDATA1<>+0x06c(SB)/4, $0x76f988da

// K[24..27]
DATA LCDATA1<>+0x070(SB)/4, $0x983e5152
DATA LCDATA1<>+0x074(SB)/4, $0xa831c66d
DATA LCDATA1<>+0x078(SB)/4, $0xb00327c8
DATA LCDATA1<>+0x07c(SB)/4, $0xbf597fc7

// K[28..31]
DATA LCDATA1<>+0x080(SB)/4, $0xc6e00bf3
DATA LCDATA1<>+0x084(SB)/4, $0xd5a79147
DATA LCDATA1<>+0x088(SB)/4, $0x06ca6351
DATA LCDATA1<>+0x08c(SB)/4, $0x14292967

// K[32..35]
DATA LCDATA1<>+0x090(SB)/4, $0x27b70a85
DATA LCDATA1<>+0x094(SB)/4, $0x2e1b2138
DATA LCDATA1<>+0x098(SB)/4, $0x4d2c6dfc
DATA LCDATA1<>+0x09c(SB)/4, $0x53380d13

// K[36..39]
DATA LCDATA1<>+0x0a0(SB)/4, $0x650a7354
DATA LCDATA1<>+0x0a4(SB)/4, $0x766a0abb
DATA LCDATA1<>+0x0a8(SB)/4, $0x81c2c92e
DATA LCDATA1<>+0x0ac(SB)/4, $0x92722c85

// K[40..43]
DATA LCDATA1<>+0x0b0(SB)/4, $0xa2bfe8a1
DATA LCDATA1<>+0x0b4(SB)/4, $0xa81a664b
DATA LCDATA1<>+0x0b8(SB)/4, $0xc24b8b70
DATA LCDATA1<>+0x0bc(SB)/4, $0xc76c51a3

// K[44..47]
DATA LCDATA1<>+0x0c0(SB)/4, $0xd192e819
DATA LCDATA1<>+0x0c4(SB)/4, $0xd6990624
DATA LCDATA1<>+0x0c8(SB)/4, $0xf40e3585
DATA LCDATA1<>+0x0cc(SB)/4, $0x106aa070

// K[48..51]
DATA LCDATA1<>+0x0d0(SB)/4, $0x19a4c116
DATA LCDATA1<>+0x0d4(SB)/4, $0x1e376c08
DATA LCDATA1<>+0x0d8(SB)/4, $0x2748774c
DATA LCDATA1<>+0x0dc(SB)/4, $0x34b0bcb5

// K[52..55]
DATA LCDATA1<>+0x0e0(SB)/4, $0x391c0cb3
DATA LCDATA1<>+0x0e4(SB)/4, $0x4ed8aa4a
DATA LCDATA1<>+0x0e8(SB)/4, $0x5b9cca4f
DATA LCDATA1<>+0x0ec(SB)/4, $0x682e6ff3

// K[56..59]
DATA LCDATA1<>+0x0f0(SB)/4, $0x748f82ee
DATA LCDATA1<>+0x0f4(SB)/4, $0x78a5636f
DATA LCDATA1<>+0x0f8(SB)/4, $0x84c87814
DATA LCDATA1<>+0x0fc(SB)/4, $0x8cc70208

// K[60..63]
DATA LCDATA1<>+0x100(SB)/4, $0x90befffa
DATA LCDATA1<>+0x104(SB)/4, $0xa4506ceb
DATA LCDATA1<>+0x108(SB)/4, $0xbef9a3f7
DATA LCDATA1<>+0x10c(SB)/4, $0xc67178f2

// Flag 8 is RODATA; 272 = 16 (mask) + 64*4 (round constants).
GLOBL LCDATA1<>(SB), 8, $272
// blockIntelSha(h, data, length)
//
// Runs the SHA-256 compression function over consecutive 64-byte blocks of
// the input using the SHA extension instructions (sha256rnds2, sha256msg1,
// sha256msg2), updating the eight 32-bit state words in place.
//
// Arguments (24 bytes in the caller's frame):
//   h+0(FP)        pointer to the 32-byte state; read at entry, rewritten at exit
//   data+8(FP)     pointer to the input bytes (loaded with unaligned moves)
//   length+16(FP)  number of input bytes
//
// Only whole 64-byte blocks are consumed: the loop runs while at least 64
// bytes remain, and a shorter tail is left unprocessed.
//
// NOTE(review): the length is tested and decremented with 32-bit operations
// on edx, so only the low 32 bits of the 8-byte argument slot take part.
// Confirm against the Go declaration that the length parameter is a 32-bit
// type, or that callers never pass 4 GiB or more in one call.
//
// Register roles:
//   DI       h
//   AX       h + 16 (second half of the state)
//   SI       current input position, advanced by 64 per block
//   DX       bytes remaining (low 32 bits)
//   BP       base address of LCDATA1
//   X8       byte-swap shuffle mask (LCDATA1+0)
//   X9-X11   round constants K[52..63], preloaded once before the loop
//   X12/X13  running state, held in the permuted two-register arrangement
//            that the sha256rnds2 sequence operates on
//   X0       W+K for the next rounds (implicit operand of sha256rnds2)
//   X1/X2    working state inside a block
//   X3-X7, X14, X15  message-schedule words
//
// Frame size: BP is the frame pointer and is callee-save in Go's amd64
// convention. This function overwrites it with the table address, so the
// frame is declared non-zero ($8): the assembler then emits the BP
// save/restore in the prologue and at RET. With a $0 frame nothing would be
// saved and the caller's frame pointer would be clobbered on return.
TEXT ·blockIntelSha(SB), $8-24
	MOVQ h+0(FP), DI
	MOVQ data+8(FP), SI
	MOVQ length+16(FP), DX
	LEAQ LCDATA1<>(SB), BP

	// Load the state and permute it into X13/X12. The inverse permutation is
	// applied at LBB0_3 before the state is stored back.
	LONG $0x10478d48 // lea rax, [rdi + 16]
	LONG $0x0770f9c5; BYTE $0xb1 // vpshufd xmm0, oword [rdi], 177
	LONG $0x4f70f9c5; WORD $0x1b10 // vpshufd xmm1, oword [rdi + 16], 27
	LONG $0x0f7963c4; WORD $0x08e9 // vpalignr xmm13, xmm0, xmm1, 8
	LONG $0x0e7163c4; WORD $0xf0e0 // vpblendw xmm12, xmm1, xmm0, 240

	// Fewer than 64 bytes: skip the block loop and go straight to the store.
	WORD $0xfa83; BYTE $0x40 // cmp edx, 64
	JB LBB0_3

	// Loop-invariant constants: the shuffle mask and K[52..63].
	LONG $0x456f79c5; BYTE $0x00 // vmovdqa xmm8, oword 0[rbp] /* [rip + .LCPI0_0] */
	QUAD $0x000000e08d6f79c5 // vmovdqa xmm9, oword 224[rbp] /* [rip + .LCPI0_14] */
	QUAD $0x000000f0956f79c5 // vmovdqa xmm10, oword 240[rbp] /* [rip + .LCPI0_15] */
	QUAD $0x000001009d6f79c5 // vmovdqa xmm11, oword 256[rbp] /* [rip + .LCPI0_16] */

LBB0_2:
	// One 64-byte block per iteration. Load the four 16-byte message words.
	LONG $0x066ffac5 // vmovdqu xmm0, oword [rsi]
	LONG $0x666ffac5; BYTE $0x10 // vmovdqu xmm4, oword [rsi + 16]
	LONG $0x6e6ffac5; BYTE $0x20 // vmovdqu xmm5, oword [rsi + 32]
	LONG $0x5e6ffac5; BYTE $0x30 // vmovdqu xmm3, oword [rsi + 48]

	// Rounds 0-3: byte-swap W[0..3], add K[0..3]. The working state starts as
	// copies of X12/X13 so the originals survive for the final addition.
	LONG $0x0079c2c4; BYTE $0xf8 // vpshufb xmm7, xmm0, xmm8
	LONG $0x45fec1c5; BYTE $0x10 // vpaddd xmm0, xmm7, oword 16[rbp] /* [rip + .LCPI0_1] */
	LONG $0x6f79c1c4; BYTE $0xd4 // vmovdqa xmm2, xmm12
	LONG $0xcb380f41; BYTE $0xd5 // sha256rnds2 xmm2, xmm13
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0x6f79c1c4; BYTE $0xcd // vmovdqa xmm1, xmm13
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Rounds 4-7 (K at offset 32).
	LONG $0x0059c2c4; BYTE $0xf0 // vpshufb xmm6, xmm4, xmm8
	LONG $0x45fec9c5; BYTE $0x20 // vpaddd xmm0, xmm6, oword 32[rbp] /* [rip + .LCPI0_2] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xfecc380f // sha256msg1 xmm7, xmm6

	// Rounds 8-11 (K at offset 48).
	LONG $0x0051c2c4; BYTE $0xe0 // vpshufb xmm4, xmm5, xmm8
	LONG $0x45fed9c5; BYTE $0x30 // vpaddd xmm0, xmm4, oword 48[rbp] /* [rip + .LCPI0_3] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xf4cc380f // sha256msg1 xmm6, xmm4

	// Rounds 12-15 (K at offset 64). From here on each group also derives the
	// next four schedule words: vpalignr + vpaddd + sha256msg2, fed by an
	// earlier sha256msg1.
	LONG $0x0061c2c4; BYTE $0xe8 // vpshufb xmm5, xmm3, xmm8
	LONG $0x45fed1c5; BYTE $0x40 // vpaddd xmm0, xmm5, oword 64[rbp] /* [rip + .LCPI0_4] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f51e3c4; WORD $0x04dc // vpalignr xmm3, xmm5, xmm4, 4
	LONG $0xf7fe61c5 // vpaddd xmm14, xmm3, xmm7
	LONG $0xcd380f44; BYTE $0xf5 // sha256msg2 xmm14, xmm5
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xe5cc380f // sha256msg1 xmm4, xmm5

	// Rounds 16-19 (K at offset 80).
	LONG $0x45fe89c5; BYTE $0x50 // vpaddd xmm0, xmm14, oword 80[rbp] /* [rip + .LCPI0_5] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f09e3c4; WORD $0x04dd // vpalignr xmm3, xmm14, xmm5, 4
	LONG $0xfefe61c5 // vpaddd xmm15, xmm3, xmm6
	LONG $0xcd380f45; BYTE $0xfe // sha256msg2 xmm15, xmm14
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xcc380f41; BYTE $0xee // sha256msg1 xmm5, xmm14

	// Rounds 20-23 (K at offset 96).
	LONG $0x45fe81c5; BYTE $0x60 // vpaddd xmm0, xmm15, oword 96[rbp] /* [rip + .LCPI0_6] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f01c3c4; WORD $0x04de // vpalignr xmm3, xmm15, xmm14, 4
	LONG $0xe4fee1c5 // vpaddd xmm4, xmm3, xmm4
	LONG $0xcd380f41; BYTE $0xe7 // sha256msg2 xmm4, xmm15
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xcc380f45; BYTE $0xf7 // sha256msg1 xmm14, xmm15

	// Rounds 24-27 (K at offset 112).
	LONG $0x45fed9c5; BYTE $0x70 // vpaddd xmm0, xmm4, oword 112[rbp] /* [rip + .LCPI0_7] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f59c3c4; WORD $0x04df // vpalignr xmm3, xmm4, xmm15, 4
	LONG $0xedfee1c5 // vpaddd xmm5, xmm3, xmm5
	LONG $0xeccd380f // sha256msg2 xmm5, xmm4
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xcc380f44; BYTE $0xfc // sha256msg1 xmm15, xmm4

	// Rounds 28-31 (K at offset 128).
	QUAD $0x0000008085fed1c5 // vpaddd xmm0, xmm5, oword 128[rbp] /* [rip + .LCPI0_8] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f51e3c4; WORD $0x04dc // vpalignr xmm3, xmm5, xmm4, 4
	LONG $0xfe61c1c4; BYTE $0xf6 // vpaddd xmm6, xmm3, xmm14
	LONG $0xf5cd380f // sha256msg2 xmm6, xmm5
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xe5cc380f // sha256msg1 xmm4, xmm5

	// Rounds 32-35 (K at offset 144).
	QUAD $0x0000009085fec9c5 // vpaddd xmm0, xmm6, oword 144[rbp] /* [rip + .LCPI0_9] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f49e3c4; WORD $0x04dd // vpalignr xmm3, xmm6, xmm5, 4
	LONG $0xfe61c1c4; BYTE $0xff // vpaddd xmm7, xmm3, xmm15
	LONG $0xfecd380f // sha256msg2 xmm7, xmm6
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xeecc380f // sha256msg1 xmm5, xmm6

	// Rounds 36-39 (K at offset 160).
	QUAD $0x000000a085fec1c5 // vpaddd xmm0, xmm7, oword 160[rbp] /* [rip + .LCPI0_10] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f41e3c4; WORD $0x04de // vpalignr xmm3, xmm7, xmm6, 4
	LONG $0xe4fee1c5 // vpaddd xmm4, xmm3, xmm4
	LONG $0xe7cd380f // sha256msg2 xmm4, xmm7
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xf7cc380f // sha256msg1 xmm6, xmm7

	// Rounds 40-43 (K at offset 176).
	QUAD $0x000000b085fed9c5 // vpaddd xmm0, xmm4, oword 176[rbp] /* [rip + .LCPI0_11] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f59e3c4; WORD $0x04df // vpalignr xmm3, xmm4, xmm7, 4
	LONG $0xedfee1c5 // vpaddd xmm5, xmm3, xmm5
	LONG $0xeccd380f // sha256msg2 xmm5, xmm4
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2
	LONG $0xfccc380f // sha256msg1 xmm7, xmm4

	// Rounds 44-47 (K at offset 192).
	QUAD $0x000000c085fed1c5 // vpaddd xmm0, xmm5, oword 192[rbp] /* [rip + .LCPI0_12] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f51e3c4; WORD $0x04dc // vpalignr xmm3, xmm5, xmm4, 4
	LONG $0xdefee1c5 // vpaddd xmm3, xmm3, xmm6
	LONG $0xddcd380f // sha256msg2 xmm3, xmm5
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Rounds 48-51 (K at offset 208).
	QUAD $0x000000d085fee1c5 // vpaddd xmm0, xmm3, oword 208[rbp] /* [rip + .LCPI0_13] */
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f61e3c4; WORD $0x04f5 // vpalignr xmm6, xmm3, xmm5, 4
	LONG $0xf7fec9c5 // vpaddd xmm6, xmm6, xmm7
	LONG $0xf3cd380f // sha256msg2 xmm6, xmm3
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Rounds 52-55 (K preloaded in xmm9).
	LONG $0xfe49c1c4; BYTE $0xc1 // vpaddd xmm0, xmm6, xmm9
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0xe5cc380f // sha256msg1 xmm4, xmm5
	LONG $0xebcc380f // sha256msg1 xmm5, xmm3
	LONG $0x0f49e3c4; WORD $0x04db // vpalignr xmm3, xmm6, xmm3, 4
	LONG $0xdcfee1c5 // vpaddd xmm3, xmm3, xmm4
	LONG $0xdecd380f // sha256msg2 xmm3, xmm6
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Rounds 56-59 (K preloaded in xmm10).
	LONG $0xfe61c1c4; BYTE $0xc2 // vpaddd xmm0, xmm3, xmm10
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0x0f61e3c4; WORD $0x04e6 // vpalignr xmm4, xmm3, xmm6, 4
	LONG $0xe5fed9c5 // vpaddd xmm4, xmm4, xmm5
	LONG $0xe3cd380f // sha256msg2 xmm4, xmm3
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Rounds 60-63 (K preloaded in xmm11).
	LONG $0xfe59c1c4; BYTE $0xc3 // vpaddd xmm0, xmm4, xmm11
	LONG $0xd1cb380f // sha256rnds2 xmm2, xmm1
	LONG $0xc070f9c5; BYTE $0x0e // vpshufd xmm0, xmm0, 14
	LONG $0xcacb380f // sha256rnds2 xmm1, xmm2

	// Fold this block's result into the running state.
	LONG $0xfe7141c4; BYTE $0xed // vpaddd xmm13, xmm1, xmm13
	LONG $0xfe6941c4; BYTE $0xe4 // vpaddd xmm12, xmm2, xmm12

	// Advance to the next block; keep looping while at least 64 bytes remain.
	WORD $0xc283; BYTE $0xc0 // add edx, -64
	LONG $0x40c68348 // add rsi, 64
	WORD $0xfa83; BYTE $0x3f // cmp edx, 63
	JA LBB0_2

LBB0_3:
	// Undo the entry permutation and write the state back to h[0..3] (rdi)
	// and h[4..7] (rax = rdi + 16).
	LONG $0x7079c1c4; WORD $0x1bc5 // vpshufd xmm0, xmm13, 27
	LONG $0x7079c1c4; WORD $0xb1cc // vpshufd xmm1, xmm12, 177
	LONG $0x0e79e3c4; WORD $0xf0d1 // vpblendw xmm2, xmm0, xmm1, 240
	LONG $0x0f71e3c4; WORD $0x08c0 // vpalignr xmm0, xmm1, xmm0, 8
	LONG $0x177ffac5 // vmovdqu oword [rdi], xmm2
	LONG $0x007ffac5 // vmovdqu oword [rax], xmm0
	RET