Skip to content

Commit

Permalink
Update build files in generated-src
Browse files Browse the repository at this point in the history
  • Loading branch information
hanno-becker committed Jan 15, 2024
1 parent 0380bac commit 2e3ff96
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 461 deletions.
144 changes: 0 additions & 144 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ _aes_gcm_enc_kernel:
aese v1.16b, v19.16b
aesmc v1.16b, v1.16b // AES block 1 - round 1
ldr q14, [x6, #48] // load h3l | h3h
ext v14.16b, v14.16b, v14.16b, #8
aese v3.16b, v18.16b
aesmc v3.16b, v3.16b // AES block 3 - round 0
aese v2.16b, v19.16b
Expand All @@ -92,14 +91,12 @@ _aes_gcm_enc_kernel:
aese v1.16b, v20.16b
aesmc v1.16b, v1.16b // AES block 1 - round 2
ldr q13, [x6, #32] // load h2l | h2h
ext v13.16b, v13.16b, v13.16b, #8
aese v3.16b, v19.16b
aesmc v3.16b, v3.16b // AES block 3 - round 1
ldr q30, [x8, #192] // load rk12
aese v2.16b, v20.16b
aesmc v2.16b, v2.16b // AES block 2 - round 2
ldr q15, [x6, #80] // load h4l | h4h
ext v15.16b, v15.16b, v15.16b, #8
aese v1.16b, v21.16b
aesmc v1.16b, v1.16b // AES block 1 - round 3
ldr q29, [x8, #176] // load rk11
Expand Down Expand Up @@ -142,7 +139,6 @@ _aes_gcm_enc_kernel:
aese v0.16b, v24.16b
aesmc v0.16b, v0.16b // AES block 0 - round 6
ldr q12, [x6] // load h1l | h1h
ext v12.16b, v12.16b, v12.16b, #8
aese v2.16b, v24.16b
aesmc v2.16b, v2.16b // AES block 2 - round 6
ldr q28, [x8, #160] // load rk10
Expand Down Expand Up @@ -843,15 +839,12 @@ _aes_gcm_dec_kernel:
aese v0.16b, v18.16b
aesmc v0.16b, v0.16b // AES block 0 - round 0
ldr q14, [x6, #48] // load h3l | h3h
ext v14.16b, v14.16b, v14.16b, #8
aese v3.16b, v18.16b
aesmc v3.16b, v3.16b // AES block 3 - round 0
ldr q15, [x6, #80] // load h4l | h4h
ext v15.16b, v15.16b, v15.16b, #8
aese v1.16b, v18.16b
aesmc v1.16b, v1.16b // AES block 1 - round 0
ldr q13, [x6, #32] // load h2l | h2h
ext v13.16b, v13.16b, v13.16b, #8
aese v2.16b, v18.16b
aesmc v2.16b, v2.16b // AES block 2 - round 0
ldr q20, [x8, #32] // load rk2
Expand All @@ -871,7 +864,6 @@ _aes_gcm_dec_kernel:
aese v0.16b, v20.16b
aesmc v0.16b, v0.16b // AES block 0 - round 2
ldr q12, [x6] // load h1l | h1h
ext v12.16b, v12.16b, v12.16b, #8
aese v2.16b, v20.16b
aesmc v2.16b, v2.16b // AES block 2 - round 2
ldr q28, [x8, #160] // load rk10
Expand Down
24 changes: 23 additions & 1 deletion generated-src/ios-aarch64/crypto/fipsmodule/ghashv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ _gcm_init_v8:
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
ext v20.16b, v20.16b, v20.16b, #8
st1 {v20.2d},[x0],#16 //store Htable[0]
ext v20.16b, v20.16b, v20.16b, #8

//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
Expand All @@ -57,7 +59,10 @@ _gcm_init_v8:
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
st1 {v21.2d},[x0],#16 //store Htable[1] (packed Karatsuba pre-processed value)
ext v22.16b, v22.16b, v22.16b, #8 //swap the 64-bit halves of v22 for the stored copy
st1 {v22.2d},[x0],#16 //store Htable[2]
ext v22.16b, v22.16b, v22.16b, #8 //swap back: v22 is used again below in its original layout
//calculate H^3 and H^4
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
Expand Down Expand Up @@ -101,7 +106,11 @@ _gcm_init_v8:
eor v17.16b,v17.16b,v25.16b
eor v18.16b,v18.16b,v22.16b
ext v24.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
ext v23.16b, v23.16b, v23.16b, #8
ext v25.16b, v25.16b, v25.16b, #8
st1 {v23.2d,v24.2d,v25.2d},[x0],#48 //store Htable[3..5]
ext v23.16b, v23.16b, v23.16b, #8
ext v25.16b, v25.16b, v25.16b, #8

//calculate H^5 and H^6
pmull v0.1q,v22.1d, v23.1d
Expand Down Expand Up @@ -145,7 +154,11 @@ _gcm_init_v8:
eor v17.16b,v17.16b,v28.16b
eor v18.16b,v18.16b,v22.16b
ext v27.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
ext v26.16b, v26.16b, v26.16b, #8
ext v28.16b, v28.16b, v28.16b, #8
st1 {v26.2d,v27.2d,v28.2d},[x0],#48 //store Htable[6..8]
ext v26.16b, v26.16b, v26.16b, #8
ext v28.16b, v28.16b, v28.16b, #8

//calculate H^7 and H^8
pmull v0.1q,v22.1d,v26.1d
Expand Down Expand Up @@ -187,6 +200,8 @@ _gcm_init_v8:
eor v16.16b,v16.16b,v29.16b
eor v17.16b,v17.16b,v31.16b
ext v30.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
ext v29.16b, v29.16b, v29.16b, #8
ext v31.16b, v31.16b, v31.16b, #8
st1 {v29.2d,v30.2d,v31.2d},[x0] //store Htable[9..11]
ret

Expand All @@ -199,6 +214,7 @@ _gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
ext v20.16b,v20.16b,v20.16b,#8
shl v19.2d,v19.2d,#57
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
Expand Down Expand Up @@ -258,8 +274,10 @@ _gcm_ghash_v8:
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
ext v20.16b,v20.16b,v20.16b,#8
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
ext v22.16b,v22.16b,v22.16b,#8
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
Expand Down Expand Up @@ -373,8 +391,12 @@ gcm_ghash_v8_4x:
Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
ext v20.16b,v20.16b,v20.16b,#8
ext v22.16b,v22.16b,v22.16b,#8
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
ext v26.16b,v26.16b,v26.16b,#8
ext v28.16b,v28.16b,v28.16b,#8
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant

ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
Expand Down
10 changes: 9 additions & 1 deletion generated-src/ios-arm/crypto/fipsmodule/ghashv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ _gcm_init_v8:
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vext.8 q12, q12, q12, #8
vst1.64 {q12},[r0]! @ store Htable[0]
vext.8 q12, q12, q12, #8

@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
Expand All @@ -61,7 +63,10 @@ _gcm_init_v8:
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
vst1.64 {q13},[r0]! @ store Htable[1] (packed Karatsuba pre-processed value)
vext.8 q14, q14, q14, #8 @ swap the 64-bit halves of q14 for the stored copy
vst1.64 {q14},[r0]! @ store Htable[2]
vext.8 q14, q14, q14, #8 @ swap back: restores q14 to its layout before the store
bx lr

.globl _gcm_gmult_v8
Expand All @@ -75,6 +80,7 @@ _gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vext.8 q12,q12,q12,#8
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
Expand Down Expand Up @@ -135,8 +141,10 @@ _gcm_ghash_v8:
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vext.8 q12,q12,q12,#8
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
vext.8 q14,q14,q14,#8
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
Expand Down
Loading

0 comments on commit 2e3ff96

Please sign in to comment.