Skip to content

Commit

Permalink
Update build files in generated-src
Browse files (browse the repository at this point in the history)
  • Loading branch information
hanno-becker committed Mar 21, 2024
1 parent 8e421a3 commit 0c86c7d
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 558 deletions.
144 changes: 0 additions & 144 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -83,7 +83,6 @@ _aes_gcm_enc_kernel:
aese v1.16b, v19.16b
aesmc v1.16b, v1.16b // AES block 1 - round 1
ldr q14, [x6, #48] // load h3l | h3h
ext v14.16b, v14.16b, v14.16b, #8
aese v3.16b, v18.16b
aesmc v3.16b, v3.16b // AES block 3 - round 0
aese v2.16b, v19.16b
Expand All @@ -92,14 +91,12 @@ _aes_gcm_enc_kernel:
aese v1.16b, v20.16b
aesmc v1.16b, v1.16b // AES block 1 - round 2
ldr q13, [x6, #32] // load h2l | h2h
ext v13.16b, v13.16b, v13.16b, #8
aese v3.16b, v19.16b
aesmc v3.16b, v3.16b // AES block 3 - round 1
ldr q30, [x8, #192] // load rk12
aese v2.16b, v20.16b
aesmc v2.16b, v2.16b // AES block 2 - round 2
ldr q15, [x6, #80] // load h4l | h4h
ext v15.16b, v15.16b, v15.16b, #8
aese v1.16b, v21.16b
aesmc v1.16b, v1.16b // AES block 1 - round 3
ldr q29, [x8, #176] // load rk11
Expand Down Expand Up @@ -142,7 +139,6 @@ _aes_gcm_enc_kernel:
aese v0.16b, v24.16b
aesmc v0.16b, v0.16b // AES block 0 - round 6
ldr q12, [x6] // load h1l | h1h
ext v12.16b, v12.16b, v12.16b, #8
aese v2.16b, v24.16b
aesmc v2.16b, v2.16b // AES block 2 - round 6
ldr q28, [x8, #160] // load rk10
Expand Down Expand Up @@ -843,15 +839,12 @@ _aes_gcm_dec_kernel:
aese v0.16b, v18.16b
aesmc v0.16b, v0.16b // AES block 0 - round 0
ldr q14, [x6, #48] // load h3l | h3h
ext v14.16b, v14.16b, v14.16b, #8
aese v3.16b, v18.16b
aesmc v3.16b, v3.16b // AES block 3 - round 0
ldr q15, [x6, #80] // load h4l | h4h
ext v15.16b, v15.16b, v15.16b, #8
aese v1.16b, v18.16b
aesmc v1.16b, v1.16b // AES block 1 - round 0
ldr q13, [x6, #32] // load h2l | h2h
ext v13.16b, v13.16b, v13.16b, #8
aese v2.16b, v18.16b
aesmc v2.16b, v2.16b // AES block 2 - round 0
ldr q20, [x8, #32] // load rk2
Expand All @@ -871,7 +864,6 @@ _aes_gcm_dec_kernel:
aese v0.16b, v20.16b
aesmc v0.16b, v0.16b // AES block 0 - round 2
ldr q12, [x6] // load h1l | h1h
ext v12.16b, v12.16b, v12.16b, #8
aese v2.16b, v20.16b
aesmc v2.16b, v2.16b // AES block 2 - round 2
ldr q28, [x8, #160] // load rk10
Expand Down
71 changes: 41 additions & 30 deletions generated-src/ios-aarch64/crypto/fipsmodule/ghashv8-armx.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,13 +30,14 @@ _gcm_init_v8:
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
ext v20.16b, v20.16b, v20.16b, #8
st1 {v20.2d},[x0],#16 //store Htable[0]

//calculate H^2
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
pmull2 v0.1q,v20.2d,v20.2d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v2.1q,v20.1d,v20.1d
pmull v1.1q,v16.1d,v16.1d

ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
Expand All @@ -52,17 +53,19 @@ _gcm_init_v8:
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
eor v17.16b,v0.16b,v18.16b

ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
ext v22.16b,v17.16b,v17.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
st1 {v21.2d},[x0],#16 //store Htable[1..2]
st1 {v22.2d},[x0],#16 //store Htable[1..2]

//calculate H^3 and H^4
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
pmull2 v2.1q,v20.2d, v22.2d
pmull2 v7.1q,v22.2d,v22.2d
pmull2 v0.1q,v20.2d, v22.2d
pmull2 v5.1q,v22.2d,v22.2d
pmull v2.1q,v20.1d, v22.1d
pmull v7.1q,v22.1d,v22.1d
pmull v1.1q,v16.1d,v17.1d
pmull v6.1q,v17.1d,v17.1d

Expand Down Expand Up @@ -91,11 +94,11 @@ _gcm_init_v8:
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b

eor v23.16b, v0.16b,v18.16b //H^3
eor v25.16b,v5.16b,v4.16b //H^4
eor v16.16b, v0.16b,v18.16b //H^3
eor v17.16b, v5.16b,v4.16b //H^4

ext v16.16b,v23.16b, v23.16b,#8 //Karatsuba pre-processing
ext v17.16b,v25.16b,v25.16b,#8
ext v23.16b,v16.16b,v16.16b,#8 //Karatsuba pre-processing
ext v25.16b,v17.16b,v17.16b,#8
ext v18.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v23.16b
eor v17.16b,v17.16b,v25.16b
Expand All @@ -104,10 +107,10 @@ _gcm_init_v8:
st1 {v23.2d,v24.2d,v25.2d},[x0],#48 //store Htable[3..5]

//calculate H^5 and H^6
pmull v0.1q,v22.1d, v23.1d
pmull v5.1q,v23.1d,v23.1d
pmull2 v2.1q,v22.2d, v23.2d
pmull2 v7.1q,v23.2d,v23.2d
pmull2 v0.1q,v22.2d, v23.2d
pmull2 v5.1q,v23.2d,v23.2d
pmull v2.1q,v22.1d, v23.1d
pmull v7.1q,v23.1d,v23.1d
pmull v1.1q,v16.1d,v18.1d
pmull v6.1q,v16.1d,v16.1d

Expand Down Expand Up @@ -135,11 +138,12 @@ _gcm_init_v8:
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v26.16b,v0.16b,v18.16b //H^5
eor v28.16b,v5.16b,v4.16b //H^6

ext v16.16b,v26.16b, v26.16b,#8 //Karatsuba pre-processing
ext v17.16b,v28.16b,v28.16b,#8
eor v16.16b,v0.16b,v18.16b //H^5
eor v17.16b,v5.16b,v4.16b //H^6

ext v26.16b, v16.16b, v16.16b,#8 //Karatsuba pre-processing
ext v28.16b, v17.16b, v17.16b,#8
ext v18.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v26.16b
eor v17.16b,v17.16b,v28.16b
Expand All @@ -148,10 +152,10 @@ _gcm_init_v8:
st1 {v26.2d,v27.2d,v28.2d},[x0],#48 //store Htable[6..8]

//calculate H^7 and H^8
pmull v0.1q,v22.1d,v26.1d
pmull v5.1q,v22.1d,v28.1d
pmull2 v2.1q,v22.2d,v26.2d
pmull2 v7.1q,v22.2d,v28.2d
pmull2 v0.1q,v22.2d,v26.2d
pmull2 v5.1q,v22.2d,v28.2d
pmull v2.1q,v22.1d,v26.1d
pmull v7.1q,v22.1d,v28.1d
pmull v1.1q,v16.1d,v18.1d
pmull v6.1q,v17.1d,v18.1d

Expand Down Expand Up @@ -179,11 +183,11 @@ _gcm_init_v8:
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v29.16b,v0.16b,v18.16b //H^7
eor v31.16b,v5.16b,v4.16b //H^8
eor v16.16b,v0.16b,v18.16b //H^7
eor v17.16b,v5.16b,v4.16b //H^8

ext v16.16b,v29.16b,v29.16b,#8 //Karatsuba pre-processing
ext v17.16b,v31.16b,v31.16b,#8
ext v29.16b,v16.16b,v16.16b,#8 //Karatsuba pre-processing
ext v31.16b,v17.16b,v17.16b,#8
eor v16.16b,v16.16b,v29.16b
eor v17.16b,v17.16b,v31.16b
ext v30.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
Expand All @@ -199,6 +203,7 @@ _gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
ext v20.16b,v20.16b,v20.16b,#8
shl v19.2d,v19.2d,#57
#ifndef __AARCH64EB__
rev64 v17.16b,v17.16b
Expand Down Expand Up @@ -258,8 +263,10 @@ _gcm_ghash_v8:
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
ext v20.16b,v20.16b,v20.16b,#8
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
ext v22.16b,v22.16b,v22.16b,#8
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
Expand Down Expand Up @@ -373,8 +380,12 @@ gcm_ghash_v8_4x:
Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
ext v20.16b,v20.16b,v20.16b,#8
ext v22.16b,v22.16b,v22.16b,#8
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
ext v26.16b,v26.16b,v26.16b,#8
ext v28.16b,v28.16b,v28.16b,#8
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant

ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
Expand Down
17 changes: 11 additions & 6 deletions generated-src/ios-arm/crypto/fipsmodule/ghashv8-armx.S
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,13 +34,14 @@ _gcm_init_v8:
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vext.8 q12, q12, q12, #8
vst1.64 {q12},[r0]! @ store Htable[0]

@ calculate H^2
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
.byte 0xa9,0x0e,0xa9,0xf2 @ pmull2 q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa8,0x4e,0xa8,0xf2 @ pmull q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8

vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
Expand All @@ -56,12 +57,13 @@ _gcm_init_v8:
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
veor q9,q0,q10

vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
vext.8 q14,q9,q9,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
vst1.64 {q13},[r0]! @ store Htable[1..2]
vst1.64 {q14},[r0]! @ store Htable[1..2]
bx lr

.globl _gcm_gmult_v8
Expand All @@ -75,6 +77,7 @@ _gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vext.8 q12,q12,q12,#8
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
Expand Down Expand Up @@ -135,8 +138,10 @@ _gcm_ghash_v8:
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vext.8 q12,q12,q12,#8
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
vext.8 q14,q14,q14,#8
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
Expand Down
Loading

0 comments on commit 0c86c7d

Please sign in to comment.