Skip to content

Commit

Permalink
aes-gcm-v8: Rename labels to reflect block-alignment of input
Browse files Browse the repository at this point in the history
  • Loading branch information
hanno-becker committed Jul 8, 2024
1 parent 2fac8fc commit 5d53371
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 64 deletions.
32 changes: 16 additions & 16 deletions crypto/fipsmodule/modes/asm/aesv8-gcm-armv8.pl
Original file line number Diff line number Diff line change
Expand Up @@ -777,22 +777,22 @@
fmov $ctr_t0d, $input_l0 // AES block 4k+4 - mov low
fmov $ctr_t0.d[1], $input_h0 // AES block 4k+4 - mov high
eor $res1b, $ctr_t0b, $ctr0b // AES block 4k+4 - result
b.gt .Lenc_blocks_more_than_3
b.gt .Lenc_blocks_4_remaining
cmp $main_end_input_ptr, #32
mov $ctr3b, $ctr2b
movi $acc_l.8b, #0
movi $acc_h.8b, #0
sub $rctr32w, $rctr32w, #1
mov $ctr2b, $ctr1b
movi $acc_m.8b, #0
b.gt .Lenc_blocks_more_than_2
b.gt .Lenc_blocks_3_remaining
mov $ctr3b, $ctr1b
sub $rctr32w, $rctr32w, #1
cmp $main_end_input_ptr, #16
b.gt .Lenc_blocks_more_than_1
b.gt .Lenc_blocks_2_remaining
sub $rctr32w, $rctr32w, #1
b .Lenc_blocks_less_than_1
.Lenc_blocks_more_than_3: // blocks left > 3
b .Lenc_blocks_1_remaining
.Lenc_blocks_4_remaining: // blocks left = 4
st1 { $res1b}, [$output_ptr], #16 // AES final-3 block - store result
ldp $input_l0, $input_h0, [$input_ptr], #16 // AES final-2 block - load input low & high
rev64 $res0b, $res1b // GHASH final-3 block
Expand All @@ -809,7 +809,7 @@
pmull2 $acc_h.1q, $res0.2d, $h4.2d // GHASH final-3 block - high
pmull $acc_m.1q, $rk4v.1d, $acc_m.1d // GHASH final-3 block - mid
eor $res1b, $res1b, $ctr1b // AES final-2 block - result
.Lenc_blocks_more_than_2: // blocks left > 2
.Lenc_blocks_3_remaining: // blocks left = 3
st1 { $res1b}, [$output_ptr], #16 // AES final-2 block - store result
ldp $input_l0, $input_h0, [$input_ptr], #16 // AES final-1 block - load input low & high
rev64 $res0b, $res1b // GHASH final-2 block
Expand All @@ -828,7 +828,7 @@
pmull $rk4v.1q, $rk4v.1d, $h34k.1d // GHASH final-2 block - mid
eor $acc_lb, $acc_lb, $rk3 // GHASH final-2 block - low
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-2 block - mid
.Lenc_blocks_more_than_1: // blocks left > 1
.Lenc_blocks_2_remaining: // blocks left = 2
st1 { $res1b}, [$output_ptr], #16 // AES final-1 block - store result
rev64 $res0b, $res1b // GHASH final-1 block
ldp $input_l0, $input_h0, [$input_ptr], #16 // AES final block - load input low & high
Expand All @@ -848,7 +848,7 @@
eor $res1b, $res1b, $ctr3b // AES final block - result
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-1 block - mid
eor $acc_lb, $acc_lb, $rk3 // GHASH final-1 block - low
.Lenc_blocks_less_than_1: // blocks left <= 1
.Lenc_blocks_1_remaining: // blocks left = 1
rev64 $res0b, $res1b // GHASH final block
eor $res0b, $res0b, $t0.16b // feed in partial tag
pmull2 $rk2q1, $res0.2d, $h1.2d // GHASH final block - high
Expand Down Expand Up @@ -1390,22 +1390,22 @@
cmp $main_end_input_ptr, #48
eor $output_l0, $output_l0, $rkN_l // AES block 4k+4 - round N low
eor $output_h0, $output_h0, $rkN_h // AES block 4k+4 - round N high
b.gt .Ldec_blocks_more_than_3
b.gt .Ldec_blocks_4_remaining
sub $rctr32w, $rctr32w, #1
mov $ctr3b, $ctr2b
movi $acc_m.8b, #0
movi $acc_l.8b, #0
cmp $main_end_input_ptr, #32
movi $acc_h.8b, #0
mov $ctr2b, $ctr1b
b.gt .Ldec_blocks_more_than_2
b.gt .Ldec_blocks_3_remaining
sub $rctr32w, $rctr32w, #1
mov $ctr3b, $ctr1b
cmp $main_end_input_ptr, #16
b.gt .Ldec_blocks_more_than_1
b.gt .Ldec_blocks_2_remaining
sub $rctr32w, $rctr32w, #1
b .Ldec_blocks_less_than_1
.Ldec_blocks_more_than_3: // blocks left > 3
b .Ldec_blocks_1_remaining
.Ldec_blocks_4_remaining: // blocks left = 4
rev64 $res0b, $res1b // GHASH final-3 block
ld1 { $res1b}, [$input_ptr], #16 // AES final-2 block - load ciphertext
stp $output_l0, $output_h0, [$output_ptr], #16 // AES final-3 block - store result
Expand All @@ -1422,7 +1422,7 @@
eor $output_l0, $output_l0, $rkN_l // AES final-2 block - round N low
pmull $acc_l.1q, $res0.1d, $h4.1d // GHASH final-3 block - low
eor $output_h0, $output_h0, $rkN_h // AES final-2 block - round N high
.Ldec_blocks_more_than_2: // blocks left > 2
.Ldec_blocks_3_remaining: // blocks left = 3
rev64 $res0b, $res1b // GHASH final-2 block
ld1 { $res1b}, [$input_ptr], #16 // AES final-1 block - load ciphertext
eor $res0b, $res0b, $t0.16b // feed in partial tag
Expand All @@ -1441,7 +1441,7 @@
eor $output_l0, $output_l0, $rkN_l // AES final-1 block - round N low
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-2 block - mid
eor $output_h0, $output_h0, $rkN_h // AES final-1 block - round N high
.Ldec_blocks_more_than_1: // blocks left > 1
.Ldec_blocks_2_remaining: // blocks left = 2
stp $output_l0, $output_h0, [$output_ptr], #16 // AES final-1 block - store result
rev64 $res0b, $res1b // GHASH final-1 block
ld1 { $res1b}, [$input_ptr], #16 // AES final block - load ciphertext
Expand All @@ -1461,7 +1461,7 @@
eor $acc_hb, $acc_hb, $rk2 // GHASH final-1 block - high
eor $acc_mb, $acc_mb, $rk4v.16b // GHASH final-1 block - mid
eor $output_h0, $output_h0, $rkN_h // AES final block - round N high
.Ldec_blocks_less_than_1: // blocks left <= 1
.Ldec_blocks_1_remaining: // blocks left = 1
rev $ctr32w, $rctr32w
rev64 $res0b, $res1b // GHASH final block
eor $res0b, $res0b, $t0.16b // feed in partial tag
Expand Down
32 changes: 16 additions & 16 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
Original file line number Diff line number Diff line change
Expand Up @@ -657,22 +657,22 @@ Lenc_tail: // TAIL
fmov d4, x6 // AES block 4k+4 - mov low
fmov v4.d[1], x7 // AES block 4k+4 - mov high
eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result
b.gt Lenc_blocks_more_than_3
b.gt Lenc_blocks_4_remaining
cmp x5, #32
mov v3.16b, v2.16b
movi v11.8b, #0
movi v9.8b, #0
sub w12, w12, #1
mov v2.16b, v1.16b
movi v10.8b, #0
b.gt Lenc_blocks_more_than_2
b.gt Lenc_blocks_3_remaining
mov v3.16b, v1.16b
sub w12, w12, #1
cmp x5, #16
b.gt Lenc_blocks_more_than_1
b.gt Lenc_blocks_2_remaining
sub w12, w12, #1
b Lenc_blocks_less_than_1
Lenc_blocks_more_than_3: // blocks left > 3
b Lenc_blocks_1_remaining
Lenc_blocks_4_remaining: // blocks left = 4
st1 { v5.16b}, [x2], #16 // AES final-3 block - store result
ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high
rev64 v4.16b, v5.16b // GHASH final-3 block
Expand All @@ -689,7 +689,7 @@ Lenc_blocks_more_than_3: // blocks left > 3
pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
eor v5.16b, v5.16b, v1.16b // AES final-2 block - result
Lenc_blocks_more_than_2: // blocks left > 2
Lenc_blocks_3_remaining: // blocks left = 3
st1 { v5.16b}, [x2], #16 // AES final-2 block - store result
ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high
rev64 v4.16b, v5.16b // GHASH final-2 block
Expand All @@ -708,7 +708,7 @@ Lenc_blocks_more_than_2: // blocks left > 2
pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
Lenc_blocks_more_than_1: // blocks left > 1
Lenc_blocks_2_remaining: // blocks left = 2
st1 { v5.16b}, [x2], #16 // AES final-1 block - store result
rev64 v4.16b, v5.16b // GHASH final-1 block
ldp x6, x7, [x0], #16 // AES final block - load input low & high
Expand All @@ -728,7 +728,7 @@ Lenc_blocks_more_than_1: // blocks left > 1
eor v5.16b, v5.16b, v3.16b // AES final block - result
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
Lenc_blocks_less_than_1: // blocks left <= 1
Lenc_blocks_1_remaining: // blocks left = 1
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
Expand Down Expand Up @@ -1411,22 +1411,22 @@ Ldec_tail: // TAIL
cmp x5, #48
eor x6, x6, x13 // AES block 4k+4 - round N low
eor x7, x7, x14 // AES block 4k+4 - round N high
b.gt Ldec_blocks_more_than_3
b.gt Ldec_blocks_4_remaining
sub w12, w12, #1
mov v3.16b, v2.16b
movi v10.8b, #0
movi v11.8b, #0
cmp x5, #32
movi v9.8b, #0
mov v2.16b, v1.16b
b.gt Ldec_blocks_more_than_2
b.gt Ldec_blocks_3_remaining
sub w12, w12, #1
mov v3.16b, v1.16b
cmp x5, #16
b.gt Ldec_blocks_more_than_1
b.gt Ldec_blocks_2_remaining
sub w12, w12, #1
b Ldec_blocks_less_than_1
Ldec_blocks_more_than_3: // blocks left > 3
b Ldec_blocks_1_remaining
Ldec_blocks_4_remaining: // blocks left = 4
rev64 v4.16b, v5.16b // GHASH final-3 block
ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext
stp x6, x7, [x2], #16 // AES final-3 block - store result
Expand All @@ -1443,7 +1443,7 @@ Ldec_blocks_more_than_3: // blocks left > 3
eor x6, x6, x13 // AES final-2 block - round N low
pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
eor x7, x7, x14 // AES final-2 block - round N high
Ldec_blocks_more_than_2: // blocks left > 2
Ldec_blocks_3_remaining: // blocks left = 3
rev64 v4.16b, v5.16b // GHASH final-2 block
ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext
eor v4.16b, v4.16b, v8.16b // feed in partial tag
Expand All @@ -1462,7 +1462,7 @@ Ldec_blocks_more_than_2: // blocks left > 2
eor x6, x6, x13 // AES final-1 block - round N low
eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
eor x7, x7, x14 // AES final-1 block - round N high
Ldec_blocks_more_than_1: // blocks left > 1
Ldec_blocks_2_remaining: // blocks left = 2
stp x6, x7, [x2], #16 // AES final-1 block - store result
rev64 v4.16b, v5.16b // GHASH final-1 block
ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext
Expand All @@ -1482,7 +1482,7 @@ Ldec_blocks_more_than_1: // blocks left > 1
eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor x7, x7, x14 // AES final block - round N high
Ldec_blocks_less_than_1: // blocks left <= 1
Ldec_blocks_1_remaining: // blocks left = 1
rev w9, w12
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
Expand Down
32 changes: 16 additions & 16 deletions generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S
Original file line number Diff line number Diff line change
Expand Up @@ -657,22 +657,22 @@ aes_gcm_enc_kernel:
fmov d4, x6 // AES block 4k+4 - mov low
fmov v4.d[1], x7 // AES block 4k+4 - mov high
eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result
b.gt .Lenc_blocks_more_than_3
b.gt .Lenc_blocks_4_remaining
cmp x5, #32
mov v3.16b, v2.16b
movi v11.8b, #0
movi v9.8b, #0
sub w12, w12, #1
mov v2.16b, v1.16b
movi v10.8b, #0
b.gt .Lenc_blocks_more_than_2
b.gt .Lenc_blocks_3_remaining
mov v3.16b, v1.16b
sub w12, w12, #1
cmp x5, #16
b.gt .Lenc_blocks_more_than_1
b.gt .Lenc_blocks_2_remaining
sub w12, w12, #1
b .Lenc_blocks_less_than_1
.Lenc_blocks_more_than_3: // blocks left > 3
b .Lenc_blocks_1_remaining
.Lenc_blocks_4_remaining: // blocks left = 4
st1 { v5.16b}, [x2], #16 // AES final-3 block - store result
ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high
rev64 v4.16b, v5.16b // GHASH final-3 block
Expand All @@ -689,7 +689,7 @@ aes_gcm_enc_kernel:
pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
eor v5.16b, v5.16b, v1.16b // AES final-2 block - result
.Lenc_blocks_more_than_2: // blocks left > 2
.Lenc_blocks_3_remaining: // blocks left = 3
st1 { v5.16b}, [x2], #16 // AES final-2 block - store result
ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high
rev64 v4.16b, v5.16b // GHASH final-2 block
Expand All @@ -708,7 +708,7 @@ aes_gcm_enc_kernel:
pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
.Lenc_blocks_more_than_1: // blocks left > 1
.Lenc_blocks_2_remaining: // blocks left = 2
st1 { v5.16b}, [x2], #16 // AES final-1 block - store result
rev64 v4.16b, v5.16b // GHASH final-1 block
ldp x6, x7, [x0], #16 // AES final block - load input low & high
Expand All @@ -728,7 +728,7 @@ aes_gcm_enc_kernel:
eor v5.16b, v5.16b, v3.16b // AES final block - result
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
.Lenc_blocks_less_than_1: // blocks left <= 1
.Lenc_blocks_1_remaining: // blocks left = 1
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
Expand Down Expand Up @@ -1411,22 +1411,22 @@ aes_gcm_dec_kernel:
cmp x5, #48
eor x6, x6, x13 // AES block 4k+4 - round N low
eor x7, x7, x14 // AES block 4k+4 - round N high
b.gt .Ldec_blocks_more_than_3
b.gt .Ldec_blocks_4_remaining
sub w12, w12, #1
mov v3.16b, v2.16b
movi v10.8b, #0
movi v11.8b, #0
cmp x5, #32
movi v9.8b, #0
mov v2.16b, v1.16b
b.gt .Ldec_blocks_more_than_2
b.gt .Ldec_blocks_3_remaining
sub w12, w12, #1
mov v3.16b, v1.16b
cmp x5, #16
b.gt .Ldec_blocks_more_than_1
b.gt .Ldec_blocks_2_remaining
sub w12, w12, #1
b .Ldec_blocks_less_than_1
.Ldec_blocks_more_than_3: // blocks left > 3
b .Ldec_blocks_1_remaining
.Ldec_blocks_4_remaining: // blocks left = 4
rev64 v4.16b, v5.16b // GHASH final-3 block
ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext
stp x6, x7, [x2], #16 // AES final-3 block - store result
Expand All @@ -1443,7 +1443,7 @@ aes_gcm_dec_kernel:
eor x6, x6, x13 // AES final-2 block - round N low
pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
eor x7, x7, x14 // AES final-2 block - round N high
.Ldec_blocks_more_than_2: // blocks left > 2
.Ldec_blocks_3_remaining: // blocks left = 3
rev64 v4.16b, v5.16b // GHASH final-2 block
ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext
eor v4.16b, v4.16b, v8.16b // feed in partial tag
Expand All @@ -1462,7 +1462,7 @@ aes_gcm_dec_kernel:
eor x6, x6, x13 // AES final-1 block - round N low
eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
eor x7, x7, x14 // AES final-1 block - round N high
.Ldec_blocks_more_than_1: // blocks left > 1
.Ldec_blocks_2_remaining: // blocks left = 2
stp x6, x7, [x2], #16 // AES final-1 block - store result
rev64 v4.16b, v5.16b // GHASH final-1 block
ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext
Expand All @@ -1482,7 +1482,7 @@ aes_gcm_dec_kernel:
eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
eor x7, x7, x14 // AES final block - round N high
.Ldec_blocks_less_than_1: // blocks left <= 1
.Ldec_blocks_1_remaining: // blocks left = 1
rev w9, w12
rev64 v4.16b, v5.16b // GHASH final block
eor v4.16b, v4.16b, v8.16b // feed in partial tag
Expand Down
Loading

0 comments on commit 5d53371

Please sign in to comment.