Skip to content

Commit

Permalink
Add clean AES-GCM-128 decrypt variants
Browse files Browse the repository at this point in the history
  • Loading branch information
hanno-becker committed Jul 9, 2024
1 parent 81ff142 commit 581dae5
Show file tree
Hide file tree
Showing 96 changed files with 7,267 additions and 419 deletions.
9 changes: 6 additions & 3 deletions crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,12 @@ if(ARCH STREQUAL "aarch64")
test/trampoline-armv8.${ASM_EXT}
cipher_extra/chacha20_poly1305_armv8.${ASM_EXT}

fipsmodule/modes/asm/aesv8-gcm-armv8-slothy-128.S
fipsmodule/modes/asm/aesv8-gcm-armv8-slothy-192.S
fipsmodule/modes/asm/aesv8-gcm-armv8-slothy-256.S
fipsmodule/modes/asm/aesv8-gcm-armv8-enc-slothy-128.S
fipsmodule/modes/asm/aesv8-gcm-armv8-enc-slothy-192.S
fipsmodule/modes/asm/aesv8-gcm-armv8-enc-slothy-256.S
fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-128.S
fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-192.S
fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-256.S
)
endif()

Expand Down
828 changes: 828 additions & 0 deletions crypto/fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-128.S

Large diffs are not rendered by default.

828 changes: 828 additions & 0 deletions crypto/fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-192.S

Large diffs are not rendered by default.

828 changes: 828 additions & 0 deletions crypto/fipsmodule/modes/asm/aesv8-gcm-armv8-dec-slothy-256.S

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,17 @@
#include <openssl/arm_arch.h>
.arch armv8-a+crypto
.text
.globl aes_gcm_enc_kernel_slothy_base_192
.hidden aes_gcm_enc_kernel_slothy_base_192
.type aes_gcm_enc_kernel_slothy_base_192,%function
.globl aes_gcm_dec_kernel_slothy_base_128
.hidden aes_gcm_dec_kernel_slothy_base_128
.type aes_gcm_dec_kernel_slothy_base_128,%function
#elif defined(__APPLE__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _aes_gcm_enc_kernel_slothy_base_192
.private_extern _aes_gcm_enc_kernel_slothy_base_192
.globl _aes_gcm_dec_kernel_slothy_base_128
.private_extern _aes_gcm_dec_kernel_slothy_base_128
#else
#error Unknown configuration
#endif
Expand All @@ -74,17 +74,11 @@ full_blocks .req x7
remainder .req x9
unroll .req x10

aes_st0 .req v0
aes_st0_q .req q0
aes_st .req v0
aes_st_q .req q0

aes_st1 .req v2
aes_st1_q .req q2

res0 .req v0
res0_q .req q0

res1 .req v2
res1_q .req q2
res .req v0
res_q .req q0

ghash_hi .req v9
ghash_lo .req v8
Expand Down Expand Up @@ -144,9 +138,6 @@ rk8q .req q26
rk9q .req q27
rk10q .req q28

rk11q .req q15
rk12q .req q16

rk0 .req v18
rk1 .req v19
rk2 .req v20
Expand All @@ -159,9 +150,6 @@ rk8 .req v26
rk9 .req v27
rk10 .req v28

rk11 .req v15
rk12 .req v16

plain .req v29
plain_q .req q29

Expand Down Expand Up @@ -217,10 +205,14 @@ tag_q .req q11
ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)]
.endm

// Derive number of iterations of unrolled loop and single-block loop
//
// Reads:  byte_len
// Writes: unroll, full_blocks, count, remainder
//
// Result:
//   full_blocks = byte_len >> 4  (whole 16-byte blocks; a trailing partial
//                                 block is not counted)
//   count       = full_blocks / UNROLL
//   remainder   = full_blocks - count * UNROLL
.macro prepare_loop_counts
mov unroll, #UNROLL
// Number of whole AES blocks (16 bytes each)
lsr full_blocks, byte_len, #4
// Number of iterations of the unrolled loop
udiv count, full_blocks, unroll
// Number of iterations for the tail loop handling 1 block each
// (msub computes full_blocks - count * unroll)
msub remainder, count, unroll, full_blocks
.endm

Expand All @@ -231,17 +223,20 @@ tag_q .req q11
.macro load_iv
ldr rtmp_ctr_q, [ivec]

mov constant_temp, #0x100000000 // set up counter increment
// set up counter increment
mov constant_temp, #0x100000000
movi rctr_inc.16b, #0x0
fmov rctr_inc.d[1], constant_temp

rev32 rtmp_ctr.16b, rtmp_ctr.16b
.endm

// Increase AES counter
//
// Adds rctr_inc to rtmp_ctr as four independent 32-bit lanes, so no carry
// propagates between lanes. With the increment vector prepared by load_iv
// (only the topmost 32-bit lane is non-zero, holding 1), this advances the
// topmost 32-bit lane of rtmp_ctr by one.
.macro aes_ctr_inc
add rtmp_ctr.4s, rtmp_ctr.4s, rctr_inc.4s
.endm

// Increase AES counter and initialize new AES state
.macro next_ctr_init_aes aes_st
rev32 \aes_st\().16b, rtmp_ctr.16b
aes_ctr_inc
Expand Down Expand Up @@ -283,14 +278,13 @@ tag_q .req q11
.endm

.macro aesr_final aes_st, plain, out
aese \aes_st\().16b, rk11.16b
eor3 \out\().16b, \plain\().16b, rk12.16b, \aes_st\().16b
aese \aes_st\().16b, rk9.16b
eor3 \out\().16b, \aes_st\().16b, rk10.16b, \plain\().16b
.endm

.macro aes_full_block aes_st, input, output
next_ctr_init_aes \aes_st
aesr_0_8 \aes_st\(), rk
aesr_9_10 \aes_st\(), rk
aesr_0_8 \aes_st\(), rk
aesr_final \aes_st, \input, \output
.endm

Expand All @@ -310,8 +304,6 @@ tag_q .req q11
load_round_key 8
load_round_key 9
load_round_key 10
load_round_key 11
load_round_key 12
.endm

/********************************************************************/
Expand Down Expand Up @@ -408,52 +400,6 @@ tag_q .req q11
/* Macros for GHASH update */
/********************************************************************/

// Start a fresh GHASH accumulation from two blocks.
//
// Parameters:
//   inputa, inputb - vector registers holding the two blocks
//   Ha, Hb         - vector registers multiplied with inputa / inputb
//   Hk_mid         - vector register used for the middle products; its upper
//                    64 bits are paired with inputa, its lower 64 bits with
//                    inputb
//
// ghash_lo, ghash_hi and ghash_mid are overwritten (not accumulated into)
// with the XOR over both blocks of the low, high and middle carry-less
// partial products. `tag` is XORed into inputa before multiplying.
// No reduction is performed in this macro.
//
// Clobbers: \inputa, \inputb, ghash_tmp
.macro ghash_init_pair inputa, inputb, Ha, Hb, Hk_mid
// Byte-reverse each 64-bit half of both blocks
rev64 \inputa\().16b, \inputa\().16b
rev64 \inputb\().16b, \inputb\().16b
// Fold the current tag into the first block
eor \inputa\().16b, \inputa\().16b, tag.16b

// Low product
// ghash_lo = lo64(inputa) * lo64(Ha) ^ lo64(inputb) * lo64(Hb)
pmull ghash_lo.1q, \inputa\().1d, \Ha\().1d
pmull ghash_tmp.1q, \inputb\().1d, \Hb\().1d
eor ghash_lo.16b, ghash_lo.16b, ghash_tmp.16b
// High product
// ghash_hi = hi64(inputa) * hi64(Ha) ^ hi64(inputb) * hi64(Hb)
pmull2 ghash_hi.1q, \inputa\().2d, \Ha\().2d
pmull2 ghash_tmp.1q, \inputb\().2d, \Hb\().2d
eor ghash_hi.16b, ghash_hi.16b, ghash_tmp.16b
// Middle product
// After trn1/trn2/eor: ghash_tmp = { lo64(b) ^ hi64(b), lo64(a) ^ hi64(a) }
trn1 ghash_tmp.2d, \inputb\().2d, \inputa\().2d
trn2 \inputb\().2d, \inputb\().2d, \inputa\().2d
eor ghash_tmp.16b, ghash_tmp.16b, \inputb\().16b
// Upper lane (from inputa) times hi64(Hk_mid)
pmull2 ghash_mid.1q, ghash_tmp.2d, \Hk_mid\().2d
// Lower lane (from inputb) times lo64(Hk_mid)
pmull ghash_tmp.1q, ghash_tmp.1d, \Hk_mid\().1d
eor ghash_mid.16b, ghash_mid.16b, ghash_tmp.16b
.endm

// Accumulate two further blocks into an ongoing GHASH computation.
//
// Parameters:
//   inputa, inputb - vector registers holding the two blocks
//   Ha, Hb         - vector registers multiplied with inputa / inputb
//   Hk_mid         - vector register used for the middle products; its upper
//                    64 bits are paired with inputa, its lower 64 bits with
//                    inputb
//
// The low, high and middle carry-less partial products of both blocks are
// XORed into the existing ghash_lo, ghash_hi and ghash_mid. This macro does
// not XOR in `tag` and performs no reduction.
//
// Clobbers: \inputa, \inputb, ghash_tmp
.macro ghash_acc_pair inputa, inputb, Ha, Hb, Hk_mid
// Byte-reverse each 64-bit half of both blocks
rev64 \inputa\().16b, \inputa\().16b
rev64 \inputb\().16b, \inputb\().16b

// Low product
// ghash_lo ^= lo64(inputa) * lo64(Ha) ^ lo64(inputb) * lo64(Hb)
pmull ghash_tmp.1q, \inputa\().1d, \Ha\().1d
eor ghash_lo.16b, ghash_lo.16b, ghash_tmp.16b
pmull ghash_tmp.1q, \inputb\().1d, \Hb\().1d
eor ghash_lo.16b, ghash_lo.16b, ghash_tmp.16b
// High product
// ghash_hi ^= hi64(inputa) * hi64(Ha) ^ hi64(inputb) * hi64(Hb)
pmull2 ghash_tmp.1q, \inputa\().2d, \Ha\().2d
eor ghash_hi.16b, ghash_hi.16b, ghash_tmp.16b
pmull2 ghash_tmp.1q, \inputb\().2d, \Hb\().2d
eor ghash_hi.16b, ghash_hi.16b, ghash_tmp.16b
// Middle product
// After trn1/trn2/eor: ghash_tmp = { lo64(b) ^ hi64(b), lo64(a) ^ hi64(a) }
trn1 ghash_tmp.2d, \inputb\().2d, \inputa\().2d
trn2 \inputb\().2d, \inputb\().2d, \inputa\().2d
eor ghash_tmp.16b, ghash_tmp.16b, \inputb\().16b
// \inputa is no longer needed as data and is reused as scratch here
pmull2 \inputa\().1q, ghash_tmp.2d, \Hk_mid\().2d
eor ghash_mid.16b, ghash_mid.16b, \inputa\().16b
pmull ghash_tmp.1q, ghash_tmp.1d, \Hk_mid\().1d
eor ghash_mid.16b, ghash_mid.16b, ghash_tmp.16b
.endm

.macro ghash_init_0 input, Hk, Hk_mid, tag
rev64 \input\().16b, \input\().16b
eor \input\().16b, \input\().16b, \tag\().16b
Expand Down Expand Up @@ -541,19 +487,19 @@ tag_q .req q11
/********************************************************************/

.align 4
_aes_gcm_enc_kernel_slothy_base_192:
aes_gcm_enc_kernel_slothy_base_192:
_aes_gcm_dec_kernel_slothy_base_128:
aes_gcm_dec_kernel_slothy_base_128:
#ifdef BORINGSSL_DISPATCH_TEST
adrp x9,_BORINGSSL_function_hit@PAGE
add x9, x9, _BORINGSSL_function_hit@PAGEOFF
mov w10, #1
strb w10, [x9,#2] // kFlag_aes_gcm_enc_kernel
strb w10, [x9,#2] // kFlag_aes_gcm_dec_kernel
#endif

AARCH64_SIGN_LINK_REGISTER
AARCH64_VALID_CALL_TARGET
sub sp, sp, #STACK_SIZE

Lenc_preamble_start:
Ldec_preamble_start:
save_gprs
save_vregs

Expand All @@ -566,33 +512,37 @@ Lenc_preamble_start:
prepare_loop_counts
prepare_ghash

Lenc_preamble_end:
Ldec_preamble_end:

cbz count, Lloop_unrolled_end
Lloop_unrolled_start:


ldr plain_q, [input], #(4*16)
aes_full_block aes_st0, plain, res0
str res0_q, [output], #(4*16)
aes_full_block aes_st, plain, res
str res_q, [output], #(4*16)

load_htable_34
ghash_init_1 plain, Ht4, Ht34, tag

ldr plain_q, [input, #(-3*16)]
aes_full_block aes_st1, plain, res1
str res1_q, [output, #(-3*16)]
aes_full_block aes_st, plain, res
str res_q, [output, #(-3*16)]

load_htable_34
ghash_init_pair res0, res1, Ht4, Ht3, Ht34
ghash_acc_0 plain, Ht3, Ht34

ldr plain_q, [input, #(-2*16)]
aes_full_block aes_st0, plain, res0
str res0_q, [output, #(-2*16)]

ldr plain_q, [input, #(-1*16)]
aes_full_block aes_st1, plain, res1
str res1_q, [output, #(-1*16)]
aes_full_block aes_st, plain, res
str res_q, [output, #(-2*16)]

load_htable_12
ghash_acc_pair res0, res1, Ht2, Ht1, Ht12
ghash_acc_1 plain, Ht2, Ht12

ldr plain_q, [input, #(-1*16)]
aes_full_block aes_st, plain, res
str res_q, [output, #(-1*16)]

ghash_acc_0 plain, Ht1, Ht12
ghash_finalize tag

sub count, count, #1
Expand All @@ -605,10 +555,10 @@ Lloop_unrolled_end:
Lloop_1x_start:

ldr plain_q, [input], #16
aes_full_block aes_st0, plain, res0
str res0_q, [output], #16
ghash_init_0 res0, Ht1, Ht12, tag
aes_full_block aes_st, plain, res
str res_q, [output], #16

ghash_init_0 plain, Ht1, Ht12, tag
ghash_finalize tag

sub remainder, remainder, #1
Expand All @@ -627,10 +577,9 @@ Lloop_1x_end:
restore_vregs
restore_gprs

Lenc_postamble_end:
Ldec_postamble_end:
add sp, sp, #STACK_SIZE

AARCH64_VALIDATE_LINK_REGISTER
ret

#endif
Expand Down
Loading

0 comments on commit 581dae5

Please sign in to comment.