Skip to content

Commit

Permalink
Arm64: Minor optimization in AESKEYGENASSIST
Browse files Browse the repository at this point in the history
The fewer FPR<->GPR movement instructions, the better.
This removes one instance of `ins` and replaces the other with a 64-bit
`dup` instead.
The LoadConstant still turns into a single `movz` instruction with the
shift.
  • Loading branch information
Sonicadvance1 committed Dec 3, 2022
1 parent 863a59a commit 5e46d63
Showing 1 changed file with 7 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ DEF_OP(AESImc) {
}

DEF_OP(AESEnc) {
auto Op = IROp->C<IR::IROp_VAESEnc>();
auto Op = IROp->C<IR::IROp_VAESEnc>();
eor(VTMP2.V16B(), VTMP2.V16B(), VTMP2.V16B());
mov(VTMP1.V16B(), GetSrc(Op->State.ID()).V16B());
aese(VTMP1.V16B(), VTMP2.V16B());
Expand All @@ -27,15 +27,15 @@ DEF_OP(AESEnc) {
}

// Emits the Arm64 sequence for the VAESEncLast IR op: runs `aese` on a copy
// of the State source with an all-zero key operand, then XORs the Key source
// into the result and writes it to the destination register.
DEF_OP(AESEncLast) {
  auto Op = IROp->C<IR::IROp_VAESEncLast>();

  // Clear VTMP2 (x ^ x == 0) so it can serve as a zero key operand for aese.
  eor(VTMP2.V16B(), VTMP2.V16B(), VTMP2.V16B());

  // aese updates its first operand in place, so operate on a copy in VTMP1.
  mov(VTMP1.V16B(), GetSrc(Op->State.ID()).V16B());

  // NOTE(review): per the Arm ISA, aese XORs the key operand in *before* the
  // SubBytes/ShiftRows step; passing zero presumably defers the real key XOR
  // to the final eor below -- confirm against the IR op's intended semantics.
  aese(VTMP1.V16B(), VTMP2.V16B());

  // Apply the round key after the aese step and store to the destination.
  eor(GetDst(Node).V16B(), VTMP1.V16B(), GetSrc(Op->Key.ID()).V16B());
}

DEF_OP(AESDec) {
auto Op = IROp->C<IR::IROp_VAESDec>();
auto Op = IROp->C<IR::IROp_VAESDec>();
eor(VTMP2.V16B(), VTMP2.V16B(), VTMP2.V16B());
mov(VTMP1.V16B(), GetSrc(Op->State.ID()).V16B());
aesd(VTMP1.V16B(), VTMP2.V16B());
Expand All @@ -44,15 +44,15 @@ DEF_OP(AESDec) {
}

// Emits the Arm64 sequence for the VAESDecLast IR op: runs `aesd` on a copy
// of the State source with an all-zero key operand, then XORs the Key source
// into the result and writes it to the destination register.
DEF_OP(AESDecLast) {
  auto Op = IROp->C<IR::IROp_VAESDecLast>();

  // Clear VTMP2 (x ^ x == 0) so it can serve as a zero key operand for aesd.
  eor(VTMP2.V16B(), VTMP2.V16B(), VTMP2.V16B());

  // aesd updates its first operand in place, so operate on a copy in VTMP1.
  mov(VTMP1.V16B(), GetSrc(Op->State.ID()).V16B());

  // NOTE(review): per the Arm ISA, aesd XORs the key operand in *before* the
  // inverse SubBytes/ShiftRows step; passing zero presumably defers the real
  // key XOR to the final eor below -- confirm against the IR op's semantics.
  aesd(VTMP1.V16B(), VTMP2.V16B());

  // Apply the round key after the aesd step and store to the destination.
  eor(GetDst(Node).V16B(), VTMP1.V16B(), GetSrc(Op->Key.ID()).V16B());
}

DEF_OP(AESKeyGenAssist) {
auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();

aarch64::Literal ConstantLiteral (0x0C030609'0306090CULL, 0x040B0E01'0B0E0104ULL);
aarch64::Label PastConstant;
Expand All @@ -69,9 +69,8 @@ DEF_OP(AESKeyGenAssist) {
if (Op->RCON) {
tbl(VTMP1.V16B(), VTMP1.V16B(), VTMP3.V16B());

LoadConstant(TMP1.W(), Op->RCON);
ins(VTMP2.V4S(), 1, TMP1.W());
ins(VTMP2.V4S(), 3, TMP1.W());
LoadConstant(TMP1, static_cast<uint64_t>(Op->RCON) << 32);
dup(VTMP2.V2D(), TMP1);
eor(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
}
else {
Expand Down

0 comments on commit 5e46d63

Please sign in to comment.