Skip to content

Commit

Permalink
Merge pull request #3760 from alyssarosenzweig/avx/vpclmulqdql
Browse files Browse the repository at this point in the history
AVX128: fix VPCLMULQDQl
  • Loading branch information
Sonicadvance1 authored Jun 25, 2024
2 parents 77aaa9a + 41923ba commit 5da205d
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 14 deletions.
14 changes: 3 additions & 11 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2260,19 +2260,11 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
}

// Dispatcher for VPCLMULQDQ on the AVX128 path: builds _PCLMUL IR ops from the
// two vector sources and the immediate selector operand.
//
// NOTE(review): this listing is a rendered diff whose +/- markers were lost,
// so the pre-change and post-change bodies appear back to back. The commit
// header reports 3 additions and 11 deletions for this file; by that count the
// Size/Is128Bit/Src1/Src2/Result statements look like the removed
// implementation and the AVX128_VectorBinaryImpl call looks like its
// replacement - confirm against the repository before reusing this text.
void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
// Apparently the removed implementation: derives the operation width from the
// destination size and treats anything other than XMM size as "not 128-bit".
const auto Size = GetDstSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
// Immediate operand (third source), truncated to 8 bits. Referenced by both
// the old and the new code paths.
const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());

// Old path: load both sources as Low/High pairs; the last argument requests
// the high half only when the operation is not 128-bit.
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);

// Old path: one _PCLMUL for the low halves, a second one for the high halves
// when not 128-bit. The selector is passed through unmasked here, and
// Result.High keeps its value-initialized state in the 128-bit case.
RefPair Result {};
Result.Low = _PCLMUL(OpSize::i128Bit, Src1.Low, Src2.Low, Selector);
if (!Is128Bit) {
Result.High = _PCLMUL(OpSize::i128Bit, Src1.High, Src2.High, Selector);
}
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
// New path: hand the operation to the shared binary-op helper, passing a
// lambda that emits one 128-bit _PCLMUL for the pair of Refs it is given.
// The selector is masked with 0b1'0001, i.e. only bits 0 and 4 of the
// immediate are forwarded; all other immediate bits are dropped.
// NOTE(review): presumably the helper calls the lambda once per 128-bit half
// and takes care of the upper-half handling for the 128-bit form - the helper
// is not visible here, verify there.
AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), 0, [this, Selector](size_t _, Ref Src1, Ref Src2) {
return _PCLMUL(OpSize::i128Bit, Src1, Src2, Selector & 0b1'0001);
});
}

} // namespace FEXCore::IR
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) {
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
const auto Selector = static_cast<uint8_t>(Op->Src[1].Literal());

auto Res = _PCLMUL(16, Dest, Src, Selector);
auto Res = _PCLMUL(16, Dest, Src, Selector & 0b1'0001);
StoreResult(FPRClass, Op, Res, -1);
}

Expand All @@ -420,7 +420,7 @@ void OpDispatchBuilder::VPCLMULQDQOp(OpcodeArgs) {
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());

Ref Res = _PCLMUL(DstSize, Src1, Src2, Selector);
Ref Res = _PCLMUL(DstSize, Src1, Src2, Selector & 0b1'0001);
StoreResult(FPRClass, Op, Res, -1);
}

Expand Down
8 changes: 7 additions & 1 deletion unittests/ASM/VEX/vpclmulqdq_256.asm
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"XMM3": ["0x1E2017C5BEE29400", "0x38358E40CC367C7A", "0x4b4b4b4b4b4b4b4b", "0x4b4b4b4b4b4b4b4b"],
"XMM4": ["0xE208147952DE57A0", "0x317D360F86C80DC9", "0x4646464646464646", "0x4646464646464646"],
"XMM5": ["0xBBA54C87DA872B40", "0x6495428B7641EBE6", "0x4444444444444444", "0x4444444444444444"],
"XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x4848484848484848", "0x4848484848484848"]
"XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x4848484848484848", "0x4848484848484848"],
"XMM7": ["0x1e2017c5bee29400", "0x38358e40cc367c7a", "0", "0"]
}
}
%endif
Expand All @@ -23,6 +24,7 @@ vmovaps ymm3, [rdx + 32 * 0]
vmovaps ymm4, [rdx + 32 * 0]
vmovaps ymm5, [rdx + 32 * 0]
vmovaps ymm6, [rdx + 32 * 0]
vmovaps ymm7, [rdx + 32 * 0]

; With imm = 0b00000000
vpclmulqdq ymm3, ymm1, ymm2, 0
Expand All @@ -36,6 +38,10 @@ vpclmulqdq ymm5, ymm1, ymm2, 16
; With imm = 0b00010001
vpclmulqdq ymm6, ymm1, ymm2, 17

; Test zero-extension
; Also test a wacky immediate.
vpclmulqdq xmm7, xmm1, xmm2, 11101110b

hlt

align 32
Expand Down

0 comments on commit 5da205d

Please sign in to comment.