diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp index 2d38fb6a45..239a42f30f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp @@ -2260,19 +2260,11 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) { } void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) { - const auto Size = GetDstSize(Op); - const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; const auto Selector = static_cast(Op->Src[2].Literal()); - auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit); - auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit); - - RefPair Result {}; - Result.Low = _PCLMUL(OpSize::i128Bit, Src1.Low, Src2.Low, Selector); - if (!Is128Bit) { - Result.High = _PCLMUL(OpSize::i128Bit, Src1.High, Src2.High, Selector); - } - AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result); + AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), 0, [this, Selector](size_t _, Ref Src1, Ref Src2) { + return _PCLMUL(OpSize::i128Bit, Src1, Src2, Selector & 0b1'0001); + }); } } // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp index ab1a865de5..31529c18e4 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp @@ -409,7 +409,7 @@ void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto Selector = static_cast(Op->Src[1].Literal()); - auto Res = _PCLMUL(16, Dest, Src, Selector); + auto Res = _PCLMUL(16, Dest, Src, Selector & 0b1'0001); StoreResult(FPRClass, Op, Res, -1); } @@ -420,7 +420,7 @@ void OpDispatchBuilder::VPCLMULQDQOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); const auto Selector = static_cast(Op->Src[2].Literal()); - Ref Res = _PCLMUL(DstSize, Src1, Src2, Selector); + Ref Res = _PCLMUL(DstSize, Src1, Src2, Selector & 0b1'0001); StoreResult(FPRClass, Op, Res, -1); } diff --git a/unittests/ASM/VEX/vpclmulqdq_256.asm b/unittests/ASM/VEX/vpclmulqdq_256.asm index a2b5627cec..a2f67861ee 100644 --- a/unittests/ASM/VEX/vpclmulqdq_256.asm +++ b/unittests/ASM/VEX/vpclmulqdq_256.asm @@ -7,7 +7,8 @@ "XMM3": ["0x1E2017C5BEE29400", "0x38358E40CC367C7A", "0x4b4b4b4b4b4b4b4b", "0x4b4b4b4b4b4b4b4b"], "XMM4": ["0xE208147952DE57A0", "0x317D360F86C80DC9", "0x4646464646464646", "0x4646464646464646"], "XMM5": ["0xBBA54C87DA872B40", "0x6495428B7641EBE6", "0x4444444444444444", "0x4444444444444444"], - "XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x4848484848484848", "0x4848484848484848"] + "XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x4848484848484848", "0x4848484848484848"], + "XMM7": ["0x1e2017c5bee29400", "0x38358e40cc367c7a", "0", "0"] } } %endif @@ -23,6 +24,7 @@ vmovaps ymm3, [rdx + 32 * 0] vmovaps ymm4, [rdx + 32 * 0] vmovaps ymm5, [rdx + 32 * 0] vmovaps ymm6, [rdx + 32 * 0] +vmovaps ymm7, [rdx + 32 * 0] ; With imm = 0b00000000 vpclmulqdq ymm3, ymm1, ymm2, 0 @@ -36,6 +38,10 @@ vpclmulqdq ymm5, ymm1, ymm2, 16 ; With imm = 0b00010001 vpclmulqdq ymm6, ymm1, ymm2, 17 +; Test zero-extension +; Also test a wacky immediate. +vpclmulqdq xmm7, xmm1, xmm2, 11101110b + hlt align 32