Skip to content

Commit

Permalink
Merge pull request #2288 from lioncash/hrsw
Browse files Browse the repository at this point in the history
OpcodeDispatcher: Handle VPMULHRSW
  • Loading branch information
Sonicadvance1 authored Dec 22, 2022
2 parents 4a3af8d + 94cb2dd commit 82adc2f
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5982,6 +5982,7 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::VPSIGN<1>},
{OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::VPSIGN<2>},
{OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::VPSIGN<4>},
{OPD(2, 0b01, 0x0B), 1, &OpDispatchBuilder::VPMULHRSWOp},

{OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::VBROADCASTOp<4>},
{OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::VBROADCASTOp<8>},
Expand Down
4 changes: 4 additions & 0 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ enum class SelectionFlag {

void VPHMINPOSUWOp(OpcodeArgs);

void VPMULHRSWOp(OpcodeArgs);

template <bool Signed>
void VPMULHWOp(OpcodeArgs);

Expand Down Expand Up @@ -750,6 +752,8 @@ enum class SelectionFlag {

OrderedNode* PHMINPOSUWOpImpl(OpcodeArgs);

OrderedNode* PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, OrderedNode *Src2);

OrderedNode* PMULHWOpImpl(OpcodeArgs, bool Signed,
OrderedNode *Src1, OrderedNode *Src2);

Expand Down
44 changes: 30 additions & 14 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2784,28 +2784,24 @@ void OpDispatchBuilder::VPMULHWOp<false>(OpcodeArgs);
template
void OpDispatchBuilder::VPMULHWOp<true>(OpcodeArgs);

// NOTE(review): this diff listing has lost its +/- markers, so the lines of
// the REMOVED pre-change PMULHRSW body and the ADDED PMULHRSWOpImpl helper
// appear interleaved below (e.g. the Dest/Src statements are the old lines,
// the Src1/Src2 statements are their replacements). Read old/new pairs
// together; do not treat this span as one compilable function.
void OpDispatchBuilder::PMULHRSW(OpcodeArgs) {
auto Size = GetSrcSize(Op);

OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
// Shared IR lowering for PMULHRSW/VPMULHRSW: per signed 16-bit lane,
// computes round((a*b) >> 15) via widening multiply, >>14, +1, narrow >>1.
OrderedNode* OpDispatchBuilder::PMULHRSWOpImpl(OpcodeArgs, OrderedNode *Src1, OrderedNode *Src2) {
// Operand size in bytes; selects the 64-bit (MMX) or wider lowering below.
const auto Size = GetSrcSize(Op);

OrderedNode *Res{};
if (Size == 8) {
// Implementation is more efficient for 8byte registers
// Old (removed) line used Dest/Src; new (added) line uses Src1/Src2.
Res = _VSMull(Size * 2, 2, Dest, Src);
// Widen each s16 pair to s32 products, then arithmetic >>14 keeps the
// rounding bit in place for the +1 / >>1 sequence that follows.
Res = _VSMull(Size * 2, 2, Src1, Src2);
Res = _VSShrI(Size * 2, 4, Res, 14);
auto OneVector = _VectorImm(Size * 2, 4, 1);
Res = _VAdd(Size * 2, 4, Res, OneVector);
// Old body assigned into Res and fell through to a trailing StoreResult;
// the new helper returns the narrowed result directly.
Res = _VUShrNI(Size * 2, 4, Res, 1);
}
else {
// 128bit is less efficient
return _VUShrNI(Size * 2, 4, Res, 1);
} else {
// 128-bit and 256-bit are less efficient
// Wide path: widening multiply must be split into low/high halves.
OrderedNode *ResultLow;
OrderedNode *ResultHigh;

// Old (Dest/Src) lines followed immediately by their new (Src1/Src2)
// replacements.
ResultLow = _VSMull(Size, 2, Dest, Src);
ResultHigh = _VSMull2(Size, 2, Dest, Src);
ResultLow = _VSMull(Size, 2, Src1, Src2);
ResultHigh = _VSMull2(Size, 2, Src1, Src2);

ResultLow = _VSShrI(Size, 4, ResultLow, 14);
ResultHigh = _VSShrI(Size, 4, ResultHigh, 14);
Expand All @@ -2816,10 +2812,30 @@ void OpDispatchBuilder::PMULHRSW(OpcodeArgs) {

// Combine the results
// Narrow both halves back to 16-bit lanes with the final rounding >>1;
// the _NI2 form packs the high half into the upper lanes.
Res = _VUShrNI(Size, 4, ResultLow, 1);
Res = _VUShrNI2(Size, 4, Res, ResultHigh, 1);
return _VUShrNI2(Size, 4, Res, ResultHigh, 1);
}
}

// Residue of the removed function's tail (its StoreResult call).
StoreResult(FPRClass, Op, Res, -1);
// PMULHRSW: packed signed 16-bit multiply-high with round-and-scale.
// Destructive two-operand SSE/SSSE3 form — the destination register is
// also the first source; the shared lowering lives in PMULHRSWOpImpl.
void OpDispatchBuilder::PMULHRSW(OpcodeArgs) {
  OrderedNode *Lhs = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
  OrderedNode *Rhs = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);

  StoreResult(FPRClass, Op, PMULHRSWOpImpl(Op, Lhs, Rhs), -1);
}

// VPMULHRSW: AVX non-destructive three-operand form of PMULHRSW.
// Sources come from Src[0]/Src[1]; the destination is written separately.
void OpDispatchBuilder::VPMULHRSWOp(OpcodeArgs) {
  OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);
  OrderedNode *Result = PMULHRSWOpImpl(Op, Src1, Src2);

  // VEX.128 encodings zero the upper half of the full-width register;
  // funnel the result through a 16-byte move to get that behavior.
  if (GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE) {
    Result = _VMov(16, Result);
  }

  StoreResult(FPRClass, Op, Result, -1);
}

template<size_t ElementSize>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ void InitializeVEXTables() {
{OPD(2, 0b01, 0x08), 1, X86InstInfo{"VPSIGNB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x09), 1, X86InstInfo{"VPSIGNW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x0A), 1, X86InstInfo{"VPSIGND", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x0B), 1, X86InstInfo{"VPMULHRSW", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x0B), 1, X86InstInfo{"VPMULHRSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0x0C), 1, X86InstInfo{"VPERMILPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x0D), 1, X86InstInfo{"VPERMILPD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0x0E), 1, X86InstInfo{"VTESTPS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
Expand Down
36 changes: 36 additions & 0 deletions unittests/ASM/VEX/vpmulhrsw.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x31A6343B36E09E7A", "0x48134B294E4F5186"],
"XMM5": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x31A6343B36E09E7A", "0x48134B294E4F5186"]
}
}
%endif

; Load the two 256-bit source operands from .data below.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit encodings, register-register and register-memory forms.
; RegData above expects the upper 128 bits of xmm2/xmm3 to be zeroed.
vpmulhrsw xmm2, xmm0, xmm1
vpmulhrsw xmm3, xmm0, [rdx + 32]

; 256-bit encodings: low and high lanes must produce identical results
; since the source data repeats (see .data), per XMM4/XMM5 above.
vpmulhrsw ymm4, ymm0, ymm1
vpmulhrsw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445468748
dq 0x5152535455565758
dq 0x4142434445468748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

0 comments on commit 82adc2f

Please sign in to comment.