Skip to content

Commit

Permalink
Merge pull request #3544 from alyssarosenzweig/ra/zero-multiple
Browse files (browse the repository at this point in the history)
OpcodeDispatcher: drop ZeroMultipleFlags
  • Loading branch information
alyssarosenzweig authored Apr 2, 2024
2 parents e8abc88 + 7852909 commit b632f72
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 112 deletions.
13 changes: 2 additions & 11 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -999,16 +999,7 @@ friend class FEXCore::IR::PassManager;
}

static bool IsNZCV(unsigned BitOffset) {
switch (BitOffset) {
case FEXCore::X86State::RFLAG_CF_RAW_LOC:
case FEXCore::X86State::RFLAG_ZF_RAW_LOC:
case FEXCore::X86State::RFLAG_SF_RAW_LOC:
case FEXCore::X86State::RFLAG_OF_RAW_LOC:
return true;

default:
return false;
}
return ContainsNZCV(1U << BitOffset);
}

OrderedNode* CachedNZCV{};
Expand Down Expand Up @@ -1440,7 +1431,7 @@ friend class FEXCore::IR::PassManager;
SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(_Constant(Constant << 4));
}

void ZeroMultipleFlags(uint32_t BitMask);
void ZeroPF_AF();

CondClassType CondForNZCVBit(unsigned BitOffset, bool Invert) {
switch (BitOffset) {
Expand Down
72 changes: 7 additions & 65 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,61 +38,10 @@ constexpr std::array<uint32_t, 17> FlagOffsets = {
FEXCore::X86State::RFLAG_ID_LOC,
};

void OpDispatchBuilder::ZeroMultipleFlags(uint32_t FlagsMask) {
auto ZeroConst = _Constant(0);

if (ContainsNZCV(FlagsMask)) {
// NZCV is stored packed together.
// It's more optimal to zero NZCV with move+bic instead of multiple bics.
auto NZCVFlagsMask = FlagsMask & FullNZCVMask;
if (NZCVFlagsMask == FullNZCVMask) {
ZeroNZCV();
}
else {
const auto IndexMask = NZCVIndexMask(FlagsMask);

if (std::popcount(NZCVFlagsMask) == 1) {
// It's more optimal to store only one here.

for (size_t i = 0; NZCVFlagsMask && i < FlagOffsets.size(); ++i) {
const auto FlagOffset = FlagOffsets[i];
const auto FlagMask = 1U << FlagOffset;
if (!(FlagMask & NZCVFlagsMask)) {
continue;
}
SetRFLAG(ZeroConst, FlagOffset);
NZCVFlagsMask &= ~(FlagMask);
}
}
else {
auto IndexMaskConstant = _Constant(IndexMask);
auto NewNZCV = _Andn(OpSize::i64Bit, GetNZCV(), IndexMaskConstant);
SetNZCV(NewNZCV);
}
// Unset the possibly set bits.
PossiblySetNZCVBits &= ~IndexMask;
}

// Handled NZCV, so remove it from the mask.
FlagsMask &= ~FullNZCVMask;
}

void OpDispatchBuilder::ZeroPF_AF() {
// PF is stored inverted, so invert it when we zero.
if (FlagsMask & (1u << X86State::RFLAG_PF_RAW_LOC)) {
SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(_Constant(1));
FlagsMask &= ~(1u << X86State::RFLAG_PF_RAW_LOC);
}

// Handle remaining masks.
for (size_t i = 0; FlagsMask && i < FlagOffsets.size(); ++i) {
const auto FlagOffset = FlagOffsets[i];
const auto FlagMask = 1U << FlagOffset;
if (!(FlagMask & FlagsMask)) {
continue;
}
SetRFLAG(ZeroConst, FlagOffset);
FlagsMask &= ~(FlagMask);
}
SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(_Constant(1));
SetAF(0);
}

void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, OrderedNode *Src) {
Expand Down Expand Up @@ -865,9 +814,7 @@ void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Result) {
// is in the range [0, 63]. In particular, it is never negative. So a
// combined NZ test will correctly zero SF/CF/OF while setting ZF.
SetNZ_ZeroCV(OpSize::i32Bit, Result);

ZeroMultipleFlags((1U << X86State::RFLAG_AF_RAW_LOC) |
(1U << X86State::RFLAG_PF_RAW_LOC));
ZeroPF_AF();
}

void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) {
Expand All @@ -893,15 +840,10 @@ void OpDispatchBuilder::CalculateFlags_ZCNT(uint8_t SrcSize, OrderedNode *Result

void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode *Src) {
// OF, SF, ZF, AF, PF all zero
// CF is set to the incoming source

uint32_t FlagsMaskToZero =
FullNZCVMask |
(1U << X86State::RFLAG_AF_RAW_LOC) |
(1U << X86State::RFLAG_PF_RAW_LOC);

ZeroMultipleFlags(FlagsMaskToZero);
ZeroNZCV();
ZeroPF_AF();

// CF is set to the incoming source
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Src);
}

Expand Down
15 changes: 3 additions & 12 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4594,11 +4594,7 @@ void OpDispatchBuilder::PTestOp(OpcodeArgs) {
SetNZ_ZeroCV(32, Test1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Test2);

uint32_t FlagsMaskToZero =
(1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC);

ZeroMultipleFlags(FlagsMaskToZero);
ZeroPF_AF();
}

void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) {
Expand Down Expand Up @@ -4635,8 +4631,7 @@ void OpDispatchBuilder::VTESTOpImpl(OpcodeArgs, size_t ElementSize) {
SetNZ_ZeroCV(32, AndGPR);
SetRFLAG<X86State::RFLAG_CF_RAW_LOC>(CFResult);

ZeroMultipleFlags((1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC));
ZeroPF_AF();
}

template <size_t ElementSize>
Expand Down Expand Up @@ -5568,11 +5563,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask
SetRFLAG<X86State::RFLAG_CF_RAW_LOC>(GetFlagBit(18));
SetRFLAG<X86State::RFLAG_OF_RAW_LOC>(GetFlagBit(19));

uint32_t FlagsMaskToZero =
(1U << X86State::RFLAG_PF_RAW_LOC) |
(1U << X86State::RFLAG_AF_RAW_LOC);

ZeroMultipleFlags(FlagsMaskToZero);
ZeroPF_AF();
}

void OpDispatchBuilder::VPCMPESTRIOp(OpcodeArgs) {
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -704,8 +704,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -719,8 +719,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -733,8 +733,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -748,8 +748,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -763,8 +763,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -777,8 +777,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/FlagM/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"umov w20, v0.b[0]",
"bfxil x4, x20, #0, #16",
"tst w20, w20",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt eax, ebx": {
Expand All @@ -37,8 +37,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt rax, rbx": {
Expand All @@ -50,8 +50,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"tzcnt ax, bx": {
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -855,8 +855,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -869,8 +869,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -884,8 +884,8 @@
"mov x22, x20",
"mov x20, x21",
"bfxil x4, x22, #0, #16",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -899,8 +899,8 @@
"mov x22, x20",
"mov x20, x21",
"mov w4, w22",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand All @@ -913,8 +913,8 @@
"cset x21, ne",
"mov x4, x20",
"mov x20, x21",
"mov w27, #0x0",
"mov w26, #0x1",
"mov w27, #0x0",
"lsl x20, x20, #29",
"msr nzcv, x20"
]
Expand Down
12 changes: 6 additions & 6 deletions unittests/InstructionCountCI/Secondary_REP.json
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,8 @@
"umov w20, v0.b[0]",
"bfxil x4, x20, #0, #16",
"tst w20, w20",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt eax, ebx": {
Expand All @@ -422,8 +422,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"popcnt rax, rbx": {
Expand All @@ -435,8 +435,8 @@
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"tst w4, w4",
"mov w27, #0x0",
"mov w26, #0x1"
"mov w26, #0x1",
"mov w27, #0x0"
]
},
"tzcnt ax, bx": {
Expand Down

0 comments on commit b632f72

Please sign in to comment.