Skip to content

Commit

Permalink
Merge pull request #3522 from alyssarosenzweig/ra/cmpxchg8
Browse files (browse the repository at this point in the history)
OpcodeDispatcher: eliminate branch in cmpxchg pair
  • Loading branch information
Sonicadvance1 authored Mar 28, 2024
2 parents 3d31291 + d1722ab commit aa26b62
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 36 deletions.
24 changes: 8 additions & 16 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4270,23 +4270,15 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) {
SetRFLAG<FEXCore::X86State::RFLAG_ZF_RAW_LOC>(ZFResult);
CalculateDeferredFlags();

auto CondJump_ = CondJump(ZFResult);

// Make sure to start a new block after ending this one
auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock());
SetFalseJumpTarget(CondJump_, JumpTarget);
SetCurrentCodeBlock(JumpTarget);
StartNewBlock();

StoreGPRRegister(X86State::REG_RAX, Result_Lower);
StoreGPRRegister(X86State::REG_RDX, Result_Upper);
// Branch-free conditional write-back of one guest GPR: the register receives
// Value when the NEQ condition holds (the "not ZF" case this helper is named
// for) and otherwise is rewritten with the contents it already had.
auto UpdateIfNotZF = [this](auto Reg, auto Value) {
  // What the register holds right now; this is the fallback operand.
  auto Current = LoadGPRRegister(Reg);

  // The select is done at the full 64-bit width on purpose, so that the
  // fallback path keeps the register's existing upper bits intact. For a
  // 32-bit cmpxchg in a 64-bit context, Value already has its upper bits
  // zeroed, so the wide select is still correct when Value is chosen.
  auto Selected = _NZCVSelect(OpSize::i64Bit, CondClassType{COND_NEQ},
                              Value, Current);

  StoreGPRRegister(Reg, Selected);
};

auto Jump_ = Jump();
auto NextJumpTarget = CreateNewCodeBlockAfter(JumpTarget);
SetJumpTarget(Jump_, NextJumpTarget);
SetTrueJumpTarget(CondJump_, NextJumpTarget);
SetCurrentCodeBlock(NextJumpTarget);
StartNewBlock();
UpdateIfNotZF(X86State::REG_RAX, Result_Lower);
UpdateIfNotZF(X86State::REG_RDX, Result_Upper);
}

void OpDispatchBuilder::CreateJumpBlocks(fextl::vector<FEXCore::Frontend::Decoder::DecodedBlocks> const *Blocks) {
Expand Down
7 changes: 3 additions & 4 deletions unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
]
},
"dxvk hotblock from MGRR": {
"ExpectedInstructionCount": 43,
"ExpectedInstructionCount": 42,
"Comment": [
"Hottest block in Metal Gear Rising: Revengeance render thread"
],
Expand Down Expand Up @@ -141,9 +141,8 @@
"cset x22, eq",
"msr nzcv, x21",
"rmif x22, #62, #nZcv",
"cbnz x22, #+0xc",
"mov w4, w20",
"mov w6, w12"
"csel x4, x20, x4, ne",
"csel x6, x12, x6, ne"
]
},
"Psychonauts matrix swizzle": {
Expand Down
14 changes: 6 additions & 8 deletions unittests/InstructionCountCI/FlagM/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@
]
},
"cmpxchg8b [rbp]": {
"ExpectedInstructionCount": 25,
"ExpectedInstructionCount": 24,
"Comment": "GROUP9 0x0F 0xC7 /1",
"ExpectedArm64ASM": [
"add x20, x9, #0x0 (0)",
Expand All @@ -669,13 +669,12 @@
"cset x22, eq",
"msr nzcv, x21",
"rmif x22, #62, #nZcv",
"cbnz x22, #+0xc",
"mov x4, x20",
"mov x6, x30"
"csel x4, x20, x4, ne",
"csel x6, x30, x6, ne"
]
},
"cmpxchg16b [rbp]": {
"ExpectedInstructionCount": 21,
"ExpectedInstructionCount": 20,
"Comment": "GROUP9 0x0F 0xC7 /1",
"ExpectedArm64ASM": [
"add x20, x9, #0x0 (0)",
Expand All @@ -696,9 +695,8 @@
"cset x22, eq",
"msr nzcv, x21",
"rmif x22, #62, #nZcv",
"cbnz x22, #+0xc",
"mov x4, x20",
"mov x6, x30"
"csel x4, x20, x4, ne",
"csel x6, x30, x6, ne"
]
},
"rdrand ax": {
Expand Down
14 changes: 6 additions & 8 deletions unittests/InstructionCountCI/SecondaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@
]
},
"cmpxchg8b [rbp]": {
"ExpectedInstructionCount": 25,
"ExpectedInstructionCount": 24,
"Comment": "GROUP9 0x0F 0xC7 /1",
"ExpectedArm64ASM": [
"add x20, x9, #0x0 (0)",
Expand All @@ -801,13 +801,12 @@
"cset x22, eq",
"bfi w21, w22, #30, #1",
"msr nzcv, x21",
"cbnz x22, #+0xc",
"mov x4, x20",
"mov x6, x30"
"csel x4, x20, x4, ne",
"csel x6, x30, x6, ne"
]
},
"cmpxchg16b [rbp]": {
"ExpectedInstructionCount": 21,
"ExpectedInstructionCount": 20,
"Comment": "GROUP9 0x0F 0xC7 /1",
"ExpectedArm64ASM": [
"add x20, x9, #0x0 (0)",
Expand All @@ -828,9 +827,8 @@
"cset x22, eq",
"bfi w21, w22, #30, #1",
"msr nzcv, x21",
"cbnz x22, #+0xc",
"mov x4, x20",
"mov x6, x30"
"csel x4, x20, x4, ne",
"csel x6, x30, x6, ne"
]
},
"rdrand ax": {
Expand Down

0 comments on commit aa26b62

Please sign in to comment.