Skip to content

Commit

Permalink
Merge pull request #3480 from alyssarosenzweig/setf
Browse files Browse the repository at this point in the history
Use SETF8/16 for 8/16-bit INC/DEC
  • Loading branch information
Sonicadvance1 authored Mar 2, 2024
2 parents 2dd922c + 5fd91d3 commit ea7d169
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 52 deletions.
14 changes: 14 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,20 @@ DEF_OP(RmifNZCV) {
rmif(GetReg(Op->Src.ID()).X(), Op->Rotate, Op->Mask);
}

// Emits SETF8 or SETF16 on the source register so that NZV are set from an
// 8-bit or 16-bit value. The IR definition documents this op as preserving CF.
DEF_OP(SetSmallNZV) {
  auto SetOp = IROp->C<IR::IROp_SetSmallNZV>();
  // The SETF instructions are only emitted when the host reports FlagM.
  LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");

  // Only byte and halfword operations are accepted by this op.
  const uint8_t OpSize = IROp->Size;
  LOGMAN_THROW_AA_FMT(OpSize == 1 || OpSize == 2, "Unsupported {} size: {}", __func__, OpSize);

  // Both instruction forms take the 32-bit view of the source register.
  const auto Src = GetReg(SetOp->Src.ID()).W();

  if (OpSize != 1) {
    // Halfword variant (anything that is not a byte falls through to here).
    setf16(Src);
    return;
  }

  // Byte variant.
  setf8(Src);
}

DEF_OP(AXFlag) {
LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM2, "Unsupported flagm2 op");
axflag();
Expand Down
37 changes: 35 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3544,7 +3544,23 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) {
}

CalculateDeferredFlags();
Result = CalculateFlags_ADD(OpSizeFromSrc(Op), Dest, OneConst, false);

if (Size < 32 && CTX->HostFeatures.SupportsFlagM) {
// Addition producing upper garbage
Result = _Add(OpSize::i32Bit, Dest, OneConst);
CalculatePF(Result);
CalculateAF(Dest, OneConst);

// Correctly set NZ flags, preserving C
HandleNZCV_RMW();
_SetSmallNZV(OpSizeFromSrc(Op), Result);

// Fix up V flag. INC overflows only when incrementing a positive and
// getting a negative. So compare the sign bits to calculate V.
_RmifNZCV(_Andn(OpSize::i32Bit, Result, Dest), Size - 1, 1);
} else {
Result = CalculateFlags_ADD(OpSizeFromSrc(Op), Dest, OneConst, false);
}

if (!IsLocked) {
StoreResult(GPRClass, Op, Result, -1);
Expand Down Expand Up @@ -3577,7 +3593,24 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
}

CalculateDeferredFlags();
Result = CalculateFlags_SUB(OpSizeFromSrc(Op), Dest, OneConst, false);

if (Size < 32 && CTX->HostFeatures.SupportsFlagM) {
// Subtraction producing upper garbage
Result = _Sub(OpSize::i32Bit, Dest, OneConst);
CalculatePF(Result);
CalculateAF(Dest, OneConst);

// Correctly set NZ flags, preserving C
HandleNZCV_RMW();
_SetSmallNZV(OpSizeFromSrc(Op), Result);

// Fix up V flag. DEC overflows only when decrementing a negative and
// getting a positive. So compare the sign bits to calculate V.
_RmifNZCV(_Andn(OpSize::i32Bit, Dest, Result), Size - 1, 1);
} else {
Result = CalculateFlags_SUB(OpSizeFromSrc(Op), Dest, OneConst, false);
}

if (!IsLocked) {
StoreResult(GPRClass, Op, Result, -1);
}
Expand Down
8 changes: 8 additions & 0 deletions FEXCore/Source/Interface/IR/IR.json
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,14 @@
"HasSideEffects": true,
"DestSize": "Size"
},
"SetSmallNZV OpSize:#Size, GPR:$Src": {
"Desc": ["Set NZV with a SETF instruction. Preserves CF."],
"HasSideEffects": true,
"DestSize": "Size",
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit"
]
},
"CarryInvert": {
"Desc": ["Invert carry flag in NZCV"],
"HasSideEffects": true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@ DeadFlagCalculationEliminination::Classify(IROp_Header *IROp)
.CanEliminate = true,
};

case OP_SETSMALLNZV:
return {
.Write = FLAG_N | FLAG_Z | FLAG_V,
.CanEliminate = true,
};

case OP_LOADNZCV:
return {.Read = FLAG_NZCV};

Expand Down
46 changes: 20 additions & 26 deletions unittests/InstructionCountCI/FlagM/Atomics.json
Original file line number Diff line number Diff line change
Expand Up @@ -1296,17 +1296,15 @@
]
},
"lock dec byte [rax]": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP3 0xfe /1",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"mov w21, #0xff",
"ldaddalb w21, w27, [x4]",
"cset w21, hs",
"lsl w0, w27, #24",
"cmp w0, w20, lsl #24",
"mov w20, #0xff",
"ldaddalb w20, w27, [x4]",
"sub w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv"
"setf8 w26",
"bic w20, w27, w26",
"rmif x20, #7, #nzcV"
]
},
"lock not byte [rax]": {
Expand Down Expand Up @@ -1396,17 +1394,15 @@
]
},
"lock dec word [rax]": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"mov w21, #0xffff",
"ldaddalh w21, w27, [x4]",
"cset w21, hs",
"lsl w0, w27, #16",
"cmp w0, w20, lsl #16",
"mov w20, #0xffff",
"ldaddalh w20, w27, [x4]",
"sub w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv"
"setf16 w26",
"bic w20, w27, w26",
"rmif x20, #15, #nzcV"
]
},
"lock dec dword [rax]": {
Expand All @@ -1432,29 +1428,27 @@
]
},
"lock inc byte [rax]": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"ldaddalb w20, w27, [x4]",
"cset w21, hs",
"lsl w0, w27, #24",
"cmn w0, w20, lsl #24",
"add w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv"
"setf8 w26",
"bic w20, w26, w27",
"rmif x20, #7, #nzcV"
]
},
"lock inc word [rax]": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"ldaddalh w20, w27, [x4]",
"cset w21, hs",
"lsl w0, w27, #16",
"cmn w0, w20, lsl #16",
"add w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv"
"setf16 w26",
"bic w20, w26, w27",
"rmif x20, #15, #nzcV"
]
},
"lock inc dword [rax]": {
Expand Down
39 changes: 39 additions & 0 deletions unittests/InstructionCountCI/FlagM/FlagOpts.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,45 @@
"ands x26, x4, x5"
]
},
"8-bit DEC consumed": {
"ExpectedInstructionCount": 12,
"x86Insts": [
"sub al, ah",
"dec al"
],
"ExpectedArm64ASM": [
"lsr w20, w4, #8",
"lsl w0, w4, #24",
"cmp w0, w20, lsl #24",
"sub w20, w4, w20",
"cfinv",
"bfxil x4, x20, #0, #8",
"uxtb w27, w4",
"sub w26, w27, #0x1 (1)",
"setf8 w26",
"bic w20, w27, w26",
"rmif x20, #7, #nzcV",
"bfxil x4, x26, #0, #8"
]
},
"8-bit DEC dead": {
"ExpectedInstructionCount": 8,
"x86Insts": [
"sub al, ah",
"dec al",
"test al, al"
],
"ExpectedArm64ASM": [
"lsr w20, w4, #8",
"sub w20, w4, w20",
"bfxil x4, x20, #0, #8",
"uxtb w20, w4",
"sub w20, w20, #0x1 (1)",
"bfxil x4, x20, #0, #8",
"mov x26, x4",
"cmn wzr, w26, lsl #24"
]
},
"Variable shift dead": {
"ExpectedInstructionCount": 2,
"x86Insts": [
Expand Down
40 changes: 16 additions & 24 deletions unittests/InstructionCountCI/FlagM/PrimaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -2388,44 +2388,38 @@
]
},
"inc al": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP3 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"uxtb w27, w4",
"cset w21, hs",
"lsl w0, w27, #24",
"cmn w0, w20, lsl #24",
"add w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv",
"setf8 w26",
"bic w20, w26, w27",
"rmif x20, #7, #nzcV",
"bfxil x4, x26, #0, #8"
]
},
"dec al": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP3 0xfe /1",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"uxtb w27, w4",
"cset w21, hs",
"lsl w0, w27, #24",
"cmp w0, w20, lsl #24",
"sub w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv",
"setf8 w26",
"bic w20, w27, w26",
"rmif x20, #7, #nzcV",
"bfxil x4, x26, #0, #8"
]
},
"inc ax": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP4 0xfe /0",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"uxth w27, w4",
"cset w21, hs",
"lsl w0, w27, #16",
"cmn w0, w20, lsl #16",
"add w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv",
"setf16 w26",
"bic w20, w26, w27",
"rmif x20, #15, #nzcV",
"bfxil x4, x26, #0, #16"
]
},
Expand All @@ -2452,16 +2446,14 @@
]
},
"dec ax": {
"ExpectedInstructionCount": 8,
"ExpectedInstructionCount": 6,
"Comment": "GROUP4 0xfe /1",
"ExpectedArm64ASM": [
"mov w20, #0x1",
"uxth w27, w4",
"cset w21, hs",
"lsl w0, w27, #16",
"cmp w0, w20, lsl #16",
"sub w26, w27, #0x1 (1)",
"rmif x21, #63, #nzCv",
"setf16 w26",
"bic w20, w27, w26",
"rmif x20, #15, #nzcV",
"bfxil x4, x26, #0, #16"
]
},
Expand Down

0 comments on commit ea7d169

Please sign in to comment.