Skip to content

Commit

Permalink
Merge pull request #3368 from bylaws/preprcr
Browse files Browse the repository at this point in the history
FEXCore: Fix RCL/RCR shift wraparound behaviour
  • Loading branch information
Sonicadvance1 authored Jan 21, 2024
2 parents 0e97f8f + c4c10d0 commit c0be974
Show file tree
Hide file tree
Showing 7 changed files with 508 additions and 279 deletions.
189 changes: 106 additions & 83 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2421,62 +2421,73 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
return;
}

const auto Mask = (Size == 64) ? 0x3F : 0x1F;

// Calculate flags early.
CalculateDeferredFlags();
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
const auto OpSize = OpSizeFromSrc(Op);

CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){
const auto OpSize = OpSizeFromSrc(Op);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);

// Res = Src >> Shift
OrderedNode *Res = _Lshr(OpSize, Dest, Src);
// Res = Src >> Shift
OrderedNode *Res = _Lshr(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

uint64_t Const;
if (!IsValueConstant(WrapNode(Src), &Const)) {
auto Zero = _Constant(Size, 0);
auto One = _Constant(Size, 1);
uint64_t Const;
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const)
return;

// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(OpSize, _Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshl(OpSize, Dest, SrcShl);
InvalidateDeferredFlags();

auto CompareResult = _Select(FEXCore::IR::COND_UGT, Src, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Constant folded version of the above, with fused shifts.
if (Const > 1)
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);

// Our new CF will be bit (Shift - 1) of the source. this is hoisted up to
// avoid the need to copy the source.
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, Src, One));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Our new CF will be bit (Shift - 1) of the source.
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Const - 1, true);

// Since shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, _Constant(Size, Size), Src);
Res = _Or(OpSize, Res, _Lshl(OpSize::i64Bit, CF, CFShl));
// Since shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Size - Const);

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
} else {
// Constant folded version of the above, with fused shifts.
if (Const > 1)
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
}

// Our new CF will be bit (Shift - 1) of the source.
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Const - 1, true);
StoreResult(GPRClass, Op, Res, -1);
return;
}

// Since shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Size - Const);
OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){
auto Zero = _Constant(Size, 0);
auto One = _Constant(Size, 1);

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
}
}
// Res |= (SrcMasked << (Size - Shift + 1));
OrderedNode *SrcMaskedShl = _Sub(OpSize, _Constant(Size, Size + 1), SrcMasked);
auto TmpHigher = _Lshl(OpSize, Dest, SrcMaskedShl);

auto CompareResult = _Select(FEXCore::IR::COND_UGT, SrcMasked, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);

// Our new CF will be bit (Shift - 1) of the source. this is hoisted up to
// avoid the need to copy the source.
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, SrcMasked, One));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);

// Since shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, _Constant(Size, Size), SrcMasked);
Res = _Or(OpSize, Res, _Lshl(OpSize::i64Bit, CF, CFShl));

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);

StoreResult(GPRClass, Op, Res, -1);
});
Expand Down Expand Up @@ -2607,64 +2618,76 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
return;
}

const auto Mask = (Size == 64) ? 0x3F : 0x1F;

// Calculate flags early.
CalculateDeferredFlags();

OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
const auto OpSize = OpSizeFromSrc(Op);

CalculateFlags_ShiftVariable(Src, [this, CF, Op, Size, Src](){
// Res = Src << Shift
const auto OpSize = OpSizeFromSrc(Op);
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
OrderedNode *Res = _Lshl(OpSize, Dest, Src);
// Res = Src << Shift
OrderedNode *Res = _Lshl(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

uint64_t Const;
if (!IsValueConstant(WrapNode(Src), &Const)) {
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(OpSize, _Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshr(OpSize, Dest, SrcShl);
uint64_t Const;
if (IsValueConstant(WrapNode(Src), &Const)) {
Const &= Mask;
if (!Const)
return;

auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);
InvalidateDeferredFlags();

auto CompareResult = _Select(FEXCore::IR::COND_UGT, Src, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);
// Res |= (Src << (Size - Shift + 1));
if (Const > 1)
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);

// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, _Constant(Size, Size), Src));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);
// Our new CF will be bit (Shift - 1) of the source
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Size - Const, true);

// Since Shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, Src, _Constant(Size, 1));
auto TmpCF = _Lshl(OpSize::i64Bit, CF, CFShl);
Res = _Or(OpSize, Res, TmpCF);
// Since Shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Const - 1);

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
//
// Note that NewCF has garbage in the upper bits, but we ignore them here
// and mask as part of the set after.
auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto NewOF = _Xor(OpSize, Res, Dest);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
} else {
// Res |= (Src << (Size - Shift + 1));
if (Const > 1)
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
}

// Our new CF will be bit (Shift - 1) of the source
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Dest, Size - Const, true);
StoreResult(GPRClass, Op, Res, -1);
return;
}

// Since Shift != 0 we can inject the CF
Res = _Orlshl(OpSize, Res, CF, Const - 1);
OrderedNode *SrcMasked = _And(OpSize, Src, _Constant(Size, Mask));
CalculateFlags_ShiftVariable(SrcMasked, [this, CF, Op, Size, OpSize, SrcMasked, Dest, &Res](){
// Res |= (SrcMasked << (Size - Shift + 1));
OrderedNode *SrcMaskedShl = _Sub(OpSize, _Constant(Size, Size + 1), SrcMasked);
auto TmpHigher = _Lshr(OpSize, Dest, SrcMaskedShl);

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
if (Const == 1) {
auto NewOF = _Xor(OpSize, Res, Dest);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
}
}
auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);

auto CompareResult = _Select(FEXCore::IR::COND_UGT, SrcMasked, One, TmpHigher, Zero);
Res = _Or(OpSize, Res, CompareResult);

// Our new CF will be bit (Shift - 1) of the source
auto NewCF = _Lshr(OpSize, Dest, _Sub(OpSize, _Constant(Size, Size), SrcMasked));
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(NewCF, 0, true);

// Since Shift != 0 we can inject the CF
OrderedNode *CFShl = _Sub(OpSize, SrcMasked, _Constant(Size, 1));
auto TmpCF = _Lshl(OpSize::i64Bit, CF, CFShl);
Res = _Or(OpSize, Res, TmpCF);

// OF is the top two MSBs XOR'd together
// Only when Shift == 1, it is undefined otherwise
//
// Note that NewCF has garbage in the upper bits, but we ignore them here
// and mask as part of the set after.
auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);

StoreResult(GPRClass, Op, Res, -1);
});
Expand Down
50 changes: 50 additions & 0 deletions unittests/ASM/PrimaryGroup/2_D3_02_4.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x00000006",
"RDI": "0x00000004",
"RDX": "0x00000002",
"RSI": "0x00000000",
"R8": "0x0",
"R9": "0x0",
"R10": "0x1",
"R11": "0x1"
}
}
%endif

; RCL (rotate left through carry) on 32-bit registers with a count of 34 in CL.
; The count is larger than the operand width; the RegData expectations above
; correspond to an effective rotate of 2 (34 & 0x1F) through the 33-bit CF:reg value.
; Each case seeds CF with stc/clc, rotates, then records the resulting CF in R8..R11.
mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 34 ; Count > 32: exercises the wraparound (count masking) path

; Case 1: CF=1, EBX=0x00000001 -> expect EBX=0x00000006, CF=0 (R8)
stc
rcl ebx, cl
lahf ; AH = low byte of the flags register; CF is bit 0 of AH
mov r8w, ax
shr r8, 8 ; Bring AH down into the low byte
and r8, 1 ; We only care about carry flag here

; Case 2: CF=0, EDI=0x00000001 -> expect EDI=0x00000004, CF=0 (R9)
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; Case 3: CF=1, EDX=0x40000000 -> expect EDX=0x00000002, CF=1 (R10)
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; Case 4: CF=0, ESI=0x40000000 -> expect ESI=0x00000000, CF=1 (R11)
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt
50 changes: 50 additions & 0 deletions unittests/ASM/PrimaryGroup/2_D3_02_5.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x00000001",
"RDI": "0x00000001",
"RDX": "0x40000000",
"RSI": "0x40000000",
"R8": "0x1",
"R9": "0x0",
"R10": "0x1",
"R11": "0x0"
}
}
%endif

; RCL (rotate left through carry) on 32-bit registers with a count of 32 in CL.
; The RegData expectations above correspond to an effective rotate of 0
; (32 & 0x1F): every operand keeps its initial value and CF keeps whatever
; the preceding stc/clc set. The resulting CF of each case is recorded in R8..R11.
mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 32 ; Test wraparound with zero shift

; Case 1: CF=1, EBX=0x00000001 -> expect EBX unchanged, CF still 1 (R8)
stc
rcl ebx, cl
lahf ; AH = low byte of the flags register; CF is bit 0 of AH
mov r8w, ax
shr r8, 8 ; Bring AH down into the low byte
and r8, 1 ; We only care about carry flag here

; Case 2: CF=0, EDI=0x00000001 -> expect EDI unchanged, CF still 0 (R9)
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; Case 3: CF=1, EDX=0x40000000 -> expect EDX unchanged, CF still 1 (R10)
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; Case 4: CF=0, ESI=0x40000000 -> expect ESI unchanged, CF still 0 (R11)
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt
50 changes: 50 additions & 0 deletions unittests/ASM/PrimaryGroup/2_D3_03_7.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
%ifdef CONFIG
{
"RegData": {
"RBX": "0x40000000",
"RDI": "0x00000000",
"RDX": "0x60000000",
"RSI": "0x20000000",
"R8": "0x1",
"R9": "0x1",
"R10": "0x0",
"R11": "0x0"
}
}
%endif

; RCR (rotate right through carry) on 32-bit registers with a count of 34 in CL.
; The count is larger than the operand width; the RegData expectations above
; correspond to an effective rotate of 2 (34 & 0x1F) through the 33-bit CF:reg value.
; Each case seeds CF with stc/clc, rotates, then records the resulting CF in R8..R11.
mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 34 ; Count > 32: exercises the wraparound (count masking) path

; Case 1: CF=1, EBX=0x00000002 -> expect EBX=0x40000000, CF=1 (R8)
stc
rcr ebx, cl
lahf ; AH = low byte of the flags register; CF is bit 0 of AH
mov r8w, ax
shr r8, 8 ; Bring AH down into the low byte
and r8, 1 ; We only care about carry flag here

; Case 2: CF=0, EDI=0x00000002 -> expect EDI=0x00000000, CF=1 (R9)
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; Case 3: CF=1, EDX=0x80000000 -> expect EDX=0x60000000, CF=0 (R10)
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; Case 4: CF=0, ESI=0x80000000 -> expect ESI=0x20000000, CF=0 (R11)
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt
Loading

0 comments on commit c0be974

Please sign in to comment.