From f346f89678cb0c701aaed4fa749f35c28e9c72bb Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 27 Feb 2024 19:56:12 -0800 Subject: [PATCH 1/3] OpcodeDispatcher: Don't use AddShift with no shift This accidentally removed optimizations elsewhere that was only checking for Add. --- FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 47b29082d8..0ac40e2f7d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4757,7 +4757,12 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, X if (!IsVSIB && Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID && Operand.Data.SIB.Base != FEXCore::X86State::REG_INVALID) { auto Base = LoadGPRRegister(Operand.Data.SIB.Base, GPRSize); auto Index = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize); - Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); + if (Operand.Data.SIB.Scale == 1) { + Tmp = _Add(IR::SizeToOpSize(GPRSize), Base, Index); + } + else { + Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); + } } else { // NOTE: VSIB cannot have the index * scale portion calculated ahead of time, @@ -5011,7 +5016,12 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID && Operand.Data.SIB.Base != FEXCore::X86State::REG_INVALID) { auto Base = LoadGPRRegister(Operand.Data.SIB.Base, GPRSize); auto Index = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize); - Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); + if (Operand.Data.SIB.Scale == 1) { + Tmp = _Add(IR::SizeToOpSize(GPRSize), Base, Index); + } + else { + Tmp = _AddShift(IR::SizeToOpSize(GPRSize), Base, Index, ShiftType::LSL, FEXCore::ilog2(Operand.Data.SIB.Scale)); + } } else { if (Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID) { From dc5239c003d51534b97596b3cdeb2037bd7e390a Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 27 Feb 2024 19:57:06 -0800 Subject: [PATCH 2/3] InstCountCI: Update for previous fix --- unittests/InstructionCountCI/FlagM/HotBlocks.json | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks.json b/unittests/InstructionCountCI/FlagM/HotBlocks.json index 2d75277984..9838b0a9d9 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks.json @@ -169,7 +169,7 @@ ] }, "Scalar vector add loop": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 7, "Comment": [ "Saw this in bytemark" ], @@ -181,11 +181,9 @@ "cmp rsi, rax" ], "ExpectedArm64ASM": [ - "add x20, x16, x4", - "ldr q16, [x20]", + "ldr q16, [x16, x4, sxtx]", "add v16.2d, v16.2d, v17.2d", - "add x20, x16, x4", - "str q16, [x20]", + "str q16, [x16, x4, sxtx]", "add x4, x4, #0x10 (16)", "eor w27, w10, w4", "subs x26, x10, x4", From 67f13ba927eeb6ed202e56f77814d87e939cbd63 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 27 Feb 2024 19:57:22 -0800 Subject: [PATCH 3/3] Adds sign extending address bug that was detected when testing #3421 Doesn't quite match the libc code directly because it uses `[gs:eax]` with both having the sign bit set and we can't deal with that with ASM tests. So match the behaviour in a different way. --- unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm b/unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm index ea564e23ef..5337c418eb 100644 --- a/unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm +++ b/unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm @@ -1,7 +1,8 @@ %ifdef CONFIG { "RegData": { - "RAX": "0x41424344" + "RAX": "0x41424344", + "RBX": "0x41424344" }, "MemoryRegions": { "0xf0000000": "4096" @@ -17,4 +18,13 @@ lea eax, [0xf000_0000] mov eax, [ds:eax] +; Ensures that zext occurs correctly with two registers that have the sign bit set. +mov ebx, 0xffff_ffff +mov ecx, 0xf000_0001 + +; Break the block so it can't optimize through. +jmp .test +.test: +mov ebx, [ebx+ecx] + hlt