From 019555ad6982a9e0af1930147c452c5bd63779ca Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Sun, 25 Feb 2024 19:26:25 +0000 Subject: [PATCH] Improve 32bit constant usage in memory addressing Update instcountci files. --- FEXCore/Source/Interface/IR/PassManager.cpp | 4 +- FEXCore/Source/Interface/IR/Passes.h | 8 +- .../Source/Interface/IR/Passes/ConstProp.cpp | 61 ++++- unittests/32Bit_ASM/subaddr.asm | 20 ++ .../FEXOpt/AddressingLimitations_32Bit.json | 216 +++++++----------- .../FlagM/HotBlocks_32Bit.json | 51 ++--- .../InstructionCountCI/Primary_32Bit.json | 48 ++-- 7 files changed, 205 insertions(+), 203 deletions(-) create mode 100644 unittests/32Bit_ASM/subaddr.asm diff --git a/FEXCore/Source/Interface/IR/PassManager.cpp b/FEXCore/Source/Interface/IR/PassManager.cpp index f9c8f255d7..6fa908d557 100644 --- a/FEXCore/Source/Interface/IR/PassManager.cpp +++ b/FEXCore/Source/Interface/IR/PassManager.cpp @@ -80,7 +80,8 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool Inli InsertPass(CreateDeadStoreElimination(ctx->HostFeatures.SupportsAVX)); InsertPass(CreatePassDeadCodeElimination()); - InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9)); + InsertPass(CreateConstProp( + InlineConstants, ctx->HostFeatures.SupportsTSOImm9, Is64BitMode())); InsertPass(CreateDeadFlagCalculationEliminination()); @@ -121,5 +122,4 @@ bool PassManager::Run(IREmitter *IREmit) { return Changed; } - } diff --git a/FEXCore/Source/Interface/IR/Passes.h b/FEXCore/Source/Interface/IR/Passes.h index 07d8760420..770fa7ae1c 100644 --- a/FEXCore/Source/Interface/IR/Passes.h +++ b/FEXCore/Source/Interface/IR/Passes.h @@ -16,15 +16,17 @@ class Pass; class RegisterAllocationPass; class RegisterAllocationData; -fextl::unique_ptr CreateConstProp(bool InlineConstants, bool SupportsTSOImm9); +fextl::unique_ptr +CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, bool Is64BitMode); fextl::unique_ptr CreateContextLoadStoreElimination(bool SupportsAVX); fextl::unique_ptr CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID); fextl::unique_ptr CreateDeadFlagCalculationEliminination(); fextl::unique_ptr CreateDeadStoreElimination(bool SupportsAVX); fextl::unique_ptr CreatePassDeadCodeElimination(); fextl::unique_ptr CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator); -fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, - bool SupportsAVX); +fextl::unique_ptr +CreateRegisterAllocationPass(FEXCore::IR::Pass *CompactionPass, + bool SupportsAVX); fextl::unique_ptr CreateLongDivideEliminationPass(); namespace Validation { diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index b59fc7da97..cd4cd36517 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -91,6 +91,7 @@ static bool IsTSOImm9(uint64_t imm) { } static std::tuple MemExtendedAddressing(IREmitter *IREmit, uint8_t AccessSize, IROp_Header* AddressHeader) { + LOGMAN_THROW_A_FMT(AddressHeader->Op == OP_ADD, "Invalid address Op"); auto Src0Header = IREmit->GetOpHeader(AddressHeader->Args[0]); if (Src0Header->Size == 8) { //Try to optimize: Base + MUL(Offset, Scale) @@ -141,7 +142,37 @@ static std::tuple MemExtende } // no match anywhere, just add - return { MEM_OFFSET_SXTX, 1, IREmit->UnwrapNode(AddressHeader->Args[0]), IREmit->UnwrapNode(AddressHeader->Args[1]) }; + // However, if we have one 32bit negative constant, we need to sign extend it + auto Arg0_ = AddressHeader->Args[0]; + auto Arg1_ = AddressHeader->Args[1]; + auto Arg0H = IREmit->GetOpHeader(Arg0_); + auto Arg1H = IREmit->GetOpHeader(Arg1_); + auto Arg0 = IREmit->UnwrapNode(Arg0_); + auto Arg1 = IREmit->UnwrapNode(Arg1_); + + uint64_t ConstVal = 0; + OrderedNode *Cnt = nullptr; + OrderedNode *Base = nullptr; + + if (IREmit->IsValueConstant(Arg0_, &ConstVal) && Arg0H->Size == 4) { + Cnt = Arg0; + Base = Arg1; + } else if (IREmit->IsValueConstant(Arg1_, &ConstVal) && Arg1H->Size == 4) { + Cnt = Arg1; + Base = Arg0; + } + + if (Cnt) { + int32_t Val32 = (int32_t)ConstVal; + + if (Val32 < 0) { + return {MEM_OFFSET_SXTW, 1, Base, Cnt}; + } else { + return {MEM_OFFSET_SXTX, 1, Base, Cnt}; + } + } else { + return {MEM_OFFSET_SXTX, 1, Arg0, Arg1}; + } } static OrderedNodeWrapper RemoveUselessMasking(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t mask) { @@ -184,9 +215,10 @@ static bool IsBfeAlreadyDone(IREmitter *IREmit, OrderedNodeWrapper src, uint64_t class ConstProp final : public FEXCore::IR::Pass { public: - explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9) - : InlineConstants(DoInlineConstants) - , SupportsTSOImm9 {SupportsTSOImm9} { } + explicit ConstProp(bool DoInlineConstants, bool SupportsTSOImm9, + bool Is64BitMode) + : InlineConstants(DoInlineConstants), SupportsTSOImm9{SupportsTSOImm9}, + Is64BitMode(Is64BitMode) {} bool Run(IREmitter *IREmit) override; @@ -219,6 +251,7 @@ class ConstProp final : public FEXCore::IR::Pass { return Result.first->second; } bool SupportsTSOImm9{}; + bool Is64BitMode; // This is a heuristic to limit constant pool live ranges to reduce RA interference pressure. // If the range is unbounded then RA interference pressure seems to increase to the point // that long blocks of constant usage can slow to a crawl. @@ -525,12 +558,15 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current auto Op = IROp->CW(); auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { - auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); + if (AddressHeader->Op == OP_ADD && + ((Is64BitMode && AddressHeader->Size == 8) || + (!Is64BitMode && AddressHeader->Size == 4))) { + auto [OffsetType, OffsetScale, Arg0, Arg1] = + MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); Op->OffsetType = OffsetType; Op->OffsetScale = OffsetScale; - IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr + IREmit->ReplaceNodeArgument(CodeNode, Op->Addr_Index, Arg0); // Addr IREmit->ReplaceNodeArgument(CodeNode, Op->Offset_Index, Arg1); // Offset Changed = true; @@ -542,7 +578,9 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current auto Op = IROp->CW(); auto AddressHeader = IREmit->GetOpHeader(Op->Addr); - if (AddressHeader->Op == OP_ADD && AddressHeader->Size == 8) { + if (AddressHeader->Op == OP_ADD && + ((Is64BitMode && AddressHeader->Size == 8) || + (!Is64BitMode && AddressHeader->Size == 4))) { auto [OffsetType, OffsetScale, Arg0, Arg1] = MemExtendedAddressing(IREmit, IROp->Size, AddressHeader); Op->OffsetType = OffsetType; @@ -1295,8 +1333,9 @@ bool ConstProp::Run(IREmitter *IREmit) { return Changed; } -fextl::unique_ptr CreateConstProp(bool InlineConstants, bool SupportsTSOImm9) { - return fextl::make_unique(InlineConstants, SupportsTSOImm9); +fextl::unique_ptr +CreateConstProp(bool InlineConstants, bool SupportsTSOImm9, bool Is64BitMode) { + return fextl::make_unique(InlineConstants, SupportsTSOImm9, + Is64BitMode); } - } diff --git a/unittests/32Bit_ASM/subaddr.asm b/unittests/32Bit_ASM/subaddr.asm new file mode 100644 index 0000000000..414c50735e --- /dev/null +++ b/unittests/32Bit_ASM/subaddr.asm @@ -0,0 +1,20 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "0xdeadbeef" + }, + "MemoryRegions": { + "0x10000000": "4096" + }, + "MemoryData": { + "0x10000000": "0xdeadbeef" + }, + "Mode": "32BIT" +} +%endif + +section .text + +lea eax, [0x10000040] +mov eax, [eax-0x40] +hlt diff --git a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json index f04b9efa87..a3e22b73cb 100644 --- a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json +++ b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json @@ -14,331 +14,287 @@ ], "Instructions": { "movzx eax, byte [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldrb w4, [x20]" + "ldrb w4, [x5, w20, sxtw]" ] }, "movzx eax, byte [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldrb w4, [x20]" + "ldrb w4, [x5, w20, sxtw]" ] }, "movzx eax, byte [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldrb w4, [x20]" + "ldrb w4, [x5, #255]" ] }, "movzx eax, byte [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldrb w4, [x20]" + "ldrb w4, [x5, #256]" ] }, "movzx eax, byte [ecx + 4095]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xfff (4095)", - "ldrb w4, [x20]" + "ldrb w4, [x5, #4095]" ] }, "movzx eax, byte [ecx + 4096]": { "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "add w20, w5, #0x1000 (4096)", - "ldrb w4, [x20]" + "mov w20, #0x1000", + "ldrb w4, [x5, x20, sxtx]" ] }, "movzx eax, word [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldrh w4, [x20]" + "ldrh w4, [x5, w20, sxtw]" ] }, "movzx eax, word [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldrh w4, [x20]" + "ldrh w4, [x5, w20, sxtw]" ] }, "movzx eax, word [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldrh w4, [x20]" + "ldurh w4, [x5, #255]" ] }, "movzx eax, word [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldrh w4, [x20]" + "ldrh w4, [x5, #256]" ] }, "movzx eax, word [ecx + 8190]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "mov w20, #0x1ffe", - "add w20, w5, w20", - "ldrh w4, [x20]" + "ldrh w4, [x5, #8190]" ] }, "movzx eax, word [ecx + 8191]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x1fff", - "add w20, w5, w20", - "ldrh w4, [x20]" + "ldrh w4, [x5, x20, sxtx]" ] }, "movzx eax, word [ecx + 8192]": { "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "add w20, w5, #0x2000 (8192)", - "ldrh w4, [x20]" + "mov w20, #0x2000", + "ldrh w4, [x5, x20, sxtx]" ] }, "mov eax, dword [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, w20, sxtw]" ] }, "mov eax, dword [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, w20, sxtw]" ] }, "mov eax, dword [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldr w4, [x20]" + "ldur w4, [x5, #255]" ] }, "mov eax, dword [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldr w4, [x20]" + "ldr w4, [x5, #256]" ] }, "mov eax, dword [ecx + 16380]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "mov w20, #0x3ffc", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, #16380]" ] }, "mov eax, dword [ecx + 16381]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x3ffd", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, x20, sxtx]" ] }, "mov eax, dword [ecx + 16382]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x3ffe", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, x20, sxtx]" ] }, "mov eax, dword [ecx + 16383]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x3fff", - "add w20, w5, w20", - "ldr w4, [x20]" + "ldr w4, [x5, x20, sxtx]" ] }, "mov eax, dword [ecx + 16384]": { "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "add w20, w5, #0x4000 (16384)", - "ldr w4, [x20]" + "mov w20, #0x4000", + "ldr w4, [x5, x20, sxtx]" ] }, "movss xmm0, [ecx + 16379]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x3ffb", - "add w20, w5, w20", - "ldr s16, [x20]" + "ldr s16, [x5, x20, sxtx]" ] }, "movss xmm0, [ecx + 16380]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "mov w20, #0x3ffc", - "add w20, w5, w20", - "ldr s16, [x20]" + "ldr s16, [x5, #16380]" ] }, "movss xmm0, [ecx + 16381]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x3ffd", - "add w20, w5, w20", - "ldr s16, [x20]" + "ldr s16, [x5, x20, sxtx]" ] }, "movss xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldr s16, [x20]" + "ldr s16, [x5, w20, sxtw]" ] }, "movss xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldr s16, [x20]" + "ldr s16, [x5, w20, sxtw]" ] }, "movss xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldr s16, [x20]" + "ldur s16, [x5, #255]" ] }, "movss xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldr s16, [x20]" + "ldr s16, [x5, #256]" ] }, "movsd xmm0, [ecx + 32759]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x7ff7", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, x20, sxtx]" ] }, "movsd xmm0, [ecx + 32760]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "mov w20, #0x7ff8", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, #32760]" ] }, "movsd xmm0, [ecx + 32761]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0x7ff9", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, x20, sxtx]" ] }, "movsd xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, w20, sxtw]" ] }, "movsd xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, w20, sxtw]" ] }, "movsd xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldr d16, [x20]" + "ldur d16, [x5, #255]" ] }, "movsd xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldr d16, [x20]" + "ldr d16, [x5, #256]" ] }, "movq xmm0, [ecx + 65519]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffef", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, x20, sxtx]" ] }, "movq xmm0, [ecx + 65520]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfff0", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, x20, sxtx]" ] }, "movq xmm0, [ecx + 65521]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfff1", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, x20, sxtx]" ] }, "movq xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xfffffeff", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, w20, sxtw]" ] }, "movq xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "add w20, w5, w20", - "ldr d16, [x20]" + "ldr d16, [x5, w20, sxtw]" ] }, "movq xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0xff (255)", - "ldr d16, [x20]" + "ldur d16, [x5, #255]" ] }, "movq xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "ExpectedArm64ASM": [ - "add w20, w5, #0x100 (256)", - "ldr d16, [x20]" + "ldr d16, [x5, #256]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json index 68b387cff6..a0ab3a49b4 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json @@ -12,7 +12,7 @@ }, "Instructions": { "Sonic Mania movie player": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 16, "Comment": "Used to be hottest block in Sonic Mania", "x86Insts": [ "movzx edx, byte [esi+ecx]", @@ -35,8 +35,7 @@ "lsl w6, w6, #8", "add w10, w10, #0x1 (1)", "orr w6, w6, w5", - "add w20, w9, #0xc (12)", - "ldr w5, [x20]", + "ldr w5, [x9, #12]", "ldr w20, [x4]", "orr w20, w20, w6", "str w20, [x4]", @@ -47,7 +46,7 @@ ] }, "wine mscrt.dll memmove": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 14, "Comment": "Hot in Sonic Mania", "x86Insts": [ "movdqu xmm0, [esi]", @@ -65,19 +64,13 @@ ], "ExpectedArm64ASM": [ "ldr q16, [x10]", - "add w20, w10, #0x10 (16)", - "ldr q17, [x20]", - "add w20, w10, #0x20 (32)", - "ldr q18, [x20]", - "add w20, w10, #0x30 (48)", - "ldr q19, [x20]", + "ldr q17, [x10, #16]", + "ldr q18, [x10, #32]", + "ldr q19, [x10, #48]", "str q16, [x11]", - "add w20, w11, #0x10 (16)", - "str q17, [x20]", - "add w20, w11, #0x20 (32)", - "str q18, [x20]", - "add w20, w11, #0x30 (48)", - "str q19, [x20]", + "str q17, [x11, #16]", + "str q18, [x11, #32]", + "str q19, [x11, #48]", "add w10, w10, #0x40 (64)", "add w11, w11, #0x40 (64)", "sub w5, w5, #0x40 (64)", @@ -87,7 +80,7 @@ ] }, "dxvk hotblock from MGRR": { - "ExpectedInstructionCount": 55, + "ExpectedInstructionCount": 47, "Comment": [ "Hottest block in Metal Gear Rising: Revengeance render thread" ], @@ -108,18 +101,14 @@ "lock cmpxchg8b qword [esi+0x8]" ], "ExpectedArm64ASM": [ - "add w20, w4, #0xc (12)", - "ldr w6, [x20]", - "add w20, w4, #0x8 (8)", - "ldr w4, [x20]", + "ldr w6, [x4, #12]", + "ldr w4, [x4, #8]", "mov w20, #0xffffffcc", - "add w20, w9, w20", - "str w10, [x20]", + "str w10, [x9, w20, sxtw]", "mov w5, w4", "mov w7, w6", "mov w20, #0xffffffdc", - "add w20, w9, w20", - "ldr w10, [x20]", + "ldr w10, [x9, w20, sxtw]", "mov w20, #0xffffffff", "adds w21, w4, w20", "mov w5, w21", @@ -128,17 +117,13 @@ "add w22, w6, w22", "mov w7, w22", "mov w23, #0xffffffd8", - "add w23, w9, w23", - "str w21, [x23]", + "str w21, [x9, w23, sxtw]", "mov w23, #0xffffffd4", - "add w24, w9, w23", - "str w22, [x24]", + "str w22, [x9, w23, sxtw]", "mov w7, w21", "mov w24, #0xffffffd0", - "add w24, w9, w24", - "str w21, [x24]", - "add w23, w9, w23", - "ldr w5, [x23]", + "str w21, [x9, w24, sxtw]", + "ldr w5, [x9, w23, sxtw]", "eor w27, w6, w20", "mov w26, w22", "adcs wzr, w6, w20", diff --git a/unittests/InstructionCountCI/Primary_32Bit.json b/unittests/InstructionCountCI/Primary_32Bit.json index 8ec12ed1be..4aa5972d18 100644 --- a/unittests/InstructionCountCI/Primary_32Bit.json +++ b/unittests/InstructionCountCI/Primary_32Bit.json @@ -380,18 +380,18 @@ "ExpectedArm64ASM": [ "ldr w11, [x8]", "add x20, x8, #0x4 (4)", - "ldr w10, [x8, #4]", - "add x21, x20, #0x4 (4)", - "ldr w9, [x20, #4]", - "add x20, x21, #0x8 (8)", - "ldr w7, [x21, #8]", - "add x21, x20, #0x4 (4)", - "ldr w6, [x20, #4]", - "add x20, x21, #0x4 (4)", - "ldr w5, [x21, #4]", - "add x21, x20, #0x4 (4)", - "ldr w4, [x20, #4]", - "add x8, x21, #0x4 (4)" + "ldr w10, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w9, [x20]", + "add x20, x20, #0x8 (8)", + "ldr w7, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w6, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w5, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w4, [x20]", + "add x8, x20, #0x4 (4)" ] }, "popad": { @@ -400,18 +400,18 @@ "ExpectedArm64ASM": [ "ldr w11, [x8]", "add x20, x8, #0x4 (4)", - "ldr w10, [x8, #4]", - "add x21, x20, #0x4 (4)", - "ldr w9, [x20, #4]", - "add x20, x21, #0x8 (8)", - "ldr w7, [x21, #8]", - "add x21, x20, #0x4 (4)", - "ldr w6, [x20, #4]", - "add x20, x21, #0x4 (4)", - "ldr w5, [x21, #4]", - "add x21, x20, #0x4 (4)", - "ldr w4, [x20, #4]", - "add x8, x21, #0x4 (4)" + "ldr w10, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w9, [x20]", + "add x20, x20, #0x8 (8)", + "ldr w7, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w6, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w5, [x20]", + "add x20, x20, #0x4 (4)", + "ldr w4, [x20]", + "add x8, x20, #0x4 (4)" ] }, "aam": {