From 77f24308fe7890ee5094dd3c84441ae8a45adb20 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 7 Sep 2021 10:10:49 -0400 Subject: [PATCH] [X86] Don't clobber EBX in stackprobes On X86, the stackprobe emission code chooses the `R11D` register, which is illegal on i686. This ends up wrapping around to `EBX`, which does not get properly callee-saved within the stack probing prologue, clobbering the register for the callers. We fix this by explicitly using `EAX` as the stack probe register. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D109203 (cherry picked from commit ae8507b0df738205a6b9e3795ad34672b7499381) --- llvm/lib/Target/X86/X86FrameLowering.cpp | 8 +- llvm/test/CodeGen/X86/stack-clash-large.ll | 97 +++++++++++++--------- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 4cde7971e597e1..86cb86b19d6294 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( MF.insert(MBBIter, testMBB); MF.insert(MBBIter, tailMBB); - Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 + : Is64Bit ? X86::R11D + : X86::EAX; BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MF.insert(MBBIter, bodyMBB); MF.insert(MBBIter, footMBB); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; - Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 + : Is64Bit ? X86::R11D + : X86::EAX; // Setup entry block { diff --git a/llvm/test/CodeGen/X86/stack-clash-large.ll b/llvm/test/CodeGen/X86/stack-clash-large.ll index 9129e4ed40fda2..00c7843b54f55b 100644 --- a/llvm/test/CodeGen/X86/stack-clash-large.ll +++ b/llvm/test/CodeGen/X86/stack-clash-large.ll @@ -1,45 +1,64 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp -; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s +; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s define i32 @foo() local_unnamed_addr #0 { -; CHECK-X86-64-LABEL: foo: -; CHECK-X86-64: # %bb.0: -; CHECK-X86-64-NEXT: movq %rsp, %r11 -; CHECK-X86-64-NEXT: subq $69632, %r11 # imm = 0x11000 -; CHECK-X86-64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-X86-64-NEXT: movq $0, (%rsp) -; CHECK-X86-64-NEXT: cmpq %r11, %rsp -; CHECK-X86-64-NEXT: jne .LBB0_1 -; CHECK-X86-64-NEXT: # %bb.2: -; CHECK-X86-64-NEXT: subq $2248, %rsp # imm = 0x8C8 -; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 71888 -; CHECK-X86-64-NEXT: movl $1, 264(%rsp) -; CHECK-X86-64-NEXT: movl $1, 28664(%rsp) -; CHECK-X86-64-NEXT: movl -128(%rsp), %eax -; CHECK-X86-64-NEXT: addq $71880, %rsp # imm = 0x118C8 -; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X86-64-NEXT: retq +; CHECK-X64-LABEL: foo: +; CHECK-X64: # %bb.0: +; CHECK-X64-NEXT: movq %rsp, %r11 +; CHECK-X64-NEXT: subq $69632, %r11 # imm = 0x11000 +; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X64-NEXT: movq $0, (%rsp) +; CHECK-X64-NEXT: cmpq %r11, %rsp +; CHECK-X64-NEXT: jne .LBB0_1 +; CHECK-X64-NEXT: # %bb.2: +; CHECK-X64-NEXT: subq $2248, %rsp # imm = 0x8C8 +; CHECK-X64-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X64-NEXT: movl $1, 264(%rsp) +; CHECK-X64-NEXT: movl $1, 28664(%rsp) +; CHECK-X64-NEXT: movl -128(%rsp), %eax +; CHECK-X64-NEXT: addq $71880, %rsp # imm = 0x118C8 +; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X64-NEXT: retq ; -; CHECK-X86-32-LABEL: foo: -; CHECK-X86-32: # %bb.0: -; CHECK-X86-32-NEXT: movl %esp, %r11d -; CHECK-X86-32-NEXT: subl $69632, %r11d # imm = 0x11000 -; CHECK-X86-32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 -; CHECK-X86-32-NEXT: movl $0, (%esp) -; CHECK-X86-32-NEXT: cmpl %r11d, %esp -; CHECK-X86-32-NEXT: jne .LBB0_1 -; CHECK-X86-32-NEXT: # %bb.2: -; CHECK-X86-32-NEXT: subl $2380, %esp # imm = 0x94C -; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 72016 -; CHECK-X86-32-NEXT: movl $1, 392(%esp) -; CHECK-X86-32-NEXT: movl $1, 28792(%esp) -; CHECK-X86-32-NEXT: movl (%esp), %eax -; CHECK-X86-32-NEXT: addl $72012, %esp # imm = 0x1194C -; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 -; CHECK-X86-32-NEXT: retl +; CHECK-X86-LABEL: foo: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: movl %esp, %eax +; CHECK-X86-NEXT: subl $69632, %eax # imm = 0x11000 +; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-NEXT: movl $0, (%esp) +; CHECK-X86-NEXT: cmpl %eax, %esp +; CHECK-X86-NEXT: jne .LBB0_1 +; CHECK-X86-NEXT: # %bb.2: +; CHECK-X86-NEXT: subl $2380, %esp # imm = 0x94C +; CHECK-X86-NEXT: .cfi_def_cfa_offset 72016 +; CHECK-X86-NEXT: movl $1, 392(%esp) +; CHECK-X86-NEXT: movl $1, 28792(%esp) +; CHECK-X86-NEXT: movl (%esp), %eax +; CHECK-X86-NEXT: addl $72012, %esp # imm = 0x1194C +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-NEXT: retl +; +; CHECK-X32-LABEL: foo: +; CHECK-X32: # %bb.0: +; CHECK-X32-NEXT: movl %esp, %r11d +; CHECK-X32-NEXT: subl $69632, %r11d # imm = 0x11000 +; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X32-NEXT: movq $0, (%esp) +; CHECK-X32-NEXT: cmpl %r11d, %esp +; CHECK-X32-NEXT: jne .LBB0_1 +; CHECK-X32-NEXT: # %bb.2: +; CHECK-X32-NEXT: subl $2248, %esp # imm = 0x8C8 +; CHECK-X32-NEXT: .cfi_def_cfa_offset 71888 +; CHECK-X32-NEXT: movl $1, 264(%esp) +; CHECK-X32-NEXT: movl $1, 28664(%esp) +; CHECK-X32-NEXT: movl -128(%esp), %eax +; CHECK-X32-NEXT: addl $71880, %esp # imm = 0x118C8 +; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X32-NEXT: retq %a = alloca i32, i64 18000, align 16 %b0 = getelementptr inbounds i32, i32* %a, i64 98 %b1 = getelementptr inbounds i32, i32* %a, i64 7198