From e8dc11a735e594172e9d23fee67d13c9397655cf Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 17 Oct 2024 10:32:44 +0200 Subject: [PATCH] Re-land: [ARM] Fix frame chains with M-profile PACBTI (#110285) When using AAPCS-compliant frame chains with PACBTI return address signing, there were a number of bugs in the generation of the frame pointer and function prologues. The most obvious was that we sometimes would modify r11 before pushing it to the stack, so it wasn't preserved as required by the PCS. We also sometimes did not push R11 and LR adjacent to one another on the stack, or used R11 as a frame pointer without pointing it at the saved value of R11, both of which are required to have an AAPCS compliant frame chain. The original work of this patch was done by James Westwood, reviewed as #82801 and #81249, with some tidy-ups done by Mark Murray and myself. --- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 5 +- llvm/lib/Target/ARM/ARMCallingConv.td | 19 ++- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 145 +++++++++++------ llvm/lib/Target/ARM/ARMSubtarget.cpp | 7 + llvm/lib/Target/ARM/ARMSubtarget.h | 12 ++ .../CodeGen/Thumb2/pacbti-m-frame-chain.ll | 150 ++++++++++++++++++ 6 files changed, 281 insertions(+), 57 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3f28ce8ca4b559..aad305cce03961 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_iOS_SaveList; if (PushPopSplit == ARMSubtarget::SplitR7) - return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList + return STI.createAAPCSFrameChain() ? 
CSR_AAPCS_SplitPush_R7_SaveList : CSR_ATPCS_SplitPush_SaveList; + if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) + return CSR_AAPCS_SplitPush_R11_SaveList; + return CSR_AAPCS_SaveList; } diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index d14424c2decac3..27f175a7003366 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; -// When enforcing an AAPCS compliant frame chain, R11 is used as the frame -// pointer even for Thumb targets, where split pushes are necessary. -// This AAPCS alternative makes sure the frame index slots match the push -// order in that case. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, - R7, R6, R5, R4, - R10, R9, R8, - (sequence "D%u", 15, 8))>; +// Sometimes we need to split the push of the callee-saved GPRs into two +// regions, to ensure that the frame chain record is set up correctly. These +// list the callee-saved registers in the order they end up on the stack, which +// depends on whether the frame pointer is r7 or r11. 
+def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, + LR, R11, + (sequence "D%u", 15, 8))>; +def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index e0703457aa8139..4f366dcffcd29a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg, // push {r0-r10, r12} GPRCS1 // vpush {r8-d15} DPRCS1 // push {r11, lr} GPRCS2 + // + // SplitR11AAPCSSignRA: + // push {r0-r10, r12} GPRCS1 + // push {r11, lr} GPRCS2 + // vpush {d8-d15} DPRCS1 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at // the top of the stack frame. @@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg, return SpillArea::GPRCS1; case ARM::LR: - if (Variation == ARMSubtarget::SplitR11WindowsSEH) + if (Variation == ARMSubtarget::SplitR11WindowsSEH || + Variation == ARMSubtarget::SplitR11AAPCSSignRA) return SpillArea::GPRCS2; else return SpillArea::GPRCS1; @@ -863,6 +869,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). int MaxRegBytes = 8 * 4; + if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) + // Here, r11 can be stored below all of r4-r15. + MaxRegBytes = 11 * 4; if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { // Here, r11 can be stored below all of r4-r15 plus d8-d15. MaxRegBytes = 11 * 4 + 8 * 8; @@ -935,17 +944,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes, and some important frame indices. 
+ SpillArea FramePtrSpillArea = SpillArea::GPRCS1; + bool BeforeFPPush = true; for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); - if (Reg == FramePtr) + SpillArea Area = getSpillArea(Reg, PushPopSplit, + AFI->getNumAlignedDPRCS2Regs(), RegInfo); + + if (Reg == FramePtr) { FramePtrSpillFI = FI; + FramePtrSpillArea = Area; + } if (Reg == ARM::D8) D8SpillFI = FI; - switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(), - RegInfo)) { + switch (Area) { case SpillArea::FPCXT: FPCXTSaveSize += 4; break; @@ -972,7 +987,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Move past FPCXT area. if (FPCXTSaveSize > 0) { LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); + DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush); } // Allocate the vararg register save area. @@ -980,13 +995,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); LastPush = std::prev(MBBI); - DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true); + DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush); } // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS1) + BeforeFPPush = false; } // Determine starting offsets of spill areas. These offsets are all positive @@ -1010,7 +1027,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else { DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; } - int FramePtrOffsetInPush = 0; if (HasFP) { // Offset from the CFA to the saved frame pointer, will be negative. 
[[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); @@ -1018,13 +1034,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, << ", FPOffset: " << FPOffset << "\n"); assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); - // Offset from the top of the GPRCS1 area to the saved frame pointer, will - // be negative. - FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; - LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush - << ", FramePtrSpillOffset=" - << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes) - << "\n"); AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -1036,7 +1045,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // after DPRCS1. if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS2) + BeforeFPPush = false; } // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our @@ -1049,7 +1060,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush); } } @@ -1058,7 +1069,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. 
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { - DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI), + BeforeFPPush); LastPush = MBBI++; } } @@ -1077,7 +1089,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Move GPRCS2, if using using SplitR11WindowsSEH. if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS2) + BeforeFPPush = false; } bool NeedsWinCFIStackAlloc = NeedsWinCFI; @@ -1178,28 +1192,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. - // FIXME: The above is not necessary true when PACBTI is enabled. - // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, - // so FP ends up on area two. MachineBasicBlock::iterator AfterPush; if (HasFP) { - AfterPush = std::next(GPRCS1Push); - unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); - int FPOffset = PushSize + FramePtrOffsetInPush; - if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { - AfterPush = std::next(GPRCS2Push); - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, - FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); - } else { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, - FramePtr, ARM::SP, FPOffset, - MachineInstr::FrameSetup); + MachineBasicBlock::iterator FPPushInst; + // Offset from SP immediately after the push which saved the FP to the FP + // save slot. 
+ int64_t FPOffsetAfterPush; + switch (FramePtrSpillArea) { + case SpillArea::GPRCS1: + FPPushInst = GPRCS1Push; + FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + + ArgRegsSaveSize + FPCXTSaveSize + + sizeOfSPAdjustment(*FPPushInst); + LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset " + << FPOffsetAfterPush << " after that push\n"); + break; + case SpillArea::GPRCS2: + FPPushInst = GPRCS2Push; + FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + + ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + + sizeOfSPAdjustment(*FPPushInst); + if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) + FPOffsetAfterPush += DPRCSSize + DPRGapSize; + LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset " + << FPOffsetAfterPush << " after that push\n"); + break; + default: + llvm_unreachable("frame pointer in unknown spill area"); + break; } + AfterPush = std::next(FPPushInst); + if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) + assert(FPOffsetAfterPush == 0); + + // Emit the MOV or ADD to set up the frame pointer register. + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffsetAfterPush, + MachineInstr::FrameSetup); + if (!NeedsWinCFI) { - if (FramePtrOffsetInPush + PushSize != 0) { + // Emit DWARF info to find the CFA using the frame pointer from this + // point onward. 
+ if (FPOffsetAfterPush != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), - FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)); + -MFI.getObjectOffset(FramePtrSpillFI))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1712,7 +1749,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && - PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { + (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH && + PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -2945,18 +2983,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots( const auto &AFI = *MF.getInfo(); if (AFI.shouldSignReturnAddress()) { // The order of register must match the order we push them, because the - // PEI assigns frame indices in that order. When compiling for return - // address sign and authenication, we use split push, therefore the orders - // we want are: - // LR, R7, R6, R5, R4, , R11, R10, R9, R8, D15-D8 - CSI.insert(find_if(CSI, - [=](const auto &CS) { - Register Reg = CS.getReg(); - return Reg == ARM::R10 || Reg == ARM::R11 || - Reg == ARM::R8 || Reg == ARM::R9 || - ARM::DPRRegClass.contains(Reg); - }), - CalleeSavedInfo(ARM::R12)); + // PEI assigns frame indices in that order. 
That order depends on the + // PushPopSplitVariation, there are only two cases which we use with return + // address signing: + switch (STI.getPushPopSplitVariation(MF)) { + case ARMSubtarget::SplitR7: + // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8 + CSI.insert(find_if(CSI, + [=](const auto &CS) { + Register Reg = CS.getReg(); + return Reg == ARM::R10 || Reg == ARM::R11 || + Reg == ARM::R8 || Reg == ARM::R9 || + ARM::DPRRegClass.contains(Reg); + }), + CalleeSavedInfo(ARM::R12)); + break; + case ARMSubtarget::SplitR11AAPCSSignRA: + // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR + // on the stack. + CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12)); + break; + default: + llvm_unreachable("Unexpected CSR split with return address signing"); + } } return false; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index c4a782bc40910a..9adfb1fab5f084 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const { F.needsUnwindTableEntry() && (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF))) return SplitR11WindowsSEH; + + // Returns SplitR11AAPCSSignRA if R11 and lr are not adjacent to each + // other in the list of callee saved registers in a frame, and branch + // signing is enabled. 
+ if (MF.getInfo()->shouldSignReturnAddress() && + getFramePointerReg() == ARM::R11) + return SplitR11AAPCSSignRA; return NoSplit; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 7917ddc17bdb81..214c5f1b45e556 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// vpush {d8-d15} /// push {r11, lr} SplitR11WindowsSEH, + + /// When generating AAPCS-compliant frame chains, R11 is the frame pointer, + /// and must be pushed adjacent to the return address (LR). Normally this + /// isn't a problem, because the only register between them is r12, which is + /// the intra-procedure-call scratch register, so doesn't need to be saved. + /// However, when PACBTI is in use, r12 contains the authentication code, so + /// does need to be saved. This means that we need a separate push for R11 + /// and LR. + /// push {r0-r10, r12} + /// push {r11, lr} + /// vpush {d8-d15} + SplitR11AAPCSSignRA, }; protected: diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll new file mode 100644 index 00000000000000..8bcf87130c5400 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s + +; int test1() { +; return 0; +; } +define i32 @test1() "sign-return-address"="non-leaf" { +; CHECK-LABEL: test1: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset lr, -8 +; CHECK-NEXT: .cfi_offset r11, -12 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + ret i32 0 +} + +; void foo(int n) { +; int a[n]; +; bar(a); +; } +define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" { +; CHECK-LABEL: test2: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r7, ra_auth_code} +; CHECK-NEXT: push.w {r4, r7, r12} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r4, -12 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r11, -20 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: movs r1, #7 +; CHECK-NEXT: add.w r0, r1, r0, lsl #2 +; CHECK-NEXT: bic r0, r0, #7 +; CHECK-NEXT: sub.w r0, sp, r0 +; CHECK-NEXT: mov sp, r0 +; CHECK-NEXT: bl take_ptr +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: pop.w {r4, r7, r12} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + %vla = alloca i32, i32 %n, align 4 + call void @take_ptr(ptr noundef nonnull %vla) + ret void +} + +; void test3(int c, float e, int z) { +; if (c) +; knr(); +; take_ptr(alloca(z)); +; if (e) +; knr(); +; } +define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" { +; CHECK-LABEL: test3: +; CHECK: 
.cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r12} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; CHECK-NEXT: .cfi_offset lr, -24 +; CHECK-NEXT: .cfi_offset r11, -28 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: it ne +; CHECK-NEXT: blne knr +; CHECK-NEXT: adds r0, r5, #7 +; CHECK-NEXT: bic r0, r0, #7 +; CHECK-NEXT: sub.w r0, sp, r0 +; CHECK-NEXT: mov sp, r0 +; CHECK-NEXT: bl take_ptr +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bl __aeabi_fcmpeq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: bleq knr +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + %tobool.not = icmp eq i32 %c, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @knr() + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = alloca i8, i32 %z, align 8 + call void @take_ptr(ptr noundef nonnull %0) + %tobool1 = fcmp une float %e, 0.000000e+00 + br i1 %tobool1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + call void @knr() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + +declare void @knr(...) +declare void @take_ptr(ptr noundef)