diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 6a02c6d53888..34402f7e1712 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -179,6 +179,14 @@ def form_truncstore : GICombineRule<
   (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
 >;
 
+def form_auth_load_matchdata : GIDefMatchData<"AuthLoadMatchInfo">;
+def form_auth_load : GICombineRule<
+  (defs root:$root, form_auth_load_matchdata:$matchinfo),
+  (match (wip_match_opcode G_LOAD):$root,
+         [{ return matchFormAuthLoad(*${root}, MRI, Helper, ${matchinfo}); }]),
+  (apply [{ applyFormAuthLoad(*${root}, MRI, B, Helper, Observer, ${matchinfo}); }])
+>;
+
 def fold_merge_to_zext : GICombineRule<
   (defs root:$d),
   (match (wip_match_opcode G_MERGE_VALUES):$d,
@@ -231,6 +239,7 @@ def AArch64PostLegalizerLowering
                        [shuffle_vector_lowering, vashr_vlshr_imm,
                         icmp_lowering, build_vector_lowering,
                         lower_vector_fcmp, form_truncstore,
+                        form_auth_load,
                         vector_sext_inreg_to_shift,
                         unmerge_ext_to_unmerge]> {
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandHardenedPseudos.cpp b/llvm/lib/Target/AArch64/AArch64ExpandHardenedPseudos.cpp
index db9c8d174450..8608dbfd9de8 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandHardenedPseudos.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandHardenedPseudos.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
@@ -54,6 +55,7 @@ class AArch64ExpandHardenedPseudos : public MachineFunctionPass {
 
 private:
   bool expandPtrAuthPseudo(MachineInstr &MI);
+  bool expandAuthLoad(MachineInstr &MI);
   bool expandMI(MachineInstr &MI);
 };
 
@@ -306,6 +308,125 @@ bool AArch64ExpandHardenedPseudos::expandPtrAuthPseudo(MachineInstr &MI) {
   return true;
 }
 
+bool AArch64ExpandHardenedPseudos::expandAuthLoad(MachineInstr &MI) {
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  auto MBBI = MI.getIterator();
+
+  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+  const AArch64InstrInfo *TII = STI.getInstrInfo();
+
+  LLVM_DEBUG(dbgs() << "Expanding: " << MI << "\n");
+
+  bool IsPre = MI.getOpcode() == AArch64::LDRApre;
+
+  MachineOperand DstOp = MI.getOperand(0);
+  int64_t Offset = MI.getOperand(1).getImm();
+  auto Key = (AArch64PACKey::ID)MI.getOperand(2).getImm();
+  uint64_t Disc = MI.getOperand(3).getImm();
+  unsigned AddrDisc = MI.getOperand(4).getReg();
+
+  unsigned DiscReg = AddrDisc;
+  if (Disc) {
+    assert(isUInt<16>(Disc) && "Integer discriminator is too wide");
+
+    if (AddrDisc != AArch64::XZR) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+          .addReg(AArch64::XZR)
+          .addReg(AddrDisc)
+          .addImm(0);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X17)
+          .addReg(AArch64::X17)
+          .addImm(Disc)
+          .addImm(/*shift=*/48);
+    } else {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X17)
+          .addImm(Disc)
+          .addImm(/*shift=*/0);
+    }
+    DiscReg = AArch64::X17;
+  }
+
+  unsigned AUTOpc = getAUTOpcodeForKey(Key, DiscReg == AArch64::XZR);
+  auto MIB = BuildMI(MBB, MBBI, DL, TII->get(AUTOpc), AArch64::X16)
+                 .addReg(AArch64::X16);
+  if (DiscReg != AArch64::XZR)
+    MIB.addReg(DiscReg);
+
+  // We have a few options for offset folding:
+  // - 0 offset: LDRXui
+  // - no wb, uimm12s8 offset: LDRXui
+  // - no wb, simm9 offset: LDURXi
+  // - wb, simm9 offset: LDRXpre
+  // - no wb, any offset: expanded MOVImm + LDRXroX
+  // - wb, any offset: expanded MOVImm + ADD + LDRXui
+  if (!Offset || (!IsPre && isShiftedUInt<12, 3>(Offset))) {
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
+        .add(DstOp)
+        .addUse(AArch64::X16)
+        .addImm(Offset / 8);
+  } else if (!IsPre && Offset && isInt<9>(Offset)) {
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDURXi))
+        .add(DstOp)
+        .addUse(AArch64::X16)
+        .addImm(Offset);
+  } else if (IsPre && Offset && isInt<9>(Offset)) {
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre), AArch64::X16)
+        .add(DstOp)
+        .addUse(AArch64::X16)
+        .addImm(Offset);
+  } else {
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> ImmInsns;
+    AArch64_IMM::expandMOVImm(Offset, 64, ImmInsns);
+
+    // X17 is dead at this point, use it as the offset register
+    for (auto &ImmI : ImmInsns) {
+      switch (ImmI.Opcode) {
+      default: llvm_unreachable("invalid ldra imm expansion opc!"); break;
+
+      case AArch64::ORRXri:
+        BuildMI(MBB, MBBI, DL, TII->get(ImmI.Opcode), AArch64::X17)
+            .addReg(AArch64::XZR)
+            .addImm(ImmI.Op2);
+        break;
+      case AArch64::MOVNXi:
+      case AArch64::MOVZXi: {
+        BuildMI(MBB, MBBI, DL, TII->get(ImmI.Opcode), AArch64::X17)
+            .addImm(ImmI.Op1)
+            .addImm(ImmI.Op2);
+      } break;
+      case AArch64::MOVKXi: {
+        BuildMI(MBB, MBBI, DL, TII->get(ImmI.Opcode), AArch64::X17)
+            .addReg(AArch64::X17)
+            .addImm(ImmI.Op1)
+            .addImm(ImmI.Op2);
+      } break;
+      }
+    }
+
+    if (IsPre) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrs), AArch64::X16)
+          .addReg(AArch64::X16)
+          .addReg(AArch64::X17)
+          .addImm(0);
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
+          .add(DstOp)
+          .addUse(AArch64::X16)
+          .addImm(/*Offset=*/0);
+    } else {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXroX))
+          .add(DstOp)
+          .addReg(AArch64::X16)
+          .addReg(AArch64::X17)
+          .addImm(0)
+          .addImm(0);
+    }
+  }
+
+  return true;
+}
+
 bool AArch64ExpandHardenedPseudos::expandMI(MachineInstr &MI) {
   switch (MI.getOpcode()) {
   case AArch64::BR_JumpTable:
@@ -313,6 +434,9 @@ bool AArch64ExpandHardenedPseudos::expandMI(MachineInstr &MI) {
   case AArch64::LOADgotPAC:
   case AArch64::MOVaddrPAC:
     return expandPtrAuthPseudo(MI);
+  case AArch64::LDRA:
+  case AArch64::LDRApre:
+    return expandAuthLoad(MI);
   default:
     return false;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 722e43651d7e..9a9426e9245d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -358,6 +358,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool tryIndexedLoad(SDNode *N);
 
+  bool tryAuthLoad(SDNode *N);
+
   void SelectPtrauthAuth(SDNode *N);
   void SelectPtrauthResign(SDNode *N);
 
@@ -1640,6 +1642,163 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::tryAuthLoad(SDNode *N) {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT VT = LD->getMemoryVT();
+  if (VT != MVT::i64)
+    return false;
+
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "invalid 64bit extload");
+
+  ISD::MemIndexedMode AM = LD->getAddressingMode();
+  if (AM != ISD::PRE_INC && AM != ISD::UNINDEXED)
+    return false;
+  bool IsPre = AM == ISD::PRE_INC;
+
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+
+  SDValue Base = Ptr;
+
+  int64_t OffsetVal = 0;
+  if (IsPre) {
+    OffsetVal = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
+  } else if (CurDAG->isBaseWithConstantOffset(Base)) {
+    // We support both 'base' and 'base + constant offset' modes.
+    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Base.getOperand(1));
+    if (!RHS)
+      return false;
+    OffsetVal = RHS->getSExtValue();
+    Base = Base.getOperand(0);
+  }
+
+  // The base must be of the form:
+  //   (int_ptrauth_auth <signedbase>, <key>, <disc>)
+  // with disc being either a constant int, or:
+  //   (int_ptrauth_blend <addrdisc>, <const int>)
+  if (Base.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+    return false;
+
+  unsigned IntID = cast<ConstantSDNode>(Base.getOperand(0))->getZExtValue();
+  if (IntID != Intrinsic::ptrauth_auth)
+    return false;
+
+  unsigned KeyC = cast<ConstantSDNode>(Base.getOperand(2))->getZExtValue();
+  bool IsDKey = KeyC == AArch64PACKey::DA || KeyC == AArch64PACKey::DB;
+  SDValue Disc = Base.getOperand(3);
+
+  Base = Base.getOperand(1);
+
+  bool ZeroDisc = isNullConstant(Disc);
+  SDValue IntDisc, AddrDisc;
+  std::tie(IntDisc, AddrDisc) = extractPtrauthBlendDiscriminators(Disc, CurDAG);
+
+  // If this is an indexed pre-inc load, we obviously need the writeback form.
+  bool needsWriteback = IsPre;
+  // If not, but the base authenticated pointer has any other use, it's
+  // beneficial to use the writeback form, to "writeback" the auth, even if
+  // there is no base+offset addition.
+  if (!Ptr.hasOneUse()) {
+    needsWriteback = true;
+
+    // However, we can only do that if we don't introduce cycles between the
+    // load node and any other user of the pointer computation nodes. That can
+    // happen if the load node uses any of said other users.
+    // In other words: we can only do this transformation if none of the other
+    // uses of the pointer computation to be folded are predecessors of the load
+    // we're folding into.
+    //
+    // Visited is a cache containing nodes that are known predecessors of N.
+    // Worklist is the set of nodes we're looking for predecessors of.
+    // For the first lookup, that only contains the load node N. Each call to
+    // hasPredecessorHelper adds any of the potential predecessors of N to the
+    // Worklist.
+    SmallPtrSet<const SDNode *, 16> Visited;
+    SmallVector<const SDNode *, 8> Worklist;
+    Worklist.push_back(N);
+    for (SDNode *U : Ptr.getNode()->uses())
+      if (SDNode::hasPredecessorHelper(U, Visited, Worklist, /*Max=*/32,
+                                       /*TopologicalPrune=*/true))
+        return false;
+  }
+
+  // We have 2 main isel alternatives:
+  // - LDRAA/LDRAB, writeback or indexed. Zero disc, small offsets, D key.
+  // - LDRA/LDRApre. Pointer needs to be in X16.
+  SDLoc DL(N);
+  MachineSDNode *Res = nullptr;
+  SDValue Writeback, ResVal, OutChain;
+
+  // If the discriminator is zero and the offset fits, we can use LDRAA/LDRAB.
+  // Do that here to avoid needlessly constraining regalloc into using X16.
+  if (ZeroDisc && isShiftedInt<10, 3>(OffsetVal) && IsDKey) {
+    unsigned Opc = 0;
+    switch (KeyC) {
+    case AArch64PACKey::DA:
+      Opc = needsWriteback ? AArch64::LDRAAwriteback : AArch64::LDRAAindexed;
+      break;
+    case AArch64PACKey::DB:
+      Opc = needsWriteback ? AArch64::LDRABwriteback : AArch64::LDRABindexed;
+      break;
+    default:
+      llvm_unreachable("Invalid key for LDRAA/LDRAB");
+    }
+    // The offset is encoded as scaled, for an element size of 8 bytes.
+    SDValue Offset = CurDAG->getTargetConstant(OffsetVal / 8, DL, MVT::i64);
+    SDValue Ops[] = {Base, Offset, Chain};
+    Res = needsWriteback ?
+      CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::i64, MVT::Other, Ops) :
+      CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::Other, Ops);
+    if (needsWriteback) {
+      Writeback = SDValue(Res, 0);
+      ResVal = SDValue(Res, 1);
+      OutChain = SDValue(Res, 2);
+    } else {
+      ResVal = SDValue(Res, 0);
+      OutChain = SDValue(Res, 1);
+    }
+  } else {
+    // Otherwise, use the generalized LDRA pseudos.
+    unsigned Opc = needsWriteback ? AArch64::LDRApre : AArch64::LDRA;
+
+    SDValue X16Copy = CurDAG->getCopyToReg(Chain, DL, AArch64::X16,
+                                           Base, SDValue());
+    SDValue Offset = CurDAG->getTargetConstant(OffsetVal, DL, MVT::i64);
+    SDValue Key = CurDAG->getTargetConstant(KeyC, DL, MVT::i32);
+    SDValue Ops[] = {Offset, Key, IntDisc, AddrDisc, X16Copy.getValue(1)};
+    Res = CurDAG->getMachineNode(Opc, DL, MVT::i64, MVT::Other, MVT::Glue, Ops);
+    if (needsWriteback)
+      Writeback = CurDAG->getCopyFromReg(SDValue(Res, 1), DL, AArch64::X16,
+                                         MVT::i64, SDValue(Res, 2));
+    ResVal = SDValue(Res, 0);
+    OutChain = SDValue(Res, 1);
+  }
+
+  if (IsPre) {
+    // If the original load was pre-inc, the resulting LDRA is writeback.
+    assert(needsWriteback && "preinc loads can't be selected into non-wb ldra");
+    ReplaceUses(SDValue(N, 1), Writeback); // writeback
+    ReplaceUses(SDValue(N, 0), ResVal);    // loaded value
+    ReplaceUses(SDValue(N, 2), OutChain);  // chain
+  } else if (needsWriteback) {
+    // If the original load was unindexed, but we emitted a writeback form,
+    // we need to replace the uses of the original auth(signedbase)[+offset]
+    // computation.
+    ReplaceUses(Ptr, Writeback);           // writeback
+    ReplaceUses(SDValue(N, 0), ResVal);    // loaded value
+    ReplaceUses(SDValue(N, 1), OutChain);  // chain
+  } else {
+    // Otherwise, we selected a simple load to a simple non-wb ldra.
+    assert(Ptr.hasOneUse() && "reused auth ptr should be folded into ldra");
+    ReplaceUses(SDValue(N, 0), ResVal);    // loaded value
+    ReplaceUses(SDValue(N, 1), OutChain);  // chain
+  }
+
+  CurDAG->RemoveDeadNode(N);
+  return true;
+}
+
 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      unsigned SubRegIdx) {
   SDLoc dl(N);
@@ -4359,8 +4518,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     break;
 
   case ISD::LOAD: {
-    // Try to select as an indexed load. Fall through to normal processing
-    // if we can't.
+    if (tryAuthLoad(Node))
+      return;
     if (tryIndexedLoad(Node))
       return;
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 0f3ef2327769..83e851a47ab8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,6 +25,23 @@ def G_ADD_LOW : AArch64GenericInstruction {
  let hasSideEffects = 0;
 }
 
+// Represents an auth-load instruction. Produced post-legalization from
+// G_LOADs of ptrauth_auth intrinsics, with variants for keys/discriminators.
+def G_LDRA : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type1:$addr, i64imm:$offset, i32imm:$key, i64imm:$disc, type0:$addrdisc);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
+// Represents a pre-inc writeback auth-load instruction. Similar to G_LDRA.
+def G_LDRApre : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$addr, i64imm:$offset, i32imm:$key, i64imm:$disc, type0:$addrdisc);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
 // Pseudo for a rev16 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_REV16 : AArch64GenericInstruction {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 5c04f054dcdf..b06622b33e02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1626,6 +1626,42 @@ let Predicates = [HasPAuth] in {
   defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
   defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
 
+  // LDRA pseudo: generalized LDRAA/Bindexed, allowing arbitrary discriminators,
+  // and wider offsets.
+  // This directly manipulates x16/x17, which are the only registers the OS
+  // guarantees are safe to use for sensitive operations.
+  // The loaded value is in $Rt. The signed pointer is in X16.
+  // $Rt could be GPR64 but is GPR64noip to help out regalloc: we imp-def 2/3rds
+  // of the difference between the two, and the 3rd reg (LR) is often reserved.
+  def LDRA : Pseudo<(outs GPR64noip:$Rt),
+                    (ins i64imm:$Offset, i32imm:$Key, i64imm:$Disc,
+                         GPR64noip:$AddrDisc),
+                    []>, Sched<[]> {
+    let isCodeGenOnly = 1;
+    let hasSideEffects = 1;
+    let mayStore = 0;
+    let mayLoad = 1;
+    let Size = 48;
+    let Defs = [X16,X17];
+    let Uses = [X16];
+  }
+
+  // Pre-indexed + writeback variant of LDRA.
+  // The signed pointer is in X16, and is written back, after being
+  // authenticated and offset, into X16.
+  def LDRApre : Pseudo<(outs GPR64noip:$Rt),
+                       (ins i64imm:$Offset, i32imm:$Key, i64imm:$Disc,
+                            GPR64noip:$AddrDisc),
+                       []>, Sched<[]> {
+    let isCodeGenOnly = 1;
+    let hasSideEffects = 1;
+    let mayStore = 0;
+    let mayLoad = 1;
+    let Size = 48;
+    let Defs = [X16,X17];
+    let Uses = [X16];
+  }
+
   // AUT pseudo.
   // This directly manipulates x16/x17, which are the only registers the OS
   // guarantees are safe to use for sensitive operations.
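As a rough, hand-written illustration of the contract described above (not autogenerated output from this patch): for an LDRA with the DA key, an address discriminator in x1 blended with the integer discriminator 12345, and an offset of 32768 (which fits neither the uimm12s8 nor the simm9 load forms), the expansion in AArch64ExpandHardenedPseudos emits approximately the following, with the signed pointer expected in x16, x0 standing in for $Rt, and x17 reused as scratch once the AUT has consumed it:

    mov   x17, x1               ; rebuild the blended discriminator in x17
    movk  x17, #12345, lsl #48
    autda x16, x17              ; authenticate the signed pointer in x16
    mov   x17, #32768           ; offset too large to fold into the load
    ldr   x0, [x16, x17]        ; load through the authenticated pointer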
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 54c9e33e9a86..085763ef1847 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -213,6 +213,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectPtrAuthGlobalValue(MachineInstr &I,
                                 MachineRegisterInfo &MRI) const;
+  bool selectAuthLoad(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -2853,6 +2854,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
     return selectPtrAuthGlobalValue(I, MRI);
 
+  case AArch64::G_LDRA:
+  case AArch64::G_LDRApre:
+    return selectAuthLoad(I, MRI);
+
   case TargetOpcode::G_ZEXTLOAD:
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE: {
@@ -6767,6 +6772,72 @@ bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
   return true;
 }
 
+bool AArch64InstructionSelector::selectAuthLoad(
+    MachineInstr &I, MachineRegisterInfo &MRI) {
+  bool Writeback = I.getOpcode() == AArch64::G_LDRApre;
+
+  Register ValReg = I.getOperand(0).getReg();
+  Register PtrReg = I.getOperand(1 + Writeback).getReg();
+  int64_t Offset = I.getOperand(2 + Writeback).getImm();
+  auto Key = static_cast<AArch64PACKey::ID>(I.getOperand(3 + Writeback).getImm());
+  uint64_t DiscImm = I.getOperand(4 + Writeback).getImm();
+  Register AddrDisc = I.getOperand(5 + Writeback).getReg();
+
+  bool IsDKey = Key == AArch64PACKey::DA || Key == AArch64PACKey::DB;
+  bool ZeroDisc = AddrDisc == AArch64::NoRegister && !DiscImm;
+
+  // If the discriminator is zero and the offset fits, we can use LDRAA/LDRAB.
+  // Do that here to avoid needlessly constraining regalloc into using X16.
+  if (ZeroDisc && isShiftedInt<10, 3>(Offset) && IsDKey) {
+    unsigned Opc = 0;
+    switch (Key) {
+    case AArch64PACKey::DA:
+      Opc = Writeback ? AArch64::LDRAAwriteback : AArch64::LDRAAindexed;
+      break;
+    case AArch64PACKey::DB:
+      Opc = Writeback ? AArch64::LDRABwriteback : AArch64::LDRABindexed;
+      break;
+    default:
+      llvm_unreachable("Invalid key for LDRAA/LDRAB");
+    }
+    // The LDRAA/LDRAB offset immediate is scaled.
+    Offset /= 8;
+    if (Writeback) {
+      MIB.buildInstr(Opc, {I.getOperand(1).getReg(), ValReg}, {PtrReg, Offset})
+          .constrainAllUses(TII, TRI, RBI);
+      RBI.constrainGenericRegister(I.getOperand(1).getReg(),
+                                   AArch64::GPR64spRegClass, MRI);
+    } else {
+      MIB.buildInstr(Opc, {ValReg}, {PtrReg, Offset})
+          .constrainAllUses(TII, TRI, RBI);
+    }
+    I.eraseFromParent();
+    return true;
+  }
+
+  if (AddrDisc == AArch64::NoRegister)
+    AddrDisc = AArch64::XZR;
+
+  // Otherwise, use the generalized LDRA pseudo.
+  MIB.buildCopy(AArch64::X16, PtrReg);
+  if (Writeback) {
+    MIB.buildInstr(AArch64::LDRApre, {ValReg},
+                   {Offset, uint64_t(Key), DiscImm, AddrDisc})
+        .constrainAllUses(TII, TRI, RBI);
+    MIB.buildCopy(I.getOperand(1).getReg(), (Register)AArch64::X16);
+    RBI.constrainGenericRegister(I.getOperand(1).getReg(),
+                                 AArch64::GPR64RegClass, MRI);
+  } else {
+    MIB.buildInstr(AArch64::LDRA, {ValReg},
+                   {Offset, uint64_t(Key), DiscImm, AddrDisc})
+        .constrainAllUses(TII, TRI, RBI);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
+
 InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
   auto MaybeImmed = getImmedFromMO(Root);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 9640a1c17b87..cc084ab719a8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1065,6 +1065,121 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
   LegalizerHelper Helper(*MI.getMF(), Observer, B);
   Helper.lower(MI, 0, /* Unused hint type */ LLT());
 }
+
+struct AuthLoadMatchInfo {
+  Register Dst = 0;
+  Register Addr = 0;
+  int64_t Offset = 0;
+  unsigned Key = 0;
+  Register Disc = 0;
+
+  bool Writeback = false;
+  Register NewAddr = 0;
+};
+
+static bool matchFormAuthLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              CombinerHelper &Helper,
+                              AuthLoadMatchInfo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_LOAD);
+  if (!MI.getMF()->getSubtarget<AArch64Subtarget>().hasPAuth())
+    return false;
+
+  MatchInfo.Dst = MI.getOperand(0).getReg();
+
+  LLT DstTy = MRI.getType(MatchInfo.Dst);
+  if (DstTy != LLT::scalar(64) && DstTy != LLT::pointer(0, 64))
+    return false;
+
+  Register LoadPtr = MI.getOperand(1).getReg();
+  if (MRI.getType(LoadPtr) != LLT::pointer(0, 64))
+    return false;
+
+  // When matching the writeback variant, we may need to writeback either the
+  // ptr-typed (used by the G_LOAD) or int-typed (def'd by @llvm.ptrauth.auth)
+  // base (+offset, with auth) pointer. Try at each level, but at most once.
+  auto TryWriteback = [&](Register Ptr, MachineInstr &KnownUseMI) {
+    if (MRI.hasOneNonDBGUse(Ptr))
+      return true;
+    if (MatchInfo.Writeback)
+      return false;
+    for (auto &UseMI : MRI.use_nodbg_instructions(Ptr))
+      if (&KnownUseMI != &UseMI && !Helper.dominates(MI, UseMI))
+        return false;
+    MatchInfo.Writeback = true;
+    MatchInfo.NewAddr = Ptr;
+    return true;
+  };
+
+  if (!TryWriteback(LoadPtr, MI))
+    return false;
+
+  // Try to match different variants of offset additions to find the base ptr.
+  Register BasePtr = AArch64::NoRegister;
+
+  MachineInstr *LoadPtrDef = getDefIgnoringCopies(LoadPtr, MRI);
+  if (!LoadPtrDef)
+    return false;
+
+  if (LoadPtrDef->getOpcode() == TargetOpcode::G_INTTOPTR) {
+    Register IntPtr = LoadPtrDef->getOperand(1).getReg();
+
+    // Check if the int-typed ptr is the one in need of writeback.
+    if (!TryWriteback(IntPtr, *LoadPtrDef))
+      return false;
+
+    if (!mi_match(IntPtr, MRI, m_any_of(
+                      m_GAdd(m_Reg(BasePtr), m_ICst(MatchInfo.Offset)),
+                      m_Reg(BasePtr))))
+      return false;
+
+  } else if (!mi_match(*LoadPtrDef, MRI, m_GPtrAdd(m_GIntToPtr(m_Reg(BasePtr)),
+                                                   m_ICst(MatchInfo.Offset)))) {
+    return false;
+  }
+
+  MachineInstr *AUT = getOpcodeDef(TargetOpcode::G_INTRINSIC, BasePtr, MRI);
+  if (!AUT ||
+      cast<GIntrinsic>(AUT)->getIntrinsicID() != Intrinsic::ptrauth_auth)
+    return false;
+
+  Register RawPtr;
+  if (!mi_match(AUT->getOperand(2).getReg(), MRI, m_GPtrToInt(m_Reg(RawPtr))))
+    return false;
+
+  MatchInfo.Addr = RawPtr;
+  MatchInfo.Key = AUT->getOperand(3).getImm();
+  MatchInfo.Disc = AUT->getOperand(4).getReg();
+  return true;
+}
+
+static bool applyFormAuthLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B,
+                              CombinerHelper &Helper,
+                              GISelChangeObserver &Observer,
+                              AuthLoadMatchInfo &MatchInfo) {
+  MachineIRBuilder MIB(MI);
+
+  Register AddrDisc;
+  uint16_t DiscImm;
+  std::tie(DiscImm, AddrDisc) =
+      extractPtrauthBlendDiscriminators(MatchInfo.Disc, MRI);
+
+  if (MatchInfo.Writeback) {
+    MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.NewAddr);
+    MIB.buildInstr(AArch64::G_LDRApre, {MatchInfo.Dst, MatchInfo.NewAddr},
+                   {MatchInfo.Addr, MatchInfo.Offset, (uint64_t)MatchInfo.Key,
+                    (uint64_t)DiscImm, AddrDisc})
+        .addMemOperand(*MI.memoperands_begin());
+    AddrDef.eraseFromParent();
+  } else {
+    MIB.buildInstr(AArch64::G_LDRA, {MatchInfo.Dst},
+                   {MatchInfo.Addr, MatchInfo.Offset, (uint64_t)MatchInfo.Key,
+                    (uint64_t)DiscImm, AddrDisc})
+        .addMemOperand(*MI.memoperands_begin());
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
 
 /// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
 ///           => unused, <N x t> = unmerge v
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-load.ll b/llvm/test/CodeGen/AArch64/ptrauth-load.ll
new file mode 100644
index 000000000000..f36bfc9a9dfc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-load.ll
@@ -0,0 +1,716 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -global-isel=0 | FileCheck %s
+; RUN: llc < %s -mtriple arm64e-apple-darwin -verify-machineinstrs -global-isel=1 -global-isel-abort=1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Basic: no discriminator, no offset.
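+;
+; (In all of these tests, the i32 key operand of @llvm.ptrauth.auth follows
+; AArch64PACKey: 0 = IA, 1 = IB, 2 = DA, 3 = DB. Only the data keys DA/DB are
+; eligible for LDRAA/LDRAB; every other combination is selected to the LDRA
+; pseudos, i.e. an explicit AUT of x16 followed by a plain load.)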
+ +define i64 @test_da(i64* %ptr) { +; CHECK-LABEL: test_da: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x0, [x0] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +define i64 @test_db(i64* %ptr) { +; CHECK-LABEL: test_db: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrab x0, [x0] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +define i64 @test_ia(i64* %ptr) { +; CHECK-LABEL: test_ia: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autiza x16 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 0, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +define i64 @test_ib(i64* %ptr) { +; CHECK-LABEL: test_ib: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autizb x16 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 1, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +; No discriminator, interesting offsets. + +define i64 @test_da_8(i64* %ptr) { +; CHECK-LABEL: test_da_8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x0, [x0, #8] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 8 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_db_simm9max(i64* %ptr) { +; CHECK-LABEL: test_db_simm9max: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrab x0, [x0, #4088] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, 4088 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_db_uimm12max(i64* %ptr) { +; CHECK-LABEL: test_db_uimm12max: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdzb x16 +; CHECK-NEXT: ldr x0, [x16, #32760] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, 32760 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_4(i64* %ptr) { +; CHECK-LABEL: test_da_4: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: ldur x0, [x16, #4] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 4 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_largeoff_12b(i64* %ptr) { +; CHECK-LABEL: test_da_largeoff_12b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #32768 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 32768 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_largeoff_32b(i64* %ptr) { +; CHECK-LABEL: test_da_largeoff_32b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; 
CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #2 +; CHECK-NEXT: movk x17, #1, lsl #32 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 4294967298 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_m8(i64* %ptr) { +; CHECK-LABEL: test_da_m8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x0, [x0, #-8] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, -8 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_db_simm9min(i64* %ptr) { +; CHECK-LABEL: test_db_simm9min: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrab x0, [x0, #-4096] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, -4096 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_neg_largeoff_12b(i64* %ptr) { +; CHECK-LABEL: test_da_neg_largeoff_12b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #-32768 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, -32768 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_neg_largeoff_32b(i64* %ptr) { +; CHECK-LABEL: test_da_neg_largeoff_32b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #-3 +; CHECK-NEXT: movk x17, #65534, lsl #32 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, -4294967299 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +define i64 @test_da_disc_m256(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_disc_m256: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autda x16, x1 +; CHECK-NEXT: ldur x0, [x16, #-256] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 %disc) + %tmp2 = add i64 %tmp1, -256 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +; No discriminator, interesting offsets, writeback. + +define i64* @test_da_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_da_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x8, [x0, #0]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + store i64 %tmp3, i64* %dst + ret i64* %tmp2 +} + +define i64* @test_da_8_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_da_8_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x8, [x0, #8]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 8 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_da_simm9max_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_da_simm9max_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldraa x8, [x0, #4088]! 
+; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 4088 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_da_uimm12max_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_da_uimm12max_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #32760 +; CHECK-NEXT: add x16, x16, x17 +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 32760 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_db_4_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_db_4_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdzb x16 +; CHECK-NEXT: ldr x8, [x16, #4]! +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, 4 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_da_largeoff_12b_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_da_largeoff_12b_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: mov x17, #32768 +; CHECK-NEXT: add x16, x16, x17 +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 32768 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_db_m256_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_db_m256_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrab x8, [x0, #-256]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, -256 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_db_simm9min_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_db_simm9min_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldrab x8, [x0, #-4096]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, -4096 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +define i64* @test_db_neg_largeoff_12b_wb(i64* %ptr, i64* %dst) { +; CHECK-LABEL: test_db_neg_largeoff_12b_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdzb x16 +; CHECK-NEXT: mov x17, #-32768 +; CHECK-NEXT: add x16, x16, x17 +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 3, i64 0) + %tmp2 = add i64 %tmp1, -32768 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + ret i64* %tmp3 +} + +; Writeback, with a potential cycle. 
+ +define void @test_da_wb_cycle(i64* %ptr, i64* %dst, i64* %dst2, i64* %dst3) { +; CHECK-LABEL: test_da_wb_cycle: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: str x16, [x2] +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = inttoptr i64 %tmp1 to i64* + store i64 %tmp1, i64* %dst2 + %tmp3 = load i64, i64* %tmp2 + store i64 %tmp3, i64* %dst + ret void +} + +; Writeback multiple-use of the auth. + +define i64* @test_da_8_wb_use(i64* %ptr, i64* %dst, i64* %dst2) { +; CHECK-LABEL: test_da_8_wb_use: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: ldraa x8, [x0, #8]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: str x16, [x2] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 8 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + store i64 %tmp1, i64* %dst2 + ret i64* %tmp3 +} + +; Writeback multiple-use of the auth, invalid offset. + +define i64* @test_da_256_wb_use(i64* %ptr, i64* %dst, i64* %dst2) { +; CHECK-LABEL: test_da_256_wb_use: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autdza x16 +; CHECK-NEXT: ldraa x8, [x0, #256]! +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: str x16, [x2] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 0) + %tmp2 = add i64 %tmp1, 256 + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + store i64 %tmp4, i64* %dst + store i64 %tmp1, i64* %dst2 + ret i64* %tmp3 +} + +; Integer discriminator, no offset. + +define i64 @test_da_constdisc(i64* %ptr) { +; CHECK-LABEL: test_da_constdisc: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, #12345 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 12345) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +define i64 @test_ib_constdisc(i64* %ptr) { +; CHECK-LABEL: test_ib_constdisc: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, #12345 +; CHECK-NEXT: autib x16, x17 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 1, i64 12345) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +; "Address" (register) discriminator, no offset. + +define i64 @test_da_addrdisc(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_addrdisc: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: autda x16, x1 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = ptrtoint i64* %ptr to i64 + %tmp1 = call i64 @llvm.ptrauth.auth(i64 %tmp0, i32 2, i64 %disc) + %tmp2 = inttoptr i64 %tmp1 to i64* + %tmp3 = load i64, i64* %tmp2 + ret i64 %tmp3 +} + +; Blend discriminator, no offset. 
+ +define i64 @test_da_blend(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x0, [x16] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = inttoptr i64 %tmp2 to i64* + %tmp4 = load i64, i64* %tmp3 + ret i64 %tmp4 +} + +; Blend discriminator, interesting offsets. + +define i64 @test_da_blend_8(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_8: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x0, [x16, #8] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 8 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 %tmp5 +} + +define i64 @test_da_blend_uimm12max(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_uimm12max: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x0, [x16, #32760] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 32760 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 %tmp5 +} + +define i64 @test_da_blend_largeoff_32b(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_largeoff_32b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: mov x17, #2 +; CHECK-NEXT: movk x17, #1, lsl #32 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 4294967298 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 %tmp5 +} + +define i64 @test_da_blend_m4(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_m4: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldur x0, [x16, #-4] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, -4 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 %tmp5 +} + +define i64 @test_da_blend_simm9min(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_simm9min: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: mov x17, #-4096 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, -4096 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 
%tmp5 +} + +define i64 @test_da_blend_neg_largeoff_32b(i64* %ptr, i64 %disc) { +; CHECK-LABEL: test_da_blend_neg_largeoff_32b: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: mov x17, #-3 +; CHECK-NEXT: movk x17, #65534, lsl #32 +; CHECK-NEXT: ldr x0, [x16, x17] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, -4294967299 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + ret i64 %tmp5 +} + +; Blend discriminator, interesting offsets, writeback. + +define i64 @test_da_blend_8_wb(i64* %ptr, i64 %disc, i64* %dst) { +; CHECK-LABEL: test_da_blend_8_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x8, [x16, #8]! +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 8 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + store i64 %tmp5, i64* %dst + ret i64 %tmp3 +} + +define i64 @test_da_blend_simm9umax_wb(i64* %ptr, i64 %disc, i64* %dst) { +; CHECK-LABEL: test_da_blend_simm9umax_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: ldr x8, [x16, #248]! +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 248 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + store i64 %tmp5, i64* %dst + ret i64 %tmp3 +} + +define i64 @test_da_blend_simm9s8max_wb(i64* %ptr, i64 %disc, i64* %dst) { +; CHECK-LABEL: test_da_blend_simm9s8max_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: mov x17, #4088 +; CHECK-NEXT: add x16, x16, x17 +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, 4088 + %tmp4 = inttoptr i64 %tmp3 to i64* + %tmp5 = load i64, i64* %tmp4 + store i64 %tmp5, i64* %dst + ret i64 %tmp3 +} + +define i64 @test_da_blend_neg_largeoff_32b_wb(i64* %ptr, i64 %disc, i64* %dst) { +; CHECK-LABEL: test_da_blend_neg_largeoff_32b_wb: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: mov x17, x1 +; CHECK-NEXT: movk x17, #12345, lsl #48 +; CHECK-NEXT: autda x16, x17 +; CHECK-NEXT: mov x17, #-3 +; CHECK-NEXT: movk x17, #65534, lsl #32 +; CHECK-NEXT: add x16, x16, x17 +; CHECK-NEXT: ldr x8, [x16] +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: str x8, [x2] +; CHECK-NEXT: ret + %tmp0 = call i64 @llvm.ptrauth.blend(i64 %disc, i64 12345) + %tmp1 = ptrtoint i64* %ptr to i64 + %tmp2 = call i64 @llvm.ptrauth.auth(i64 %tmp1, i32 2, i64 %tmp0) + %tmp3 = add i64 %tmp2, -4294967299 + %tmp4 = inttoptr i64 %tmp3 to i64* + 
%tmp5 = load i64, i64* %tmp4 + store i64 %tmp5, i64* %dst + ret i64 %tmp3 +} + +declare i64 @llvm.ptrauth.auth(i64, i32, i64) +declare i64 @llvm.ptrauth.blend(i64, i64)
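+
+; Note on the LDRAA/LDRAB immediate range: the offset operand is a signed
+; 10-bit value scaled by 8 (simm10Scaled), so only multiples of 8 in
+; [-4096, 4088] can be folded directly (-512 * 8 = -4096, 511 * 8 = 4088);
+; anything else goes through the LDRA pseudo expansion exercised above.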