From 60f9048ea461030d7d83339185166524d97bd01b Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Seo Date: Mon, 15 Jul 2024 18:00:37 +0000 Subject: [PATCH] [AArch64] Implement INIT/ADJUST_TRAMPOLINE Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes #65573 Fixes #76927 Fixes #83555 Updates #66157 --- compiler-rt/lib/builtins/README.txt | 5 ++ compiler-rt/lib/builtins/trampoline_setup.c | 42 ++++++++++++++ .../builtins/Unit/trampoline_setup_test.c | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 58 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + llvm/test/CodeGen/AArch64/trampoline.ll | 19 ++++++ 6 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/trampoline.ll diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt index 2d213d95f333af..19f26c92a0f94f 100644 --- a/compiler-rt/lib/builtins/README.txt +++ b/compiler-rt/lib/builtins/README.txt @@ -272,6 +272,11 @@ switch32 switch8 switchu8 +// This function generates a custom trampoline function with the specific +// realFunc and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + // There is no C interface to the *_vfp_d8_d15_regs functions. There are // called in the prolog and epilog of Thumb1 functions. 
When the C++ ABI use // SJLJ for exceptions, each function with a catch clause or destructors needs diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c index 844eb279441428..830e25e4c0303a 100644 --- a/compiler-rt/lib/builtins/trampoline_setup.c +++ b/compiler-rt/lib/builtins/trampoline_setup.c @@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, __clear_cache(trampOnStack, &trampOnStack[10]); } #endif // __powerpc__ && !defined(__powerpc64__) + +// The AArch64 compiler generates calls to __trampoline_setup() when creating +// trampoline functions on the stack for use with nested functions. +// This function creates a custom 36-byte trampoline function on the stack +// which loads x18 with a pointer to the outer function's locals +// and then jumps to the target nested function. +// Note: x18 is a reserved platform register on Windows and macOS. + +#if defined(__aarch64__) && defined(__ELF__) +COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, + int trampSizeAllocated, + const void *realFunc, void *localsPtr) { + // This should never happen, but if compiler did not allocate + // enough space on stack for the trampoline, abort. + if (trampSizeAllocated < 36) + compilerrt_abort(); + + // create trampoline + // Load realFunc into x17. mov/movk 16 bits at a time. 
+ trampOnStack[0] = + 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11; + trampOnStack[1] = + 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11; + trampOnStack[2] = + 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11; + trampOnStack[3] = + 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11; + // Load localsPtr into x18 + trampOnStack[4] = + 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12; + trampOnStack[5] = + 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12; + trampOnStack[6] = + 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12; + trampOnStack[7] = + 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12; + trampOnStack[8] = 0xd61f0220; // br x17 + + // Clear instruction cache. + __clear_cache(trampOnStack, &trampOnStack[9]); +} +#endif // defined(__aarch64__) && defined(__ELF__) diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c index da115fe7642718..d51d35acaa02f1 100644 --- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c +++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c @@ -7,7 +7,7 @@ /* * Tests nested functions - * The ppc compiler generates a call to __trampoline_setup + * The ppc and aarch64 compilers generate a call to __trampoline_setup * The i386 and x86_64 compilers generate a call to ___enable_execute_stack */ diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7efe99c63a3747..d86e52d49000ae 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Try to create BICs for vector ANDs. 
setTargetDAGCombine(ISD::AND); + // llvm.init.trampoline and llvm.adjust.trampoline + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); + // Vector add and sub nodes may conceal a high-half opportunity. // Also, try to fold ADD into CSINC/CSINV.. setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP, @@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) { return Final; } +SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + // Note: x18 cannot be used for the Nest parameter on Windows and macOS. + if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) + report_fatal_error( + "ADJUST_TRAMPOLINE operation is only supported on Linux."); + + return Op.getOperand(0); +} + +SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + + // Note: x18 cannot be used for the Nest parameter on Windows and macOS. 
+ if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) + report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux."); + + SDValue Chain = Op.getOperand(0); + SDValue Trmp = Op.getOperand(1); // trampoline + SDValue FPtr = Op.getOperand(2); // nested function + SDValue Nest = Op.getOperand(3); // 'nest' parameter value + SDLoc dl(Op); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + Entry.Ty = IntPtrTy; + Entry.Node = Trmp; + Args.push_back(Entry); + Entry.Node = DAG.getConstant(36, dl, MVT::i64); // 9 insns * 4 bytes + Args.push_back(Entry); + + Entry.Node = FPtr; + Args.push_back(Entry); + Entry.Node = Nest; + Args.push_back(Entry); + + // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.second; +} + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Custom lowering: "); @@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerGlobalTLSAddress(Op, DAG); case ISD::PtrAuthGlobalAddress: return LowerPtrAuthGlobalAddress(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: + return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: + return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index ef45e4f01ecd30..81e15185f985d5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1143,6 +1143,8 @@ class 
AArch64TargetLowering : public TargetLowering { SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, const SDLoc &dl, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll new file mode 100644 index 00000000000000..293e538a7459d4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s + +declare void @llvm.init.trampoline(ptr, ptr, ptr); +declare ptr @llvm.adjust.trampoline(ptr); + +define i64 @f(ptr nest %c, i64 %x, i64 %y) { + %sum = add i64 %x, %y + ret i64 %sum +} + +define i64 @main() { + %val = alloca i64 + %nval = bitcast ptr %val to ptr + %tramp = alloca [36 x i8], align 8 + ; CHECK: bl __trampoline_setup + call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval) + %fp = call ptr @llvm.adjust.trampoline(ptr %tramp) + ret i64 0 +}