Skip to content

Commit

Permalink
[AArch64] Implement INIT/ADJUST_TRAMPOLINE (llvm#70267)
Browse files Browse the repository at this point in the history
Add support for llvm.init.trampoline and llvm.adjust.trampoline
intrinsics for AArch64.

Fixes llvm#65573
Fixes llvm#76927
Fixes llvm#83555
Updates llvm#66157

(cherry picked from commit c4b66bf)
  • Loading branch information
ceseo authored and llvmbot committed Jul 24, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 7af27be commit b770811
Showing 6 changed files with 127 additions and 1 deletion.
5 changes: 5 additions & 0 deletions compiler-rt/lib/builtins/README.txt
Original file line number Diff line number Diff line change
@@ -272,6 +272,11 @@ switch32
switch8
switchu8

// This function generates a custom trampoline function with the specific
// realFunc and localsPtr values.
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
const void* realFunc, void* localsPtr);

// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
42 changes: 42 additions & 0 deletions compiler-rt/lib/builtins/trampoline_setup.c
Original file line number Diff line number Diff line change
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)

// The AArch64 compiler generates calls to __trampoline_setup() when creating
// trampoline functions on the stack for use with nested functions.
// This function creates a custom 36-byte trampoline function on the stack
// which loads x18 with a pointer to the outer function's locals
// and then jumps to the target nested function.
// Note: x18 is a reserved platform register on Windows and macOS.

#if defined(__aarch64__) && defined(__ELF__)
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
int trampSizeAllocated,
const void *realFunc, void *localsPtr) {
// This should never happen, but if compiler did not allocate
// enough space on stack for the trampoline, abort.
if (trampSizeAllocated < 36)
compilerrt_abort();

// create trampoline
// Load realFunc into x17. mov/movk 16 bits at a time.
trampOnStack[0] =
0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
trampOnStack[1] =
0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
trampOnStack[2] =
0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
trampOnStack[3] =
0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
// Load localsPtr into x18
trampOnStack[4] =
0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
trampOnStack[5] =
0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
trampOnStack[6] =
0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
trampOnStack[7] =
0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
trampOnStack[8] = 0xd61f0220; // br x17

// Clear instruction cache.
__clear_cache(trampOnStack, &trampOnStack[9]);
}
#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
2 changes: 1 addition & 1 deletion compiler-rt/test/builtins/Unit/trampoline_setup_test.c
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@

/*
* Tests nested functions
* The ppc compiler generates a call to __trampoline_setup
* The ppc and aarch64 compilers generates a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/

58 changes: 58 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
@@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);

// llvm.init.trampoline and llvm.adjust.trampoline
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}

SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
// Note: x18 cannot be used for the Nest parameter on Windows and macOS.
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
report_fatal_error(
"ADJUST_TRAMPOLINE operation is only supported on Linux.");

return Op.getOperand(0);
}

SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {

// Note: x18 cannot be used for the Nest parameter on Windows and macOS.
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");

SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);

EVT PtrVT = getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;

Entry.Ty = IntPtrTy;
Entry.Node = Trmp;
Args.push_back(Entry);
Entry.Node = DAG.getConstant(20, dl, MVT::i64);
Args.push_back(Entry);

Entry.Node = FPtr;
Args.push_back(Entry);
Entry.Node = Nest;
Args.push_back(Entry);

// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
case ISD::ADJUST_TRAMPOLINE:
return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INIT_TRAMPOLINE:
return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
@@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/trampoline.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s

declare void @llvm.init.trampoline(ptr, ptr, ptr);
declare ptr @llvm.adjust.trampoline(ptr);

define i64 @f(ptr nest %c, i64 %x, i64 %y) {
%sum = add i64 %x, %y
ret i64 %sum
}

define i64 @main() {
%val = alloca i64
%nval = bitcast ptr %val to ptr
%tramp = alloca [36 x i8], align 8
; CHECK: bl __trampoline_setup
call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
%fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
ret i64 0
}

0 comments on commit b770811

Please sign in to comment.