Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64] Implement INIT/ADJUST_TRAMPOLINE #70267

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions compiler-rt/lib/builtins/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,11 @@ switch32
switch8
switchu8

// This function generates a custom trampoline function with the specific
// realFunc and localsPtr values.
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
const void* realFunc, void* localsPtr);
ceseo marked this conversation as resolved.
Show resolved Hide resolved

// There is no C interface to the *_vfp_d8_d15_regs functions. They are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses
// SJLJ for exceptions, each function with a catch clause or destructors needs
Expand Down
42 changes: 42 additions & 0 deletions compiler-rt/lib/builtins/trampoline_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)

// The AArch64 compiler generates calls to __trampoline_setup() when creating
// trampoline functions on the stack for use with nested functions.
// This function creates a custom 36-byte trampoline function on the stack
// which loads x18 with a pointer to the outer function's locals
// and then jumps to the target nested function.
// Note: x18 is a reserved platform register on Windows and macOS.

#if defined(__aarch64__) && defined(__ELF__)
// Writes a 9-instruction (36-byte) AArch64 trampoline into trampOnStack.
//
//   trampOnStack        - buffer that receives the generated instructions.
//   trampSizeAllocated  - size in bytes the caller reserved for the buffer;
//                         anything below 36 aborts.
//   realFunc            - address the trampoline branches to (via x17).
//   localsPtr           - value placed in x18 before the branch.
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
                                        int trampSizeAllocated,
                                        const void *realFunc, void *localsPtr) {
  // The compiler is expected to reserve enough stack for the whole
  // trampoline; refuse to write past a buffer that is too small.
  if (trampSizeAllocated < 36)
    compilerrt_abort();

  // Opcode templates for the mov/movk sequence, one per 16-bit slice of the
  // 64-bit value (bits 0-15, 16-31, 32-47, 48-63 respectively).
  const uint32_t sliceOpcode[4] = {0xd2800000u, 0xf2a00000u, 0xf2c00000u,
                                   0xf2e00000u};
  const uint64_t targetBits = (uint64_t)realFunc;
  const uint64_t localsBits = (uint64_t)localsPtr;

  for (int slice = 0; slice < 4; ++slice) {
    const int shift = 16 * slice;
    // Words 0..3 build realFunc in x17 (register field 0x11).
    trampOnStack[slice] =
        sliceOpcode[slice] | (((targetBits >> shift) & 0xffffu) << 5) | 0x11;
    // Words 4..7 build localsPtr in x18 (register field 0x12).
    trampOnStack[slice + 4] =
        sliceOpcode[slice] | (((localsBits >> shift) & 0xffffu) << 5) | 0x12;
  }

  // Word 8 transfers control to the nested function.
  trampOnStack[8] = 0xd61f0220; // br x17

  // Clear instruction cache over the nine words just written.
  __clear_cache(trampOnStack, &trampOnStack[9]);
}
#endif // defined(__aarch64__) && defined(__ELF__)
2 changes: 1 addition & 1 deletion compiler-rt/test/builtins/Unit/trampoline_setup_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

/*
* Tests nested functions
* The ppc compiler generates a call to __trampoline_setup
* The ppc and aarch64 compilers generate a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/

Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);

// llvm.init.trampoline and llvm.adjust.trampoline
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
Expand Down Expand Up @@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
return Final;
}

/// Custom lowering for ISD::ADJUST_TRAMPOLINE.
///
/// Rejects Darwin and Windows targets with a fatal error; on every other
/// target the node is replaced by its first operand unchanged.
SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // x18 cannot carry the Nest parameter on Windows and macOS, so trampolines
  // are refused outright on those targets.
  const bool NestRegUnavailable =
      Subtarget->isTargetDarwin() || Subtarget->isTargetWindows();
  if (NestRegUnavailable)
    report_fatal_error(
        "ADJUST_TRAMPOLINE operation is only supported on Linux.");

  // No adjustment is applied: forward operand 0 as the result.
  SDValue Unadjusted = Op.getOperand(0);
  return Unadjusted;
}

/// Custom lowering for ISD::INIT_TRAMPOLINE.
///
/// Replaces the node with a call to the runtime helper
///   __trampoline_setup(Trmp, TrampSize, FPtr, Nest)
/// and returns the output chain of that call. Darwin and Windows targets are
/// rejected with a fatal error.
///
/// Operands of \p Op: 0 = chain, 1 = trampoline buffer, 2 = nested function,
/// 3 = 'nest' parameter value.
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {

  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  // All four arguments are passed as pointer-sized integers.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp;
  Args.push_back(Entry);
  // Size of the trampoline buffer in bytes. The AArch64 __trampoline_setup
  // writes nine 4-byte instructions and aborts when told the buffer is
  // smaller than 36 bytes, so anything below 36 here would make every
  // trampoline initialization abort at run time.
  Entry.Node = DAG.getConstant(36, dl, MVT::i64);
  Args.push_back(Entry);

  Entry.Node = FPtr;
  Args.push_back(Entry);
  Entry.Node = Nest;
  Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // The helper returns void; only the output chain (.second) is meaningful.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}

SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
Expand All @@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerGlobalTLSAddress(Op, DAG);
case ISD::PtrAuthGlobalAddress:
return LowerPtrAuthGlobalAddress(Op, DAG);
case ISD::ADJUST_TRAMPOLINE:
return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INIT_TRAMPOLINE:
return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/trampoline.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
ceseo marked this conversation as resolved.
Show resolved Hide resolved

declare void @llvm.init.trampoline(ptr, ptr, ptr);
declare ptr @llvm.adjust.trampoline(ptr);

; Target of the trampoline: takes a 'nest' pointer %c (not read in the body)
; plus two i64 values, and returns their sum.
define i64 @f(ptr nest %c, i64 %x, i64 %y) {
%sum = add i64 %x, %y
ret i64 %sum
}

; Initializes a trampoline for @f in a stack buffer and then adjusts it.
; The test only verifies that llc emits a call to the runtime helper.
define i64 @main() {
; %val provides the address handed to the trampoline as the 'nest' value.
%val = alloca i64
%nval = bitcast ptr %val to ptr
; 36-byte, 8-byte-aligned stack buffer that holds the trampoline.
%tramp = alloca [36 x i8], align 8
; CHECK: bl __trampoline_setup
call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
; The adjusted pointer %fp is not used afterwards.
%fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
ret i64 0
}
Loading