diff --git a/base/reflection.jl b/base/reflection.jl index a5aaf0ad20d4a..1adc69291934e 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1092,6 +1092,7 @@ struct CodegenParams prefer_specsig::Cint gnu_pubnames::Cint debug_info_kind::Cint + safepoint_on_entry::Cint lookup::Ptr{Cvoid} @@ -1100,12 +1101,14 @@ struct CodegenParams function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), + safepoint_on_entry::Bool=true, lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred), generic_context = nothing) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, + Cint(safepoint_on_entry), lookup, generic_context) end end diff --git a/src/cgutils.cpp b/src/cgutils.cpp index c091111f31617..8ce84acb30901 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3933,7 +3933,6 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); } - #ifndef JL_NDEBUG static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) { diff --git a/src/codegen.cpp b/src/codegen.cpp index 6bd0b0d16865a..a4773acb3fbea 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1279,6 +1279,7 @@ extern "C" { 1, #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, + 1, jl_rettype_inferred, NULL }; } @@ -7805,7 +7806,11 @@ static jl_llvm_functions_t ctx.builder.CreateAlignedStore(load_world, world_age_field, Align(sizeof(size_t))); } - // step 11b. Do codegen in control flow order + // step 11b. Emit the entry safepoint + if (JL_FEAT_TEST(ctx, safepoint_on_entry)) + emit_gc_safepoint(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const); + + // step 11c. Do codegen in control flow order std::vector workstack; std::map BB; std::map come_from_bb; diff --git a/src/codegen_shared.h b/src/codegen_shared.h index 329cc567e8c5f..e0edb792d7645 100644 --- a/src/codegen_shared.h +++ b/src/codegen_shared.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -233,20 +234,39 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder) builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread); } -static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa) +static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false) { + using namespace llvm; + llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa); emit_signal_fence(builder); - builder.CreateLoad(getSizeTy(builder.getContext()), get_current_signal_page_from_ptls(builder, ptls, tbaa), true); + Module *M = builder.GetInsertBlock()->getModule(); + LLVMContext &C = builder.getContext(); + // inline jlsafepoint_func->realize(M) + if (final) { + auto T_size = getSizeTy(builder.getContext()); + builder.CreateLoad(T_size, signal_page, true); + } + else { + Function *F = M->getFunction("julia.safepoint"); + if (!F) { + auto T_size = getSizeTy(builder.getContext()); + auto T_psize = T_size->getPointerTo(); + FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false); + F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); + F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + builder.CreateCall(F, {signal_page}); + } emit_signal_fence(builder); } -static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state) +static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) { using namespace llvm; Type *T_int8 = state->getType(); - ptls = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); + llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef(offset), "gc_state"); + Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef(offset), "gc_state"); if (old_state == nullptr) { old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); @@ -266,38 +286,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::V passBB, exitBB); builder.SetInsertPoint(passBB); MDNode *tbaa = get_tbaa_const(builder.getContext()); - emit_gc_safepoint(builder, ptls, tbaa); + emit_gc_safepoint(builder, ptls, tbaa, final); builder.CreateBr(exitBB); builder.SetInsertPoint(exitBB); return old_state; } -static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls) +static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final) { using namespace llvm; Value *state = builder.getInt8(0); - return emit_gc_state_set(builder, ptls, state, nullptr); + return emit_gc_state_set(builder, ptls, state, nullptr, final); } -static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state) +static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(0); - return emit_gc_state_set(builder, ptls, state, old_state); + return emit_gc_state_set(builder, ptls, state, old_state, final); } -static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls) +static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final) { using namespace llvm; Value *state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, ptls, state, nullptr); + return emit_gc_state_set(builder, ptls, state, nullptr, final); } -static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state) +static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, ptls, state, old_state); + return emit_gc_state_set(builder, ptls, state, old_state, final); } // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. diff --git a/src/julia.h b/src/julia.h index bb747a77a518d..a34c1f06d0cc1 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2247,9 +2247,11 @@ typedef struct { // controls the emission of debug-info. mirrors the clang options int gnu_pubnames; // can we emit the gnu pubnames debuginfo - int debug_info_kind; // Enum for line-table-only, line-directives-only, + int debug_info_kind; // Enum for line-table-only, line-directives-only, // limited, standalone + int safepoint_on_entry; // Emit a safepoint on entry to each function + // Cache access. Default: jl_rettype_inferred. jl_codeinstance_lookup_t lookup; diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 0a43c52ddfbc4..30a5d9a59f676 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -27,6 +27,7 @@ STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics" STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics"); STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics"); STATISTIC(QueueGCBindingCount, "Number of lowered queueGCBindingFunc intrinsics"); +STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics"); using namespace llvm; @@ -72,6 +73,9 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.queue_gc_binding` intrinsic. Value *lowerQueueGCBinding(CallInst *target, Function &F); + + // Lowers a `julia.safepoint` intrinsic. + Value *lowerSafepoint(CallInst *target, Function &F); }; Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) @@ -202,6 +206,18 @@ Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F) return target; } +Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) +{ + ++SafepointCount; + assert(target->arg_size() == 1); + IRBuilder<> builder(target->getContext()); + builder.SetInsertPoint(target); + auto T_size = getSizeTy(builder.getContext()); + Value* signal_page = target->getOperand(0); + Value* load = builder.CreateLoad(T_size, signal_page, true); + return load; +} + Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; @@ -317,16 +333,20 @@ static void replaceInstruction( bool FinalLowerGC::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); // Check availability of functions again since they might have been deleted. initFunctions(*F.getParent()); - if (!pgcstack_getter && !adoptthread_func) + if (!pgcstack_getter && !adoptthread_func) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); return false; + } // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); - if (!pgcstack) + if (!pgcstack) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); return false; + } + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); // Acquire intrinsic functions. auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); @@ -336,6 +356,7 @@ bool FinalLowerGC::runOnFunction(Function &F) auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); auto queueGCBindingFunc = getOrNull(jl_intrinsics::queueGCBinding); + auto safepointFunc = getOrNull(jl_intrinsics::safepoint); // Lower all calls to supported intrinsics. for (BasicBlock &BB : F) { @@ -347,6 +368,7 @@ bool FinalLowerGC::runOnFunction(Function &F) } Value *callee = CI->getCalledOperand(); + assert(callee); if (callee == newGCFrameFunc) { replaceInstruction(CI, lowerNewGCFrame(CI, F), it); @@ -371,6 +393,10 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == queueGCBindingFunc) { replaceInstruction(CI, lowerQueueGCBinding(CI, F), it); } + else if (callee == safepointFunc) { + lowerSafepoint(CI, F); + it = CI->eraseFromParent(); + } else { ++it; } diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index f589cb5672365..91850ebe8df07 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -119,6 +119,7 @@ namespace jl_intrinsics { static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root"; static const char *QUEUE_GC_BINDING_NAME = "julia.queue_gc_binding"; + static const char *SAFEPOINT_NAME = "julia.safepoint"; static auto T_size_t(const JuliaPassContext &context) { return sizeof(size_t) == sizeof(uint32_t) ? @@ -229,6 +230,22 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + + const IntrinsicDescription safepoint( + SAFEPOINT_NAME, + [](const JuliaPassContext &context) { + auto T_size = getSizeTy(context.getLLVMContext()); + auto T_psize = T_size->getPointerTo(); + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + {T_psize}, + false), + Function::ExternalLinkage, + SAFEPOINT_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); } namespace jl_well_known { diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index f25f9181ddb18..e54f39c05ba59 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -129,6 +129,9 @@ namespace jl_intrinsics { // `julia.queue_gc_binding`: an intrinsic that queues a binding for GC. extern const IntrinsicDescription queueGCBinding; + + // `julia.safepoint`: an intrinsic that triggers a GC safepoint. + extern const IntrinsicDescription safepoint; } // A namespace for well-known Julia runtime function descriptions. diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index a39a73c5393a2..be4cc3a1edf2a 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -207,7 +207,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, IRBuilder<> builder(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa)); + Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -219,7 +219,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { IRBuilder<> builder(BB.getTerminator()); - emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state); + emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); } } } diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index e4e107351c57f..4c9c7e97a710b 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -15,9 +15,12 @@ function libjulia_codegen_name() is_debug_build ? "libjulia-codegen-debug" : "libjulia-codegen" end -# `_dump_function` might be more efficient but it doesn't really matter here... -get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) = - sprint(code_llvm, f, t, raw, dump_module, optimize) +# The tests below assume a certain format and safepoint_on_entry=true breaks that. +function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) + params = Base.CodegenParams(safepoint_on_entry=false) + d = InteractiveUtils._dump_function(f, t, false, false, !raw, dump_module, :att, optimize, :none, false, params) + sprint(print, d) +end if !is_debug_build && opt_level > 0 # Make sure getptls call is removed at IR level with optimization on