Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve codegen for assignments to globals #44182

Merged
merged 1 commit into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,14 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*
ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs);
}

// Emit a call to the `julia.write_barrier_binding` intrinsic.
//
// `parent` is the object being written to and `ptr` is the value stored into it.
// Each operand is bitcast to `T_prjlvalue` and run through `maybe_decay_untracked`
// before being passed, parent first, as the intrinsic's arguments.
static void emit_write_barrier_binding(jl_codectx_t &ctx, Value *parent, Value *ptr)
{
    // Prepare the operands in call order so the emitted casts appear in the
    // same sequence as the arguments.
    Value *parent_arg = maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue));
    Value *child_arg = maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue));
    ctx.builder.CreateCall(prepare_call(jl_write_barrier_binding_func), {parent_arg, child_arg});
}

static void find_perm_offsets(jl_datatype_t *typ, SmallVector<unsigned,4> &res, unsigned offset)
{
// This is an inlined field at `offset`.
Expand Down
34 changes: 29 additions & 5 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,15 @@ static const auto jl_write_barrier_func = new JuliaFunction{
AttributeSet(),
{Attributes(C, {Attribute::ReadOnly})}); },
};
// Descriptor for the `julia.write_barrier_binding` intrinsic.
//
// Signature: `void (prjlvalue, ...)` -- one fixed tracked-pointer parameter
// followed by variadic arguments.
// Attributes: the function is nounwind, norecurse and inaccessiblememonly; the
// first parameter is readonly; the return value carries no attributes.
static const auto jl_write_barrier_binding_func = new JuliaFunction{
    "julia.write_barrier_binding",
    [](LLVMContext &C) {
        auto *T_void = getVoidTy(C);
        auto *T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
        return FunctionType::get(T_void, {T_prjlvalue}, /*isVarArg=*/true);
    },
    [](LLVMContext &C) {
        auto FnAttrs = Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly});
        auto FirstArgAttrs = Attributes(C, {Attribute::ReadOnly});
        return AttributeList::get(C, FnAttrs, AttributeSet(), {FirstArgAttrs});
    },
};
static const auto jlisa_func = new JuliaFunction{
XSTR(jl_isa),
[](LLVMContext &C) {
Expand Down Expand Up @@ -4400,6 +4409,24 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
}
}

// Emit code that assigns the value of expression `r` to the global binding `bnd`.
//
//   bnd    - the binding being assigned (must be non-null)
//   bp     - pointer to the binding's value slot, used for the direct store
//   r      - the right-hand-side expression; emitted via `emit_expr`
//   ssaval - SSA value index forwarded to `emit_expr`
//   Order  - atomic ordering applied to the direct store
//
// When the binding is not constant, has a declared type, and the type of the
// emitted value is a subtype of that declared type, the boxed value is stored
// straight into `bp` and a binding write barrier is emitted. In every other
// case the assignment goes through a call to `jlcheckassign_func`.
static void emit_binding_store(jl_codectx_t &ctx, jl_binding_t *bnd, Value *bp, jl_value_t *r, ssize_t ssaval, AtomicOrdering Order)
{
    assert(bnd);
    jl_cgval_t value_info = emit_expr(ctx, r, ssaval);
    Value *boxed_value = boxed(ctx, value_info);

    const bool direct_store =
        !bnd->constp && bnd->ty && jl_subtype(value_info.typ, bnd->ty);
    if (!direct_store) {
        // Slow path: hand the assignment to the runtime.
        ctx.builder.CreateCall(prepare_call(jlcheckassign_func),
                { literal_pointer_val(ctx, bnd),
                  mark_callee_rooted(ctx, boxed_value) });
        return;
    }

    // Fast path: pointer-aligned store with the requested ordering, tagged
    // with the binding TBAA node, followed by the binding write barrier.
    StoreInst *store = ctx.builder.CreateAlignedStore(boxed_value, bp, Align(sizeof(void*)));
    store->setOrdering(Order);
    tbaa_decorate(ctx.tbaa().tbaa_binding, store);
    emit_write_barrier_binding(ctx, literal_pointer_val(ctx, bnd), boxed_value);
}

static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssize_t ssaval)
{
assert(!jl_is_ssavalue(l));
Expand All @@ -4416,11 +4443,7 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi
if (bp == NULL && s != NULL)
bp = global_binding_pointer(ctx, ctx.module, s, &bnd, true);
if (bp != NULL) { // it's a global
assert(bnd);
Value *rval = mark_callee_rooted(ctx, boxed(ctx, emit_expr(ctx, r, ssaval)));
ctx.builder.CreateCall(prepare_call(jlcheckassign_func),
{ literal_pointer_val(ctx, bnd),
rval });
emit_binding_store(ctx, bnd, bp, r, ssaval, AtomicOrdering::Unordered);
// Global variable. Does not need debug info because the debugger knows about
// its memory location.
return;
Expand Down Expand Up @@ -8095,6 +8118,7 @@ static void init_jit_functions(void)
add_named_global(jl_loopinfo_marker_func, (void*)NULL);
add_named_global(jl_typeof_func, (void*)NULL);
add_named_global(jl_write_barrier_func, (void*)NULL);
add_named_global(jl_write_barrier_binding_func, (void*)NULL);
add_named_global(jldlsym_func, &jl_load_and_lookup);
add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline);
add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked);
Expand Down
2 changes: 1 addition & 1 deletion src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1642,7 +1642,7 @@ void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_N
}
}

void gc_queue_binding(jl_binding_t *bnd)
JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd)
{
jl_ptls_t ptls = jl_current_task->ptls;
jl_taggedvalue_t *buf = jl_astaggedvalue(bnd);
Expand Down
4 changes: 2 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,14 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
void jl_gc_run_all_finalizers(jl_task_t *ct);
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);

void gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;

// Write barrier for storing `val` into the binding `bnd`.
//
// Queues `bnd` via `jl_gc_queue_binding` only when the binding's GC bits equal 3
// and the low GC bit of `val` is clear.
// NOTE(review): presumably "old and marked parent, unmarked child" -- confirm
// the bit meanings against the GC headers.
//
// Only the exported `jl_gc_queue_binding` is called; the pre-rename
// `gc_queue_binding` call is gone. With both calls present the `if` guarded only
// the old name and the new call ran unconditionally.
STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
{
    if (__unlikely(jl_astaggedvalue(bnd)->bits.gc == 3 &&
                   (jl_astaggedvalue(val)->bits.gc & 1) == 0))
        jl_gc_queue_binding(bnd);
}

STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
Expand Down
3 changes: 2 additions & 1 deletion src/llvm-alloc-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
assert(use->get() == I);
return true;
}
if (required.pass.write_barrier_func == callee)
if (required.pass.write_barrier_func == callee ||
required.pass.write_barrier_binding_func == callee)
return true;
auto opno = use->getOperandNo();
// Uses in `jl_roots` operand bundle are not counted as escaping, everything else is.
Expand Down
9 changes: 6 additions & 3 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
}
return;
}
if (pass.write_barrier_func == callee) {
if (pass.write_barrier_func == callee ||
pass.write_barrier_binding_func == callee) {
call->eraseFromParent();
return;
}
Expand Down Expand Up @@ -744,7 +745,8 @@ void Optimizer::removeAlloc(CallInst *orig_inst)
call->eraseFromParent();
return;
}
if (pass.write_barrier_func == callee) {
if (pass.write_barrier_func == callee ||
pass.write_barrier_binding_func == callee) {
call->eraseFromParent();
return;
}
Expand Down Expand Up @@ -1036,7 +1038,8 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
call->eraseFromParent();
return;
}
if (pass.write_barrier_func == callee) {
if (pass.write_barrier_func == callee ||
pass.write_barrier_binding_func == callee) {
call->eraseFromParent();
return;
}
Expand Down
22 changes: 19 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct FinalLowerGC: private JuliaPassContext {

private:
Function *queueRootFunc;
Function *queueBindingFunc;
Function *poolAllocFunc;
Function *bigAllocFunc;
Instruction *pgcstack;
Expand All @@ -58,6 +59,9 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.queue_gc_root` intrinsic.
Value *lowerQueueGCRoot(CallInst *target, Function &F);

// Lowers a `julia.queue_gc_binding` intrinsic.
Value *lowerQueueGCBinding(CallInst *target, Function &F);
};

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
Expand Down Expand Up @@ -165,6 +169,13 @@ Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
return target;
}

// Lowers a `julia.queue_gc_binding` intrinsic call by retargeting it, in place,
// at the function held in `queueBindingFunc`. The call instruction itself is
// returned; `F` is not consulted.
Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F)
{
    // The intrinsic carries exactly one operand.
    assert(target->arg_size() == 1);

    Function *runtimeCallee = queueBindingFunc;
    target->setCalledFunction(runtimeCallee);
    return target;
}

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
assert(target->arg_size() == 2);
Expand Down Expand Up @@ -197,10 +208,11 @@ bool FinalLowerGC::doInitialization(Module &M) {

// Initialize platform-specific references.
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
queueBindingFunc = getOrDeclare(jl_well_known::GCQueueBinding);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);

GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc};
unsigned j = 0;
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
if (!functionList[i])
Expand All @@ -216,8 +228,8 @@ bool FinalLowerGC::doInitialization(Module &M) {

bool FinalLowerGC::doFinalization(Module &M)
{
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc};
queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = nullptr;
auto used = M.getGlobalVariable("llvm.compiler.used");
if (!used)
return false;
Expand Down Expand Up @@ -282,6 +294,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
auto queueGCBindingFunc = getOrNull(jl_intrinsics::queueGCBinding);

// Lower all calls to supported intrinsics.
for (BasicBlock &BB : F) {
Expand Down Expand Up @@ -314,6 +327,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
else if (callee == queueGCRootFunc) {
replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
}
else if (callee == queueGCBindingFunc) {
replaceInstruction(CI, lowerQueueGCBinding(CI, F), it);
}
else {
++it;
}
Expand Down
6 changes: 4 additions & 2 deletions src/llvm-julia-licm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ struct JuliaLICM : public JuliaPassContext {
// `gc_preserve_end_func` is optional since the input to
// `gc_preserve_end_func` must be from `gc_preserve_begin_func`.
// We also hoist write barriers here, so we don't exit if write_barrier_func or write_barrier_binding_func exists
if (!gc_preserve_begin_func && !write_barrier_func && !alloc_obj_func)
if (!gc_preserve_begin_func && !write_barrier_func && !write_barrier_binding_func &&
!alloc_obj_func)
return false;
auto LI = &GetLI();
auto DT = &GetDT();
Expand Down Expand Up @@ -132,7 +133,8 @@ struct JuliaLICM : public JuliaPassContext {
CallInst::Create(call, {}, exit_pts[i]);
}
}
else if (callee == write_barrier_func) {
else if (callee == write_barrier_func ||
callee == write_barrier_binding_func) {
bool valid = true;
for (std::size_t i = 0; i < call->arg_size(); i++) {
if (!L->makeLoopInvariant(call->getArgOperand(i), changed)) {
Expand Down
16 changes: 13 additions & 3 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
callee == gc_preserve_end_func || callee == typeof_func ||
callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
callee == write_barrier_func || callee->getName() == "memcmp") {
callee == write_barrier_func || callee == write_barrier_binding_func ||
callee->getName() == "memcmp") {
continue;
}
if (callee->hasFnAttribute(Attribute::ReadNone) ||
Expand Down Expand Up @@ -2378,7 +2379,8 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
typ->takeName(CI);
CI->replaceAllUsesWith(typ);
UpdatePtrNumbering(CI, typ, S);
} else if (write_barrier_func && callee == write_barrier_func) {
} else if ((write_barrier_func && callee == write_barrier_func) ||
(write_barrier_binding_func && callee == write_barrier_binding_func)) {
// The replacement for this requires creating new BasicBlocks
// which messes up the loop. Queue all of them to be replaced later.
assert(CI->arg_size() >= 1);
Expand Down Expand Up @@ -2484,7 +2486,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
builder.SetInsertPoint(trigTerm);
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
if (CI->getCalledOperand() == write_barrier_func) {
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
}
else if (CI->getCalledOperand() == write_barrier_binding_func) {
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCBinding), parent);
}
else {
assert(false);
}
CI->eraseFromParent();
}
if (maxframeargs == 0 && Frame) {
Expand Down
34 changes: 33 additions & 1 deletion src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ JuliaPassContext::JuliaPassContext()
pgcstack_getter(nullptr), gc_flush_func(nullptr),
gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
pointer_from_objref_func(nullptr), alloc_obj_func(nullptr),
typeof_func(nullptr), write_barrier_func(nullptr), module(nullptr)
typeof_func(nullptr), write_barrier_func(nullptr),
write_barrier_binding_func(nullptr), module(nullptr)
{
}

Expand All @@ -50,6 +51,7 @@ void JuliaPassContext::initFunctions(Module &M)
pointer_from_objref_func = M.getFunction("julia.pointer_from_objref");
typeof_func = M.getFunction("julia.typeof");
write_barrier_func = M.getFunction("julia.write_barrier");
write_barrier_binding_func = M.getFunction("julia.write_barrier_binding");
alloc_obj_func = M.getFunction("julia.gc_alloc_obj");
}

Expand Down Expand Up @@ -117,6 +119,7 @@ namespace jl_intrinsics {
static const char *PUSH_GC_FRAME_NAME = "julia.push_gc_frame";
static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame";
static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root";
static const char *QUEUE_GC_BINDING_NAME = "julia.queue_gc_binding";

// Annotates a function with attributes suitable for GC allocation
// functions. Specifically, the return value is marked noalias and nonnull.
Expand Down Expand Up @@ -208,12 +211,27 @@ namespace jl_intrinsics {
intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return intrinsic;
});

// `julia.queue_gc_binding`: declared as `void (prjlvalue)` with external
// linkage and marked inaccessiblemem_or_argmemonly.
const IntrinsicDescription queueGCBinding(
    QUEUE_GC_BINDING_NAME,
    [](const JuliaPassContext &context) {
        // Build the signature first, then the declaration that carries it.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(voidTy, { context.T_prjlvalue }, false);
        auto *intrinsic = Function::Create(
            signature,
            Function::ExternalLinkage,
            QUEUE_GC_BINDING_NAME);
        intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return intrinsic;
    });
}

namespace jl_well_known {
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
static const char *GC_QUEUE_BINDING_NAME = XSTR(jl_gc_queue_binding);

using jl_intrinsics::addGCAllocAttributes;

Expand Down Expand Up @@ -248,6 +266,20 @@ namespace jl_well_known {
return addGCAllocAttributes(poolAllocFunc, context.getLLVMContext());
});

// `jl_gc_queue_binding`: declared as `void (prjlvalue)` with external linkage
// and marked inaccessiblemem_or_argmemonly.
const WellKnownFunctionDescription GCQueueBinding(
    GC_QUEUE_BINDING_NAME,
    [](const JuliaPassContext &context) {
        // Build the signature first, then the declaration that carries it.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(voidTy, { context.T_prjlvalue }, false);
        auto *func = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_QUEUE_BINDING_NAME);
        func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return func;
    });

const WellKnownFunctionDescription GCQueueRoot(
GC_QUEUE_ROOT_NAME,
[](const JuliaPassContext &context) {
Expand Down
7 changes: 7 additions & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct JuliaPassContext {
llvm::Function *alloc_obj_func;
llvm::Function *typeof_func;
llvm::Function *write_barrier_func;
llvm::Function *write_barrier_binding_func;

// Creates a pass context. Type and function pointers
// are set to `nullptr`. Metadata nodes are initialized.
Expand Down Expand Up @@ -128,6 +129,9 @@ namespace jl_intrinsics {

// `julia.queue_gc_root`: an intrinsic that queues a GC root.
extern const IntrinsicDescription queueGCRoot;

// `julia.queue_gc_binding`: an intrinsic that queues a binding for GC.
extern const IntrinsicDescription queueGCBinding;
}

// A namespace for well-known Julia runtime function descriptions.
Expand All @@ -148,6 +152,9 @@ namespace jl_well_known {

// `jl_gc_queue_root`: queues a GC root.
extern const WellKnownFunctionDescription GCQueueRoot;

// `jl_gc_queue_binding`: queues a binding for GC.
extern const WellKnownFunctionDescription GCQueueBinding;
}

#endif