Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize write barrier when the child is a permanently allocated object #37043

Merged
merged 1 commit into from
Aug 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,17 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b)

// --- mapping between julia and llvm types ---

// Reports whether `typ` is one of the types this file treats as permanently
// allocated: any singleton datatype, or exactly the Symbol, Int8 or UInt8 type.
static bool type_is_permalloc(jl_value_t *typ)
{
    // Later optimization passes should nearly always take care of singletons
    // already; the check is repeated here because it is cheap and spares the
    // LLVM passes some work.
    const bool is_singleton =
        jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ);
    if (is_singleton)
        return true;
    if (typ == (jl_value_t*)jl_symbol_type)
        return true;
    if (typ == (jl_value_t*)jl_int8_type)
        return true;
    return typ == (jl_value_t*)jl_uint8_type;
}

static unsigned convert_struct_offset(Type *lty, unsigned byte_offset)
{
const DataLayout &DL = jl_data_layout;
Expand Down Expand Up @@ -1277,7 +1288,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, MDNode *tbaa_dest = nullptr, bool isVolatile = false);
static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef<Value*>);
static void emit_write_barrier(jl_codectx_t&, Value*, Value*);
static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*);
static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*);

std::vector<unsigned> first_ptr(Type *T)
{
Expand Down Expand Up @@ -1399,8 +1410,8 @@ static void typed_store(jl_codectx_t &ctx,
tbaa_decorate(tbaa, store);
if (parent != NULL) {
if (!isboxed)
emit_write_multibarrier(ctx, parent, r);
else
emit_write_multibarrier(ctx, parent, r, rhs.typ);
else if (!type_is_permalloc(rhs.typ))
emit_write_barrier(ctx, parent, r);
}
}
Expand Down Expand Up @@ -2605,9 +2616,35 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*
ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs);
}

static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg)
// Walks the fields of `typ` and appends to `res` the offset of every pointer
// field whose declared type satisfies `type_is_permalloc`. Fields that are
// stored inline are visited recursively, so the reported offsets are relative
// to the outermost type.
//
// typ:    datatype whose fields are inspected.
// res:    output list; matching offsets are appended in field order.
// offset: offset of `typ` within the outermost type (0 for the top-level call);
//         added to each `jl_field_offset` result.
static void find_perm_offsets(jl_datatype_t *typ, SmallVector<unsigned,4> &res, unsigned offset)
{
// NOTE(review): the next line uses `agg` and `ctx`, which are not parameters of
// this function; it looks like residue from the removed body of the old
// `emit_write_multibarrier` in the diff rendering -- confirm before building.
auto ptrs = ExtractTrackedValues(agg, agg->getType(), false, ctx.builder);
// This is an inlined field at `offset`.
// Nothing to report when the type has no layout or its layout holds no pointers.
if (!typ->layout || typ->layout->npointers == 0)
return;
size_t nf = jl_svec_len(typ->types);
for (size_t i = 0; i < nf; i++) {
jl_value_t *_fld = jl_svecref(typ->types, i);
// Field types that are not datatypes are skipped entirely.
if (!jl_is_datatype(_fld))
continue;
jl_datatype_t *fld = (jl_datatype_t*)_fld;
if (jl_field_isptr(typ, i)) {
// pointer field, check if field is perm-alloc
if (type_is_permalloc((jl_value_t*)fld))
res.push_back(offset + jl_field_offset(typ, i));
continue;
}
// inline field
// Recurse with the accumulated offset so nested results stay relative to the
// outermost type.
find_perm_offsets(fld, res, offset + jl_field_offset(typ, i));
}
}

// Emits a write barrier on `parent` covering the tracked pointers extracted
// from the aggregate value `agg`. When `jltype` is a datatype with a layout,
// the offsets collected by `find_perm_offsets` are handed to
// `ExtractTrackedValues` as its `perm_offsets` argument.
static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg,
                                    jl_value_t *jltype)
{
    SmallVector<unsigned,4> skip_offsets;
    // Only a datatype that actually carries a layout can be walked for fields.
    const bool has_layout =
        jltype && jl_is_datatype(jltype) && ((jl_datatype_t*)jltype)->layout;
    if (has_layout) {
        jl_datatype_t *dt = (jl_datatype_t*)jltype;
        find_perm_offsets(dt, skip_offsets, 0);
    }
    auto tracked = ExtractTrackedValues(agg, agg->getType(), false, ctx.builder, skip_offsets);
    emit_write_barrier(ctx, parent, tracked);
}

Expand All @@ -2633,7 +2670,7 @@ static void emit_setfield(jl_codectx_t &ctx,
emit_bitcast(ctx, addr, T_pprjlvalue),
sizeof(jl_value_t*))))
->setOrdering(AtomicOrdering::Unordered);
if (wb && strct.isboxed)
if (wb && strct.isboxed && !type_is_permalloc(rhs.typ))
emit_write_barrier(ctx, boxed(ctx, strct), r);
}
else if (jl_is_uniontype(jfty)) {
Expand Down
5 changes: 3 additions & 2 deletions src/codegen_shared.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include <utility>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/Support/Debug.h>
#include <llvm/IR/DebugLoc.h>
#include <llvm/IR/IRBuilder.h>
Expand Down Expand Up @@ -30,10 +31,10 @@ struct CountTrackedPointers {

#if JL_LLVM_VERSION >= 110000
unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder);
std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
#else
unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> irbuilder);
std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> irbuilder);
std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
#endif

static inline void llvm_dump(llvm::Value *v)
Expand Down
38 changes: 36 additions & 2 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1591,15 +1591,49 @@ static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned>
return V;
}

static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigned> Idxs)
{
SmallVector<Value*,4> IdxList{Idxs.size() + 1};
Type *T_int32 = Type::getInt32Ty(STy->getContext());
IdxList[0] = ConstantInt::get(T_int32, 0);
for (unsigned j = 0; j < Idxs.size(); ++j)
IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]);
auto offset = DL.getIndexedOffsetInType(STy, IdxList);
assert(offset >= 0);
return (unsigned)offset;
}

#if JL_LLVM_VERSION >= 110000
std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder) {
std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
#else
std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> irbuilder) {
std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> irbuilder, ArrayRef<unsigned> perm_offsets) {
#endif
auto Tracked = TrackCompositeType(STy);
std::vector<Value*> Ptrs;
unsigned perm_idx = 0;
auto ignore_field = [&] (ArrayRef<unsigned> Idxs) {
if (perm_idx >= perm_offsets.size())
return false;
// Assume the indices returned from `TrackCompositeType` are ordered and do a
// single pass over `perm_offsets`.
assert(!isptr);
auto offset = getFieldOffset(irbuilder.GetInsertBlock()->getModule()->getDataLayout(),
STy, Idxs);
do {
auto perm_offset = perm_offsets[perm_idx];
if (perm_offset > offset)
return false;
perm_idx++;
if (perm_offset == offset) {
return true;
}
} while (perm_idx < perm_offsets.size());
return false;
};
for (unsigned i = 0; i < Tracked.size(); ++i) {
auto Idxs = makeArrayRef(Tracked[i]);
if (ignore_field(Idxs))
continue;
Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder);
Ptrs.push_back(Elem);
}
Expand Down