Skip to content

Commit

Permalink
Merge pull request #59 from udesou/support-vo-bit
Browse files Browse the repository at this point in the history
Setting up post alloc function to set VO bit
  • Loading branch information
udesou authored Jul 9, 2024
2 parents 5c9b370 + 82347b3 commit 084d8a0
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 5 deletions.
4 changes: 4 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,10 @@ ifeq (${MMTK_PLAN},StickyImmix)
JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX
JCFLAGS += -DMMTK_PLAN_STICKYIMMIX
endif
# When MMTK_CONSERVATIVE is set to 1, add -DMMTK_CONSERVATIVE_SCAN to both
# JCXXFLAGS and JCFLAGS so the macro is defined for sources built with either
# flag set.
ifeq (${MMTK_CONSERVATIVE},1)
JCXXFLAGS += -DMMTK_CONSERVATIVE_SCAN
JCFLAGS += -DMMTK_CONSERVATIVE_SCAN
endif
MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk
MMTK_API_INC = $(MMTK_DIR)/api
MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia
Expand Down
8 changes: 8 additions & 0 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
if ((void*)ret == HT_NOTFOUND) {
if (!should_malloc) {
char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0);
#ifdef MMTK_GC
jl_ptls_t ptls = jl_current_task->ptls;
mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz);
#endif
assert(perm_mem);
ret = (jl_datatype_layout_t *)perm_mem;
memcpy(perm_mem, flddesc, flddesc_sz);
Expand Down Expand Up @@ -815,6 +819,10 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
0, 4, 0);
#ifdef MMTK_GC
jl_ptls_t ptls = jl_current_task->ptls;
mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t));
#endif
layout->size = large ? GC_MAX_SZCLASS+1 : 0;
layout->nfields = 0;
layout->alignment = sizeof(void *);
Expand Down
5 changes: 5 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3578,6 +3578,11 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
return NULL;
}

// Exported post-allocation slow-path symbol as defined in gc.c.
// This definition does no work: the body consists solely of jl_unreachable(),
// and neither argument is read.
JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT
{
    // Arguments are intentionally unused in this definition.
    (void)obj;
    (void)size;
    jl_unreachable();
}

JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
{
jl_unreachable();
Expand Down
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
XX(jl_gc_wb_binding_noinline) \
XX(jl_gc_wb1_slow) \
XX(jl_gc_wb2_slow) \
XX(jl_gc_post_alloc_slow) \
XX(jl_gc_safepoint) \
XX(jl_gc_schedule_foreign_sweepfunc) \
XX(jl_gc_set_cb_notify_external_alloc) \
Expand Down
33 changes: 31 additions & 2 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2438,8 +2438,11 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind;
// Entry points and data defined outside this header (declared extern here).

// Write-barrier hooks taking the mutator, the parent object and the written pointer.
extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
// Allocation entry point: size, alignment, offset and an allocator selector.
extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
// Post-allocation hook; mmtk_immix_post_alloc_slow() forwards to it with allocator 0.
extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator);


// Base addresses of the side-metadata byte tables. The inline helpers in this
// header index them with (object address >> 6) and select a bit with
// ((object address >> 3) & 7).
extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS;

// These need to be constants.

Expand All @@ -2452,6 +2455,12 @@ extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
#define MMTK_NEEDS_WRITE_BARRIER (1)
#endif

// MMTK_NEEDS_VO_BIT is (1) when the build defines MMTK_CONSERVATIVE_SCAN and
// (0) otherwise. It is always defined, so it can be tested in ordinary `if`
// conditions; the post-alloc helpers use it to decide whether to set the VO bit.
#ifdef MMTK_CONSERVATIVE_SCAN
#define MMTK_NEEDS_VO_BIT (1)
#else
#define MMTK_NEEDS_VO_BIT (0)
#endif

#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)

Expand Down Expand Up @@ -2526,8 +2535,23 @@ STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t si
return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
}

// Slow-path post-allocation hook: hands the mutator, the object and its size
// to mmtk_post_alloc() without doing any work inline.
STATIC_INLINE void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) {
    // Allocator selector forwarded to mmtk_post_alloc().
    // NOTE(review): presumably 0 selects the default allocator -- confirm against the MMTk binding.
    const int allocator = 0;
    mmtk_post_alloc(mutator, obj, size, allocator);
}

// Set the VO bit for `obj` in the side table that starts at
// MMTK_SIDE_VO_BIT_BASE_ADDRESS.
//
// Table layout as used here: one bit per 8 bytes of address space, packed
// eight to a byte. The byte index is (addr >> 6) and the bit inside that byte
// is ((addr >> 3) & 7).
//
// The address is treated as an unsigned integer (uintptr_t). Right-shifting a
// negative signed value is implementation-defined in C and would produce a
// negative table offset for addresses with the top bit set. The mask is
// written as 7 rather than a 0b binary literal, which is a compiler extension
// before C23.
//
// The update is a plain (non-atomic) read-modify-write of the metadata byte.
STATIC_INLINE void mmtk_set_vo_bit(void* obj) {
    const uintptr_t addr = (uintptr_t) obj;
    // Which of the eight bits in the metadata byte belongs to this address.
    const uintptr_t bit_index = (addr >> 3) & 7;
    // Metadata byte covering the 64-byte window that contains `obj`.
    uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
    *vo_meta_addr = (uint8_t) (*vo_meta_addr | (1u << bit_index));
}

// Fast-path post-allocation hook for immix allocations.
//
// Immix objects need no other post-alloc work under immix/stickyimmix; the
// only action taken here is setting the object's VO bit, and only when
// MMTK_NEEDS_VO_BIT is non-zero. `mutator` and `size` are not used.
STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
    (void)mutator;
    (void)size;

    if (!MMTK_NEEDS_VO_BIT) {
        // Nothing to record when VO bits are not in use.
        return;
    }
    mmtk_set_vo_bit(obj);
}

STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
Expand All @@ -2536,10 +2560,15 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
}

STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
if (MMTK_NEEDS_VO_BIT) {
// set VO bit
mmtk_set_vo_bit(obj);
}

if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
intptr_t addr = (intptr_t) obj;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
intptr_t shift = (addr >> 3) & 0b111;
uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
while(1) {
uint8_t old_val = *meta_addr;
uint8_t new_val = old_val | (1 << shift);
Expand Down
47 changes: 44 additions & 3 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct FinalLowerGC: private JuliaPassContext {
Function *writeBarrierBindingFunc;
Function *writeBarrier1SlowFunc;
Function *writeBarrier2SlowFunc;
Function *postAllocSlowFunc;
#endif
Instruction *pgcstack;

Expand Down Expand Up @@ -373,6 +374,45 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)

auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());

if (MMTK_NEEDS_VO_BIT) {
// Should we generate fastpath post alloc sequence here?
// Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
const bool INLINE_FASTPATH_POST_ALLOCATION = true;

// set VO bit
if (INLINE_FASTPATH_POST_ALLOCATION) {
auto intptr_ty = Type::getInt64Ty(target->getContext());
auto i8_ty = Type::getInt8Ty(F.getContext());
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_VO_BIT_BASE_ADDRESS);
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));

// intptr_t addr = (intptr_t) v;
auto addr = v_raw;

// uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
auto shr = builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 6));
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);

// intptr_t shift = (addr >> 3) & 0b111;
auto shift = builder.CreateAnd(builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));

// uint8_t byte_val = *vo_meta_addr;
auto byte_val = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());

// uint8_t new_val = byte_val | (1 << shift);
auto shifted_val = builder.CreateShl(ConstantInt::get(intptr_ty, 1), shift);
auto shifted_val_i8 = builder.CreateTruncOrBitCast(shifted_val, i8_ty);
auto new_val = builder.CreateOr(byte_val, shifted_val_i8);

// (*vo_meta_addr) = new_val;
builder.CreateStore(new_val, metadata_ptr);
} else {
builder.CreateCall(postAllocSlowFunc, { v_as_ptr, pool_osize_i32 });
}
}

builder.CreateBr(top_cont);

phiNode->addIncoming(new_call, slowpath);
Expand Down Expand Up @@ -416,7 +456,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding);
writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
postAllocSlowFunc = getOrDeclare(jl_well_known::GCPostAllocSlow);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
#endif
Expand All @@ -436,8 +477,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
bool FinalLowerGC::doFinalization(Module &M)
{
#ifdef MMTK_GC
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = postAllocSlowFunc = nullptr;
#else
GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
Expand Down
15 changes: 15 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ namespace jl_well_known {
static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline);
static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow);
static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow);
static const char *GC_POST_ALLOC_SLOW_NAME = XSTR(jl_gc_post_alloc_slow);
#endif

static auto T_size_t(const JuliaPassContext &context) {
Expand Down Expand Up @@ -528,5 +529,19 @@ namespace jl_well_known {
func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return func;
});

// Descriptor for the post-allocation slow-path runtime function.
// The declaration it builds has the shape `void (T_prjlvalue, i32)`, external
// linkage, the name GC_POST_ALLOC_SLOW_NAME, and the
// InaccessibleMemOrArgMemOnly function attribute.
const WellKnownFunctionDescription GCPostAllocSlow (
    GC_POST_ALLOC_SLOW_NAME,
    [](const JuliaPassContext &context) {
        auto &llvmCtx = context.getLLVMContext();

        // Signature: returns void; takes the object pointer and a 32-bit size.
        auto signature = FunctionType::get(
            Type::getVoidTy(llvmCtx),
            { context.T_prjlvalue, Type::getInt32Ty(llvmCtx) },
            false);

        auto declaration = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_POST_ALLOC_SLOW_NAME);
        declaration->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return declaration;
    });
#endif
}
1 change: 1 addition & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ namespace jl_well_known {
extern const WellKnownFunctionDescription GCWriteBarrierBinding;
extern const WellKnownFunctionDescription GCWriteBarrier1Slow;
extern const WellKnownFunctionDescription GCWriteBarrier2Slow;
extern const WellKnownFunctionDescription GCPostAllocSlow;
#endif
}

Expand Down
6 changes: 6 additions & 0 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE
mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
}

// Exported post-allocation slow path: looks up the current task's
// thread-local state and forwards `obj` and `size` to
// mmtk_immix_post_alloc_slow() together with that thread's MMTk mutator.
JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT {
    jl_ptls_t ptls = jl_current_task->ptls;
    // The helper takes a size_t; the int argument is converted at the call.
    mmtk_immix_post_alloc_slow(&ptls->mmtk_mutator, obj, (size_t)size);
}

void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
{
jl_ptls_t ptls = jl_current_task->ptls;
Expand Down

0 comments on commit 084d8a0

Please sign in to comment.