Set the MMTk VO bit for newly allocated objects when building with MMTK_CONSERVATIVE=1.

Make.inc: MMTK_CONSERVATIVE=1 adds -DMMTK_CONSERVATIVE_SCAN to JCFLAGS and JCXXFLAGS.
src/julia.h: defines MMTK_NEEDS_VO_BIT (1 under MMTK_CONSERVATIVE_SCAN, otherwise 0),
    declares mmtk_post_alloc and MMTK_SIDE_VO_BIT_BASE_ADDRESS, and adds mmtk_set_vo_bit(),
    which ORs bit ((addr >> 3) & 7) into the byte at MMTK_SIDE_VO_BIT_BASE_ADDRESS + (addr >> 6).
    The immix and immortal post-alloc fast paths call it when MMTK_NEEDS_VO_BIT is set.
src/datatype.c: calls mmtk_immortal_post_alloc_fast after jl_gc_perm_alloc under MMTK_GC.
src/llvm-final-gc-lowering.cpp: FinalLowerGC::lowerGCAllocBytes emits the same bit-set
    sequence as IR, or a call to jl_gc_post_alloc_slow when INLINE_FASTPATH_POST_ALLOCATION
    is false.
src/llvm-pass-helpers.{h,cpp}, src/jl_exported_funcs.inc: declare and export the
    jl_gc_post_alloc_slow well-known function.
src/gc.c, src/mmtk-gc.c: jl_gc_post_alloc_slow is jl_unreachable() in gc.c and forwards to
    mmtk_immix_post_alloc_slow in mmtk-gc.c.

diff --git a/Make.inc b/Make.inc
index 5a0d97ac4843d..c6474cf0a377b 100644
--- a/Make.inc
+++ b/Make.inc
@@ -767,6 +767,10 @@ ifeq (${MMTK_PLAN},StickyImmix)
 JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX
 JCFLAGS += -DMMTK_PLAN_STICKYIMMIX
 endif
+ifeq (${MMTK_CONSERVATIVE},1)
+JCXXFLAGS += -DMMTK_CONSERVATIVE_SCAN
+JCFLAGS += -DMMTK_CONSERVATIVE_SCAN
+endif
 MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk
 MMTK_API_INC = $(MMTK_DIR)/api
 MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia
diff --git a/src/datatype.c b/src/datatype.c
index 345bab270e912..8667d245462ad 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -276,6 +276,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
     if ((void*)ret == HT_NOTFOUND) {
         if (!should_malloc) {
             char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0);
+#ifdef MMTK_GC
+            jl_ptls_t ptls = jl_current_task->ptls;
+            mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz);
+#endif
             assert(perm_mem);
             ret = (jl_datatype_layout_t *)perm_mem;
             memcpy(perm_mem, flddesc, flddesc_sz);
@@ -815,6 +819,10 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
         jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
                          0, 4, 0);
+#ifdef MMTK_GC
+    jl_ptls_t ptls = jl_current_task->ptls;
+    mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t));
+#endif
     layout->size = large ? GC_MAX_SZCLASS+1 : 0;
     layout->nfields = 0;
     layout->alignment = sizeof(void *);
diff --git a/src/gc.c b/src/gc.c
index 74a720260e7d5..03c309716110e 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -3578,6 +3578,11 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
     return NULL;
 }
 
+JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT
+{
+    jl_unreachable();
+}
+
 JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
 {
     jl_unreachable();
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 54699ca46c78f..292c71ab54b2e 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -198,6 +198,7 @@
     XX(jl_gc_wb_binding_noinline) \
     XX(jl_gc_wb1_slow) \
     XX(jl_gc_wb2_slow) \
+    XX(jl_gc_post_alloc_slow) \
     XX(jl_gc_safepoint) \
     XX(jl_gc_schedule_foreign_sweepfunc) \
     XX(jl_gc_set_cb_notify_external_alloc) \
diff --git a/src/julia.h b/src/julia.h
index b23f2bc561502..acc78d5a0c74c 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -2438,8 +2438,11 @@ extern JL_DLLEXPORT int jl_default_debug_info_kind;
 extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
 extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
 extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
+extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator);
+
 
 extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
+extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS;
 
 // These need to be constants.
 
@@ -2452,6 +2455,12 @@ extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
 #define MMTK_NEEDS_WRITE_BARRIER (1)
 #endif
 
+#ifdef MMTK_CONSERVATIVE_SCAN
+#define MMTK_NEEDS_VO_BIT (1)
+#else
+#define MMTK_NEEDS_VO_BIT (0)
+#endif
+
 #define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
 #define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)
 
@@ -2526,8 +2535,23 @@ STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t si
     return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
 }
 
+STATIC_INLINE void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    mmtk_post_alloc(mutator, obj, size, 0);
+}
+
+STATIC_INLINE void mmtk_set_vo_bit(void* obj) {
+    intptr_t addr = (intptr_t) obj;
+    intptr_t shift = (addr >> 3) & 0b111;
+    uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
+    uint8_t new_val = (*vo_meta_addr) | (1 << shift);
+    (*vo_meta_addr) = new_val;
+}
+
 STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
-    // We do not need post alloc for immix objects in immix/stickyimmix
+    if (MMTK_NEEDS_VO_BIT) {
+        // set VO bit
+        mmtk_set_vo_bit(obj);
+    }
 }
 
 STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
@@ -2536,10 +2560,15 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
 }
 
 STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    if (MMTK_NEEDS_VO_BIT) {
+        // set VO bit
+        mmtk_set_vo_bit(obj);
+    }
+
     if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
         intptr_t addr = (intptr_t) obj;
-        uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
         intptr_t shift = (addr >> 3) & 0b111;
+        uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
         while(1) {
             uint8_t old_val = *meta_addr;
             uint8_t new_val = old_val | (1 << shift);
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 74e69d9d6fa9f..02be0ac3e1922 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -59,6 +59,7 @@ struct FinalLowerGC: private JuliaPassContext {
     Function *writeBarrierBindingFunc;
     Function *writeBarrier1SlowFunc;
     Function *writeBarrier2SlowFunc;
+    Function *postAllocSlowFunc;
 #endif
 
     Instruction *pgcstack;
@@ -373,6 +374,45 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 
             auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
             auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+
+            if (MMTK_NEEDS_VO_BIT) {
+                // Should we generate fastpath post alloc sequence here?
+                // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
+                const bool INLINE_FASTPATH_POST_ALLOCATION = true;
+
+                // set VO bit
+                if (INLINE_FASTPATH_POST_ALLOCATION) {
+                    auto intptr_ty = Type::getInt64Ty(target->getContext());
+                    auto i8_ty = Type::getInt8Ty(F.getContext());
+                    intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_VO_BIT_BASE_ADDRESS);
+                    auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
+                    auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
+
+                    // intptr_t addr = (intptr_t) v;
+                    auto addr = v_raw;
+
+                    // uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6);
+                    auto shr = builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 6));
+                    auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
+
+                    // intptr_t shift = (addr >> 3) & 0b111;
+                    auto shift = builder.CreateAnd(builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
+
+                    // uint8_t byte_val = *vo_meta_addr;
+                    auto byte_val = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
+
+                    // uint8_t new_val = byte_val | (1 << shift);
+                    auto shifted_val = builder.CreateShl(ConstantInt::get(intptr_ty, 1), shift);
+                    auto shifted_val_i8 = builder.CreateTruncOrBitCast(shifted_val, i8_ty);
+                    auto new_val = builder.CreateOr(byte_val, shifted_val_i8);
+
+                    // (*vo_meta_addr) = new_val;
+                    builder.CreateStore(new_val, metadata_ptr);
+                } else {
+                    builder.CreateCall(postAllocSlowFunc, { v_as_ptr, pool_osize_i32 });
+                }
+            }
+
             builder.CreateBr(top_cont);
 
             phiNode->addIncoming(new_call, slowpath);
@@ -416,7 +456,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
     writeBarrierBindingFunc = getOrDeclare(jl_well_known::GCWriteBarrierBinding);
     writeBarrier1SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier1Slow);
     writeBarrier2SlowFunc = getOrDeclare(jl_well_known::GCWriteBarrier2Slow);
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
+    postAllocSlowFunc = getOrDeclare(jl_well_known::GCPostAllocSlow);
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
 #else
     GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
 #endif
@@ -436,8 +477,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
 bool FinalLowerGC::doFinalization(Module &M)
 {
 #ifdef MMTK_GC
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc};
-    queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = nullptr;
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, gcPreserveBeginHookFunc, gcPreserveEndHookFunc, writeBarrier1Func, writeBarrier2Func, writeBarrierBindingFunc, writeBarrier1SlowFunc, writeBarrier2SlowFunc, postAllocSlowFunc};
+    queueRootFunc = poolAllocFunc = bigAllocFunc = gcPreserveBeginHookFunc = gcPreserveEndHookFunc = writeBarrier1Func = writeBarrier2Func = writeBarrierBindingFunc = writeBarrier1SlowFunc = writeBarrier2SlowFunc = postAllocSlowFunc = nullptr;
 #else
     GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
     queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index 8d2e0c2d14ca5..7f18bc354831b 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -341,6 +341,7 @@ namespace jl_well_known {
     static const char *GC_WB_BINDING_NAME = XSTR(jl_gc_wb_binding_noinline);
     static const char *GC_WB_1_SLOW_NAME = XSTR(jl_gc_wb1_slow);
     static const char *GC_WB_2_SLOW_NAME = XSTR(jl_gc_wb2_slow);
+    static const char *GC_POST_ALLOC_SLOW_NAME = XSTR(jl_gc_post_alloc_slow);
 #endif
 
     static auto T_size_t(const JuliaPassContext &context) {
@@ -528,5 +529,19 @@ namespace jl_well_known {
             func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
             return func;
     });
+
+    const WellKnownFunctionDescription GCPostAllocSlow (
+        GC_POST_ALLOC_SLOW_NAME,
+        [](const JuliaPassContext &context) {
+            auto func = Function::Create(
+                FunctionType::get(
+                    Type::getVoidTy(context.getLLVMContext()),
+                    { context.T_prjlvalue, Type::getInt32Ty(context.getLLVMContext()) },
+                    false),
+                Function::ExternalLinkage,
+                GC_POST_ALLOC_SLOW_NAME);
+            func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            return func;
+    });
 #endif
 }
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 1e672433be596..6c169809a87ee 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -177,6 +177,7 @@ namespace jl_well_known {
     extern const WellKnownFunctionDescription GCWriteBarrierBinding;
     extern const WellKnownFunctionDescription GCWriteBarrier1Slow;
     extern const WellKnownFunctionDescription GCWriteBarrier2Slow;
+    extern const WellKnownFunctionDescription GCPostAllocSlow;
 #endif
 }
 
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
index e0363c8e70386..d18e61db819fc 100644
--- a/src/mmtk-gc.c
+++ b/src/mmtk-gc.c
@@ -647,6 +647,12 @@ JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFE
     mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
 }
 
+JL_DLLEXPORT void jl_gc_post_alloc_slow(void* obj, int size) JL_NOTSAFEPOINT {
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_immix_post_alloc_slow(&ptls->mmtk_mutator, obj, size);
+}
+
 void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
 {
     jl_ptls_t ptls = jl_current_task->ptls;