diff --git a/src/gc.c b/src/gc.c index 98dcdeb631784..197d1c6ad75c2 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3854,8 +3854,7 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o errno = last_errno; jl_may_leak(base); assert(align > 0); - unsigned diff = (offset - (uintptr_t)base) % align; - return (void*)((char*)base + diff); + return (void*)(LLT_ALIGN((uintptr_t)base + offset, (uintptr_t)align) - offset); } STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 1a385403e9672..099d74ccb37c1 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1301,6 +1301,69 @@ namespace { JuliaOJIT::ResourcePool> TMs; }; + + struct JITPointersT { + + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + void operator()(Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); + } + } + + // Windows needs some inline asm to help + // build unwind tables + jl_decorate_module(M); + } + + private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. + if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; + } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } + + SharedBytesT &SharedBytes; + std::mutex &Lock; + }; } llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { @@ -1493,8 +1556,7 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) ++ModulesAdded; orc::SymbolLookupSet NewExports; TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - jl_decorate_module(M); - shareStrings(M); + JITPointersT(SharedBytes, RLST_mutex)(M); for (auto &F : M.global_values()) { if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { auto Name = ES.intern(getMangledName(F.getName())); @@ -1820,32 +1882,6 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS }); } -// optimize memory by turning long strings into memoized copies, instead of -// making a copy per object file of output. -void JuliaOJIT::shareStrings(Module &M) -{ - ++InternedGlobals; - std::vector erase; - for (auto &GV : M.globals()) { - if (!GV.hasInitializer() || !GV.isConstant()) - continue; - ConstantDataSequential *CDS = dyn_cast(GV.getInitializer()); - if (CDS == nullptr) - continue; - StringRef data = CDS->getRawDataValues(); - if (data.size() > 16) { // only for long strings: keep short ones as values - Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8); - Constant *v = ConstantExpr::getIntToPtr( - ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()), - GV.getType()); - GV.replaceAllUsesWith(v); - erase.push_back(&GV); - } - } - for (auto GV : erase) - GV->eraseFromParent(); -} - //TargetMachine pass-through methods std::unique_ptr JuliaOJIT::cloneTargetMachine() const diff --git a/src/jitlayers.h b/src/jitlayers.h index 380694af60742..b5160ba1863e8 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -1,6 +1,8 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include +#include +#include #include #include @@ -298,6 +300,44 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope"; } +template +class MaxAlignedAllocImpl + : public AllocatorBase> { + +public: + MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default; + + static Align alignment(size_t Size) JL_NOTSAFEPOINT { + // Define the maximum alignment we expect to require, from offset bytes off + // the returned pointer, this is >= alignof(std::max_align_t), which is too + // small often to actually use. + const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT; + return Align(std::min((size_t)llvm::PowerOf2Ceil(Size), MaxAlignment)); + } + + LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { + Align MaxAlign = alignment(Size); + assert(Alignment < MaxAlign); (void)Alignment; + return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); + } + + inline LLVM_ATTRIBUTE_RETURNS_NONNULL + void * Allocate(size_t Size, size_t Alignment) { + return Allocate(Size, Align(Alignment)); + } + + // Pull in base class overloads. + using AllocatorBase::Allocate; + + void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); } + + // Pull in base class overloads. + using AllocatorBase::Deallocate; + +private: +}; +using MaxAlignedAlloc = MaxAlignedAllocImpl<>; + typedef JITSymbol JL_JITSymbol; // The type that is similar to SymbolInfo on LLVM 4.0 is actually // `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol @@ -306,6 +346,7 @@ typedef JITSymbol JL_SymbolInfo; using CompilerResultT = Expected>; using OptimizerResultT = Expected; +using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { public: @@ -538,7 +579,6 @@ class JuliaOJIT { private: std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - void shareStrings(Module &M) JL_NOTSAFEPOINT; const std::unique_ptr TM; const DataLayout DL; @@ -551,6 +591,7 @@ class JuliaOJIT { std::mutex RLST_mutex{}; int RLST_inc = 0; DenseMap ReverseLocalSymbolTable; + SharedBytesT SharedBytes; //Compilation streams jl_locked_stream dump_emitted_mi_name_stream; diff --git a/src/julia_internal.h b/src/julia_internal.h index 51661fa1835f9..4a845350424d4 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -351,7 +351,7 @@ JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void *jl_gc_perm_alloc(size_t sz, int zero, +JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; void gc_sweep_sysimg(void);