diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index c15dfd3841047..59dac6ad79498 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -29,6 +29,8 @@ The following are definitely leaf locks (level 1), and must not try to acquire a > * flisp > * jl_in_stackwalk (Win32) > * ResourcePool::mutex +> * RLST_mutex +> * jl_locked_stream::mutex > > > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool > > likewise, the ResourcePool::mutexes just protect the associated resource pool diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 977478107316c..346c08c6a0ef1 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -460,11 +460,11 @@ void jl_dump_native_impl(void *native_code, TheTriple.setOS(llvm::Triple::MacOSX); #endif std::unique_ptr TM( - jl_ExecutionEngine->getTargetMachine().getTarget().createTargetMachine( + jl_ExecutionEngine->getTarget().createTargetMachine( TheTriple.getTriple(), - jl_ExecutionEngine->getTargetMachine().getTargetCPU(), - jl_ExecutionEngine->getTargetMachine().getTargetFeatureString(), - jl_ExecutionEngine->getTargetMachine().Options, + jl_ExecutionEngine->getTargetCPU(), + jl_ExecutionEngine->getTargetFeatureString(), + jl_ExecutionEngine->getTargetOptions(), #if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) Reloc::PIC_, #else @@ -481,7 +481,7 @@ void jl_dump_native_impl(void *native_code, )); legacy::PassManager PM; - addTargetPasses(&PM, TM.get()); + addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); // set up optimization passes SmallVector bc_Buffer; @@ -502,7 +502,7 @@ void jl_dump_native_impl(void *native_code, PM.add(createBitcodeWriterPass(unopt_bc_OS)); if (bc_fname || obj_fname || asm_fname) { addOptimizationPasses(&PM, jl_options.opt_level, true, true); - addMachinePasses(&PM, TM.get(), jl_options.opt_level); + addMachinePasses(&PM, jl_options.opt_level); } if (bc_fname) PM.add(createBitcodeWriterPass(bc_OS)); @@ -595,14 +595,14 @@ void jl_dump_native_impl(void *native_code, delete data; } -void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM) +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) { - PM->add(new TargetLibraryInfoWrapperPass(Triple(TM->getTargetTriple()))); - PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + PM->add(new TargetLibraryInfoWrapperPass(triple)); + PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); } -void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM, int optlevel) +void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) { // TODO: don't do this on CPUs that natively support Float16 PM->add(createDemoteFloat16Pass()); @@ -857,9 +857,9 @@ class JuliaPipeline : public Pass { (void)jl_init_llvm(); PMTopLevelManager *TPM = Stack.top()->getTopLevelManager(); TPMAdapter Adapter(TPM); - addTargetPasses(&Adapter, &jl_ExecutionEngine->getTargetMachine()); + addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true); - addMachinePasses(&Adapter, &jl_ExecutionEngine->getTargetMachine(), OptLevel); + addMachinePasses(&Adapter, OptLevel); } JuliaPipeline() : Pass(PT_PassManager, ID) {} Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override { @@ -993,9 +993,9 @@ void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwra static legacy::PassManager *PM; if (!PM) { PM = new legacy::PassManager(); - addTargetPasses(PM, &jl_ExecutionEngine->getTargetMachine()); + addTargetPasses(PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); addOptimizationPasses(PM, jl_options.opt_level); - addMachinePasses(PM, &jl_ExecutionEngine->getTargetMachine(), jl_options.opt_level); + addMachinePasses(PM, jl_options.opt_level); } // get the source code for this function diff --git a/src/codegen.cpp b/src/codegen.cpp index b4194945e33cc..f663bf8d9d15a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -186,11 +186,10 @@ typedef Instruction TerminatorInst; #include "processor.h" #include "julia_assert.h" -JL_STREAM *dump_emitted_mi_name_stream = NULL; extern "C" JL_DLLEXPORT void jl_dump_emitted_mi_name_impl(void *s) { - dump_emitted_mi_name_stream = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_emitted_mi_name_stream() = (JL_STREAM*)s; } extern "C" { @@ -7978,15 +7977,16 @@ jl_llvm_functions_t jl_emit_code( "functions compiled with custom codegen params must not be cached"); JL_TRY { decls = emit_function(m, li, src, jlrettype, params); - if (dump_emitted_mi_name_stream != NULL) { - jl_printf(dump_emitted_mi_name_stream, "%s\t", decls.specFunctionObject.c_str()); + auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream(); + if (stream) { + jl_printf(stream, "%s\t", decls.specFunctionObject.c_str()); // NOTE: We print the Type Tuple without surrounding quotes, because the quotes // break CSV parsing if there are any internal quotes in the Type name (e.g. in // Symbol("...")). The \t delineator should be enough to ensure whitespace is // handled correctly. (And we don't need to worry about any tabs in the printed // string, because tabs are printed as "\t" by `show`.) - jl_static_show(dump_emitted_mi_name_stream, li->specTypes); - jl_printf(dump_emitted_mi_name_stream, "\n"); + jl_static_show(stream, li->specTypes); + jl_printf(stream, "\n"); } } JL_CATCH { diff --git a/src/disasm.cpp b/src/disasm.cpp index ebbcd03d0fc34..fa2e3e819a0be 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -1208,9 +1208,10 @@ jl_value_t *jl_dump_function_asm_impl(void *F, char raw_mc, const char* asm_vari f2.deleteBody(); } }); - LLVMTargetMachine *TM = static_cast(&jl_ExecutionEngine->getTargetMachine()); + auto TMBase = jl_ExecutionEngine->cloneTargetMachine(); + LLVMTargetMachine *TM = static_cast(TMBase.get()); legacy::PassManager PM; - addTargetPasses(&PM, TM); + addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (raw_mc) { raw_svector_ostream obj_OS(ObjBufferSV); if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr)) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 80c8b928bd022..2c3e8ab576d50 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -54,17 +54,15 @@ using namespace llvm; #define DEBUG_TYPE "jitlayers" // Snooping on which functions are being compiled, and how long it takes -JL_STREAM *dump_compiles_stream = NULL; extern "C" JL_DLLEXPORT void jl_dump_compiles_impl(void *s) { - dump_compiles_stream = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_compiles_stream() = (JL_STREAM*)s; } -JL_STREAM *dump_llvm_opt_stream = NULL; extern "C" JL_DLLEXPORT void jl_dump_llvm_opt_impl(void *s) { - dump_llvm_opt_stream = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s; } static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap &NewExports); @@ -108,7 +106,8 @@ static jl_callptr_t _jl_compile_codeinst( // caller must hold codegen_lock // and have disabled finalizers uint64_t start_time = 0; - if (dump_compiles_stream != NULL) + bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream(); + if (timed) start_time = jl_hrtime(); assert(jl_is_code_instance(codeinst)); @@ -198,17 +197,18 @@ static jl_callptr_t _jl_compile_codeinst( } uint64_t end_time = 0; - if (dump_compiles_stream != NULL) + if (timed) end_time = jl_hrtime(); // If logging of the compilation stream is enabled, // then dump the method-instance specialization type to the stream jl_method_instance_t *mi = codeinst->def; if (jl_is_method(mi->def.method)) { - if (dump_compiles_stream != NULL) { - jl_printf(dump_compiles_stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show(dump_compiles_stream, mi->specTypes); - jl_printf(dump_compiles_stream, "\"\n"); + auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); + if (stream) { + jl_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show(stream, mi->specTypes); + jl_printf(stream, "\"\n"); } } return fptr; @@ -480,13 +480,6 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) #endif } -static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, int optlevel) -{ - addTargetPasses(&PM, &TM); - addOptimizationPasses(&PM, optlevel); - addMachinePasses(&PM, &TM, optlevel); -} - static auto countBasicBlocks(const Function &F) { return std::distance(F.begin(), F.end()); @@ -899,7 +892,9 @@ namespace { } std::unique_ptr operator()() { auto PM = std::make_unique(); - addPassesForOptLevel(*PM, *TM, optlevel); + addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis()); + addOptimizationPasses(PM.get(), optlevel); + addMachinePasses(PM.get(), optlevel); return PM; } }; @@ -910,24 +905,27 @@ namespace { OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { TSM.withModuleDo([&](Module &M) { uint64_t start_time = 0; - if (dump_llvm_opt_stream != NULL) { - // Print LLVM function statistics _before_ optimization - // Print all the information about this invocation as a YAML object - jl_printf(dump_llvm_opt_stream, "- \n"); - // We print the name and some statistics for each function in the module, both - // before optimization and again afterwards. - jl_printf(dump_llvm_opt_stream, " before: \n"); - for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().startswith("jfptr_")) { - continue; + { + auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); + if (stream) { + // Print LLVM function statistics _before_ optimization + // Print all the information about this invocation as a YAML object + jl_printf(stream, "- \n"); + // We print the name and some statistics for each function in the module, both + // before optimization and again afterwards. + jl_printf(stream, " before: \n"); + for (auto &F : M.functions()) { + if (F.isDeclaration() || F.getName().startswith("jfptr_")) { + continue; + } + // Each function is printed as a YAML object with several attributes + jl_printf(stream, " \"%s\":\n", F.getName().str().c_str()); + jl_printf(stream, " instructions: %u\n", F.getInstructionCount()); + jl_printf(stream, " basicblocks: %lu\n", countBasicBlocks(F)); } - // Each function is printed as a YAML object with several attributes - jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str()); - jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount()); - jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F)); - } - start_time = jl_hrtime(); + start_time = jl_hrtime(); + } } JL_TIMING(LLVM_OPT); @@ -936,20 +934,23 @@ namespace { (***PMs).run(M); uint64_t end_time = 0; - if (dump_llvm_opt_stream != NULL) { - end_time = jl_hrtime(); - jl_printf(dump_llvm_opt_stream, " time_ns: %" PRIu64 "\n", end_time - start_time); - jl_printf(dump_llvm_opt_stream, " optlevel: %d\n", optlevel); - - // Print LLVM function statistics _after_ optimization - jl_printf(dump_llvm_opt_stream, " after: \n"); - for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().startswith("jfptr_")) { - continue; + { + auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); + if (stream) { + end_time = jl_hrtime(); + jl_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); + jl_printf(stream, " optlevel: %d\n", optlevel); + + // Print LLVM function statistics _after_ optimization + jl_printf(stream, " after: \n"); + for (auto &F : M.functions()) { + if (F.isDeclaration() || F.getName().startswith("jfptr_")) { + continue; + } + jl_printf(stream, " \"%s\":\n", F.getName().str().c_str()); + jl_printf(stream, " instructions: %u\n", F.getInstructionCount()); + jl_printf(stream, " basicblocks: %lu\n", countBasicBlocks(F)); } - jl_printf(dump_llvm_opt_stream, " \"%s\":\n", F.getName().str().c_str()); - jl_printf(dump_llvm_opt_stream, " instructions: %u\n", F.getInstructionCount()); - jl_printf(dump_llvm_opt_stream, " basicblocks: %lu\n", countBasicBlocks(F)); } } }); @@ -1166,7 +1167,7 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) { - static int globalUnique = 0; + std::lock_guard lock(RLST_mutex); std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr]; if (fname->empty()) { std::string string_fname; @@ -1186,7 +1187,7 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod stream_fname << "jlsys_"; } const char* unadorned_name = jl_symbol_name(codeinst->def->def.method->name); - stream_fname << unadorned_name << "_" << globalUnique++; + stream_fname << unadorned_name << "_" << RLST_inc++; *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable addGlobalMapping(*fname, Addr); } @@ -1232,16 +1233,6 @@ const DataLayout& JuliaOJIT::getDataLayout() const return DL; } -TargetMachine &JuliaOJIT::getTargetMachine() -{ - return *TM; -} - -const Triple& JuliaOJIT::getTargetTriple() const -{ - return TM->getTargetTriple(); -} - std::string JuliaOJIT::getMangledName(StringRef Name) { SmallString<128> FullName; @@ -1412,6 +1403,40 @@ void JuliaOJIT::shareStrings(Module &M) GV->eraseFromParent(); } +//TargetMachine pass-through methods + +std::unique_ptr JuliaOJIT::cloneTargetMachine() const +{ + return std::unique_ptr(getTarget() + .createTargetMachine( + getTargetTriple().str(), + getTargetCPU(), + getTargetFeatureString(), + getTargetOptions(), + TM->getRelocationModel(), + TM->getCodeModel(), + TM->getOptLevel())); +} + +const Triple& JuliaOJIT::getTargetTriple() const { + return TM->getTargetTriple(); +} +StringRef JuliaOJIT::getTargetFeatureString() const { + return TM->getTargetFeatureString(); +} +StringRef JuliaOJIT::getTargetCPU() const { + return TM->getTargetCPU(); +} +const TargetOptions &JuliaOJIT::getTargetOptions() const { + return TM->Options; +} +const Target &JuliaOJIT::getTarget() const { + return TM->getTarget(); +} +TargetIRAnalysis JuliaOJIT::getTargetIRAnalysis() const { + return TM->getTargetIRAnalysis(); +} + static void jl_decorate_module(Module &M) { #if defined(_CPU_X86_64_) && defined(_OS_WINDOWS_) // Add special values used by debuginfo to build the UnwindData table registration for Win64 diff --git a/src/jitlayers.h b/src/jitlayers.h index d1a886f87d96b..dde8ebf13ca9b 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -49,9 +49,9 @@ using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; -void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM); +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis); void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false); -void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM, int optlevel); +void addMachinePasses(legacy::PassManagerBase *PM, int optlevel); void jl_finalize_module(orc::ThreadSafeModule m); void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src); GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M); @@ -61,6 +61,34 @@ static inline bool imaging_default() { return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); } +struct jl_locked_stream { + JL_STREAM *stream = nullptr; + std::mutex mutex; + + struct lock { + std::unique_lock lck; + JL_STREAM *&stream; + + lock(std::mutex &mutex, JL_STREAM *&stream) : lck(mutex), stream(stream) {} + + JL_STREAM *&operator*() { + return stream; + } + + explicit operator bool() { + return !!stream; + } + + operator JL_STREAM *() { + return stream; + } + }; + + lock operator*() { + return lock(mutex, stream); + } +}; + typedef struct _jl_llvm_functions_t { std::string functionObject; // jlcall llvm Function name std::string specFunctionObject; // specialized llvm Function name @@ -288,14 +316,18 @@ class JuliaOJIT { struct OptSelLayerT : orc::IRLayer { template - OptSelLayerT(std::unique_ptr (&optimizers)[N]) : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), optimizers[0]->OptimizeLayer.getManglingOptions()), optimizers(optimizers), count(N) { + OptSelLayerT(const std::array, N> &optimizers) + : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), + optimizers[0]->OptimizeLayer.getManglingOptions()), + optimizers(optimizers.data()), + count(N) { static_assert(N > 0, "Expected array with at least one optimizer!"); } void emit(std::unique_ptr R, orc::ThreadSafeModule TSM) override; private: - std::unique_ptr *optimizers; + const std::unique_ptr * const optimizers; size_t count; }; @@ -332,20 +364,38 @@ class JuliaOJIT { ContextPool.release(std::move(ctx)); } const DataLayout& getDataLayout() const; - TargetMachine &getTargetMachine(); + + // TargetMachine pass-through methods + std::unique_ptr cloneTargetMachine() const; const Triple& getTargetTriple() const; + StringRef getTargetFeatureString() const; + StringRef getTargetCPU() const; + const TargetOptions &getTargetOptions() const; + const Target &getTarget() const; + TargetIRAnalysis getTargetIRAnalysis() const; + size_t getTotalBytes() const; JITDebugInfoRegistry &getDebugInfoRegistry() JL_NOTSAFEPOINT { return DebugRegistry; } + + jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { + return dump_emitted_mi_name_stream; + } + jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT { + return dump_compiles_stream; + } + jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { + return dump_llvm_opt_stream; + } private: std::string getMangledName(StringRef Name); std::string getMangledName(const GlobalValue *GV); void shareStrings(Module &M); - std::unique_ptr TM; - DataLayout DL; + const std::unique_ptr TM; + const DataLayout DL; orc::ExecutionSession ES; orc::JITDylib &GlobalJD; @@ -353,16 +403,24 @@ class JuliaOJIT { JITDebugInfoRegistry DebugRegistry; + //Map and inc are guarded by RLST_mutex + std::mutex RLST_mutex{}; + int RLST_inc = 0; + DenseMap ReverseLocalSymbolTable; + + //Compilation streams + jl_locked_stream dump_emitted_mi_name_stream; + jl_locked_stream dump_compiles_stream; + jl_locked_stream dump_llvm_opt_stream; + ResourcePool ContextPool; #ifndef JL_USE_JITLINK - std::shared_ptr MemMgr; + const std::shared_ptr MemMgr; #endif ObjLayerT ObjectLayer; - std::unique_ptr Pipelines[4]; + const std::array, 4> Pipelines; OptSelLayerT OptSelLayer; - - DenseMap ReverseLocalSymbolTable; }; extern JuliaOJIT *jl_ExecutionEngine; orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeContext ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()); diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 4f98f09309ba9..75ac96e1b3060 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -61,7 +61,7 @@ bool have_fma(Function &intr, Function &caller) { Attribute FSAttr = caller.getFnAttribute("target-features"); StringRef FS = - FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetMachine().getTargetFeatureString(); + FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); SmallVector Features; FS.split(Features, ',');