diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 8c2be8d42ea43..e96c57173962a 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -685,32 +685,70 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT return std::distance(F.begin(), F.end()); } -void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr R, orc::ThreadSafeModule TSM) { - ++ModulesOptimized; - size_t optlevel = SIZE_MAX; - TSM.withModuleDo([&](Module &M) { - if (jl_generating_output()) { - optlevel = 0; +static constexpr size_t N_optlevels = 4; + +static Expected validateExternRelocations(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { +#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) + auto isIntrinsicFunction = [](GlobalObject &GO) JL_NOTSAFEPOINT { + auto F = dyn_cast(&GO); + if (!F) + return false; + return F->isIntrinsic() || F->getName().startswith("julia."); + }; + // validate the relocations for M (only for RuntimeDyld, JITLink performs its own symbol validation) + auto Err = TSM.withModuleDo([isIntrinsicFunction](Module &M) JL_NOTSAFEPOINT { + Error Err = Error::success(); + for (auto &GO : make_early_inc_range(M.global_objects())) { + if (GO.isDeclaration()) { + if (GO.use_empty()) + GO.eraseFromParent(); + else if (!isIntrinsicFunction(GO) && + !jl_ExecutionEngine->findUnmangledSymbol(GO.getName()) && + !SectionMemoryManager::getSymbolAddressInProcess( + jl_ExecutionEngine->getMangledName(GO.getName()))) { + Err = joinErrors(std::move(Err), make_error( + "Symbol \"" + GO.getName().str() + "\" not found", + inconvertibleErrorCode())); + } + } } - else { - optlevel = std::max(static_cast(jl_options.opt_level), 0); - size_t optlevel_min = std::max(static_cast(jl_options.opt_level_min), 0); - for (auto &F : M.functions()) { - if (!F.getBasicBlockList().empty()) { + return Err; + }); + if (Err) { + return std::move(Err); + } +#endif + return std::move(TSM); +} + +static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { + TSM.withModuleDo([](Module &M) { + size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); + do { + if (jl_generating_output()) { + opt_level = 0; + break; + } + size_t opt_level_min = std::max(static_cast(jl_options.opt_level_min), 0); + for (auto &F : M) { + if (!F.isDeclaration()) { Attribute attr = F.getFnAttribute("julia-optimization-level"); StringRef val = attr.getValueAsString(); if (val != "") { size_t ol = (size_t)val[0] - '0'; - if (ol < optlevel) - optlevel = ol; + if (ol < opt_level) + opt_level = ol; } } } - optlevel = std::min(std::max(optlevel, optlevel_min), this->count); - } + if (opt_level < opt_level_min) + opt_level = opt_level_min; + } while (0); + // currently -O3 is max + opt_level = std::min(opt_level, N_optlevels - 1); + M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); }); - assert(optlevel != SIZE_MAX && "Failed to select a valid optimization level!"); - this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM)); + return std::move(TSM); } void jl_register_jit_object(const object::ObjectFile &debugObj, @@ -1193,6 +1231,7 @@ namespace { auto operator()() JL_NOTSAFEPOINT { auto NPM = std::make_unique(cantFail(JTMB.createTargetMachine()), O); + // TODO this needs to be locked, as different resource pools may add to the printer vector at the same time printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { NPM->printTimers(); }); @@ -1201,38 +1240,45 @@ namespace { }; #endif + template struct OptimizerT { - OptimizerT(TargetMachine &TM, int optlevel, std::vector> &printers) JL_NOTSAFEPOINT - : optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {} - OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete; - OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default; + OptimizerT(TargetMachine &TM, std::vector> &printers) JL_NOTSAFEPOINT { + for (size_t i = 0; i < N; i++) { + PMs[i] = std::make_unique>>(PMCreator(TM, i, printers)); + } + } OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); + assert(PoolIdx < N && "Invalid optimization pool index"); + uint64_t start_time = 0; - std::stringstream before_stats_ss; - bool should_dump_opt_stats = false; + + struct Stat { + std::string name; + uint64_t insts; + uint64_t bbs; + + void dump(ios_t *stream) JL_NOTSAFEPOINT { + ios_printf(stream, " \"%s\":\n", name.c_str()); + ios_printf(stream, " instructions: %u\n", insts); + ios_printf(stream, " basicblocks: %zd\n", bbs); + } + + Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {} + + ~Stat() JL_NOTSAFEPOINT = default; + }; + SmallVector before_stats; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream) { - // Ensures that we don't _just_ write the second part of the YAML object - should_dump_opt_stats = true; - // We use a stringstream to later atomically write a YAML object - // without the need to hold the stream lock over the optimization - // Print LLVM function statistics _before_ optimization - // Print all the information about this invocation as a YAML object - before_stats_ss << "- \n"; - // We print the name and some statistics for each function in the module, both - // before optimization and again afterwards. - before_stats_ss << " before: \n"; + if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { for (auto &F : M.functions()) { if (F.isDeclaration() || F.getName().startswith("jfptr_")) { continue; } // Each function is printed as a YAML object with several attributes - before_stats_ss << " \"" << F.getName().str().c_str() << "\":\n"; - before_stats_ss << " instructions: " << F.getInstructionCount() << "\n"; - before_stats_ss << " basicblocks: " << countBasicBlocks(F) << "\n"; + before_stats.emplace_back(F); } start_time = jl_hrtime(); @@ -1243,18 +1289,41 @@ namespace { JL_TIMING(LLVM_JIT, JIT_Opt); //Run the optimization assert(!verifyLLVMIR(M)); - (***PMs).run(M); + (****PMs[PoolIdx]).run(M); assert(!verifyLLVMIR(M)); } - uint64_t end_time = 0; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream && should_dump_opt_stats) { - ios_printf(stream, "%s", before_stats_ss.str().c_str()); - end_time = jl_hrtime(); + // Print optimization statistics as a YAML object + // Looks like: + // - + // before: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + // time_ns: uint64 + // optlevel: int + // after: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "- \n"); + + // Print LLVM function statistic _before_ optimization + ios_printf(stream, " before: \n"); + for (auto &s : before_stats) { + s.dump(stream); + } ios_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); - ios_printf(stream, " optlevel: %d\n", optlevel); + ios_printf(stream, " optlevel: %d\n", PoolIdx); // Print LLVM function statistics _after_ optimization ios_printf(stream, " after: \n"); @@ -1262,47 +1331,114 @@ namespace { if (F.isDeclaration() || F.getName().startswith("jfptr_")) { continue; } - ios_printf(stream, " \"%s\":\n", F.getName().str().c_str()); - ios_printf(stream, " instructions: %u\n", F.getInstructionCount()); - ios_printf(stream, " basicblocks: %zd\n", countBasicBlocks(F)); + Stat(F).dump(stream); } } } + switch (PoolIdx) { + case 0: + ++OptO0; + break; + case 1: + ++OptO1; + break; + case 2: + ++OptO2; + break; + case 3: + ++OptO3; + break; + default: + // Change this if we ever gain other optlevels + llvm_unreachable("optlevel is between 0 and 3!"); + } }); - switch (optlevel) { - case 0: - ++OptO0; - break; - case 1: - ++OptO1; - break; - case 2: - ++OptO2; - break; - case 3: - ++OptO3; - break; - default: - llvm_unreachable("optlevel is between 0 and 3!"); - } return Expected{std::move(TSM)}; } private: - int optlevel; - JuliaOJIT::ResourcePool> PMs; + std::array>>, N> PMs; }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { - CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT - : orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {} + CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM) JL_NOTSAFEPOINT + : orc::IRCompileLayer::IRCompiler(MO) { + for (size_t i = 0; i < N; ++i) { + TMs[i] = std::make_unique>>(TMCreator(TM, i)); + } + } Expected> operator()(Module &M) override { JL_TIMING(LLVM_JIT, JIT_Compile); - return orc::SimpleCompiler(***TMs)(M); + size_t PoolIdx; + if (auto opt_level = M.getModuleFlag("julia.optlevel")) { + PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); + } else { + PoolIdx = jl_options.opt_level; + } + assert(PoolIdx < N && "Invalid optimization level for compiler!"); + return orc::SimpleCompiler(****TMs[PoolIdx])(M); } - JuliaOJIT::ResourcePool> TMs; + std::array>>, N> TMs; + }; + + struct JITPointersT { + + JITPointersT(orc::ExecutionSession &ES) : ES(ES) {} + + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { + TSM.withModuleDo([&](Module &M) { + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); + } + } + + // Windows needs some inline asm to help + // build unwind tables + jl_decorate_module(M); + }); + return std::move(TSM); + } + + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using the ExecutionSession's string pool; + // this makes it unsafe to call clearDeadEntries() on the pool. + Constant *getSharedBytes(GlobalVariable &GV) { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. + if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; + } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + auto Interned = *ES.intern(Data); + return literal_static_pointer_val(Interned.data(), GV.getType()); + } + + orc::ExecutionSession &ES; }; } @@ -1313,12 +1449,6 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector> &PrintLLVMTimers) - : CompileLayer(BaseLayer.getExecutionSession(), BaseLayer, - std::make_unique(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)), - OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, - llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {} - #ifdef _COMPILER_ASAN_ENABLED_ int64_t ___asan_globals_registered; #endif @@ -1348,15 +1478,13 @@ JuliaOJIT::JuliaOJIT() ), #endif LockLayer(ObjectLayer), - Pipelines{ - std::make_unique(LockLayer, *TM, 0, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 1, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 2, PrintLLVMTimers), - std::make_unique(LockLayer, *TM, 3, PrintLLVMTimers), - }, - OptSelLayer(Pipelines), + CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(ES))), + OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers))), + OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)), + DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)), ExternalCompileLayer(ES, LockLayer, - std::make_unique(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM, 2)) + std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)) { #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) @@ -1490,34 +1618,12 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) ++ModulesAdded; orc::SymbolLookupSet NewExports; TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - jl_decorate_module(M); - shareStrings(M); for (auto &F : M.global_values()) { if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { auto Name = ES.intern(getMangledName(F.getName())); NewExports.add(std::move(Name)); } } -#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) - // validate the relocations for M (not implemented for the JITLink memory manager yet) - for (Module::global_object_iterator I = M.global_objects().begin(), E = M.global_objects().end(); I != E; ) { - GlobalObject *F = &*I; - ++I; - if (F->isDeclaration()) { - if (F->use_empty()) - F->eraseFromParent(); - else if (!((isa(F) && isIntrinsicFunction(cast(F))) || - findUnmangledSymbol(F->getName()) || - SectionMemoryManager::getSymbolAddressInProcess( - getMangledName(F->getName())))) { - llvm::errs() << "FATAL ERROR: " - << "Symbol \"" << F->getName().str() << "\"" - << "not found"; - abort(); - } - } - } -#endif }); // TODO: what is the performance characteristics of this? @@ -1817,32 +1923,6 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS }); } -// optimize memory by turning long strings into memoized copies, instead of -// making a copy per object file of output. -void JuliaOJIT::shareStrings(Module &M) -{ - ++InternedGlobals; - std::vector erase; - for (auto &GV : M.globals()) { - if (!GV.hasInitializer() || !GV.isConstant()) - continue; - ConstantDataSequential *CDS = dyn_cast(GV.getInitializer()); - if (CDS == nullptr) - continue; - StringRef data = CDS->getRawDataValues(); - if (data.size() > 16) { // only for long strings: keep short ones as values - Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8); - Constant *v = ConstantExpr::getIntToPtr( - ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()), - GV.getType()); - GV.replaceAllUsesWith(v); - erase.push_back(&GV); - } - } - for (auto GV : erase) - GV->eraseFromParent(); -} - //TargetMachine pass-through methods std::unique_ptr JuliaOJIT::cloneTargetMachine() const diff --git a/src/jitlayers.h b/src/jitlayers.h index e197b2a9eb9ec..b8fdbc631a5be 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -325,7 +325,10 @@ class JuliaOJIT { std::mutex EmissionMutex; }; typedef orc::IRCompileLayer CompileLayerT; + typedef orc::IRTransformLayer JITPointersLayerT; typedef orc::IRTransformLayer OptimizeLayerT; + typedef orc::IRTransformLayer OptSelLayerT; + typedef orc::IRTransformLayer DepsVerifyLayerT; typedef object::OwningBinary OwningObj; template mutex; }; - struct PipelineT { - PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector> &PrintLLVMTimers); - CompileLayerT CompileLayer; - OptimizeLayerT OptimizeLayer; - }; - - struct OptSelLayerT : orc::IRLayer { - - template - OptSelLayerT(const std::array, N> &optimizers) JL_NOTSAFEPOINT - : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), - optimizers[0]->OptimizeLayer.getManglingOptions()), - optimizers(optimizers.data()), - count(N) { - static_assert(N > 0, "Expected array with at least one optimizer!"); - } - ~OptSelLayerT() JL_NOTSAFEPOINT = default; - - void emit(std::unique_ptr R, orc::ThreadSafeModule TSM) override; - - private: - const std::unique_ptr * const optimizers; - size_t count; - }; private: // Custom object emission notification handler for the JuliaOJIT @@ -529,10 +508,9 @@ class JuliaOJIT { jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { return dump_llvm_opt_stream; } -private: std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; - void shareStrings(Module &M) JL_NOTSAFEPOINT; +private: const std::unique_ptr TM; const DataLayout DL; @@ -563,8 +541,11 @@ class JuliaOJIT { #endif ObjLayerT ObjectLayer; LockLayerT LockLayer; - const std::array, 4> Pipelines; + CompileLayerT CompileLayer; + JITPointersLayerT JITPointersLayer; + OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; + DepsVerifyLayerT DepsVerifyLayer; CompileLayerT ExternalCompileLayer; }; @@ -601,10 +582,4 @@ Pass *createLowerSimdLoopPass() JL_NOTSAFEPOINT; // NewPM #include "passes.h" -// Whether the Function is an llvm or julia intrinsic. -static inline bool isIntrinsicFunction(Function *F) JL_NOTSAFEPOINT -{ - return F->isIntrinsic() || F->getName().startswith("julia."); -} - CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;