From e983b4f64ee6d919a60938b6e7371a66877f4a23 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 11 Feb 2019 07:46:04 -0800 Subject: [PATCH] rustc: Implement incremental "fat" LTO Currently the compiler will produce an error if both incremental compilation and full fat LTO are requested. With recent changes and the advent of incremental ThinLTO, however, all the hard work is already done for us and it's actually not too bad to remove this error! This commit updates the codegen backend to allow incremental full fat LTO. The semantics are that the input modules to LTO are all produced incrementally, but the final LTO step is always done unconditionally regardless of whether the inputs changed or not. The only real incremental win we could have here is if zero of the input modules changed, but that's so rare it's unlikely to be worthwhile to implement such a code path. cc #57968 cc rust-lang/cargo#6643 --- src/librustc/session/mod.rs | 16 +-- src/librustc_codegen_llvm/back/lto.rs | 139 ++++++++++++++++------- src/librustc_codegen_llvm/lib.rs | 40 ++++++- src/librustc_codegen_llvm/llvm/ffi.rs | 2 +- src/librustc_codegen_ssa/back/write.rs | 75 +++++++++--- src/librustc_codegen_ssa/traits/write.rs | 9 +- src/rustllvm/PassWrapper.cpp | 8 +- src/test/incremental/lto.rs | 40 +++++++ 8 files changed, 246 insertions(+), 83 deletions(-) create mode 100644 src/test/incremental/lto.rs diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index 9f387e1eab1ef..a5f1a754f240c 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -9,7 +9,7 @@ use crate::lint; use crate::lint::builtin::BuiltinLintDiagnostics; use crate::middle::allocator::AllocatorKind; use crate::middle::dependency_format; -use crate::session::config::{OutputType, Lto}; +use crate::session::config::OutputType; use crate::session::search_paths::{PathKind, SearchPath}; use crate::util::nodemap::{FxHashMap, FxHashSet}; use crate::util::common::{duration_to_secs_str, 
ErrorReported}; @@ -1246,20 +1246,6 @@ pub fn build_session_( // If it is useful to have a Session available already for validating a // commandline argument, you can do so here. fn validate_commandline_args_with_session_available(sess: &Session) { - - if sess.opts.incremental.is_some() { - match sess.lto() { - Lto::Thin | - Lto::Fat => { - sess.err("can't perform LTO when compiling incrementally"); - } - Lto::ThinLocal | - Lto::No => { - // This is fine - } - } - } - // Since we don't know if code in an rlib will be linked to statically or // dynamically downstream, rustc generates `__imp_` symbols that help the // MSVC linker deal with this lack of knowledge (#27438). Unfortunately, diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs index 3e51078dc6436..ef7b36e5d7410 100644 --- a/src/librustc_codegen_llvm/back/lto.rs +++ b/src/librustc_codegen_llvm/back/lto.rs @@ -1,6 +1,6 @@ use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION}; use rustc_codegen_ssa::back::symbol_export; -use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, pre_lto_bitcode_filename}; +use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, FatLTOInput}; use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinShared, ThinModule}; use rustc_codegen_ssa::traits::*; use back::write::{self, DiagnosticHandlers, with_llvm_pmb, save_temp_bitcode, to_llvm_opt_settings}; @@ -21,7 +21,6 @@ use rustc_codegen_ssa::{ModuleCodegen, ModuleKind}; use libc; use std::ffi::{CStr, CString}; -use std::fs; use std::ptr; use std::slice; use std::sync::Arc; @@ -133,7 +132,8 @@ fn prepare_lto(cgcx: &CodegenContext, /// Performs fat LTO by merging all modules into a single one and returning it /// for further optimization. 
pub(crate) fn run_fat(cgcx: &CodegenContext, - modules: Vec>, + modules: Vec>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, timeline: &mut Timeline) -> Result, FatalError> { @@ -142,7 +142,15 @@ pub(crate) fn run_fat(cgcx: &CodegenContext, let symbol_white_list = symbol_white_list.iter() .map(|c| c.as_ptr()) .collect::>(); - fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline) + fat_lto( + cgcx, + &diag_handler, + modules, + cached_modules, + upstream_modules, + &symbol_white_list, + timeline, + ) } /// Performs thin LTO by performing necessary global analysis and returning two @@ -173,33 +181,17 @@ pub(crate) fn run_thin(cgcx: &CodegenContext, } pub(crate) fn prepare_thin( - cgcx: &CodegenContext, module: ModuleCodegen ) -> (String, ThinBuffer) { let name = module.name.clone(); let buffer = ThinBuffer::new(module.module_llvm.llmod()); - - // We emit the module after having serialized it into a ThinBuffer - // because only then it will contain the ThinLTO module summary. - if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { - if cgcx.config(module.kind).emit_pre_thin_lto_bc { - let path = incr_comp_session_dir - .join(pre_lto_bitcode_filename(&name)); - - fs::write(&path, buffer.data()).unwrap_or_else(|e| { - panic!("Error writing pre-lto-bitcode file `{}`: {}", - path.display(), - e); - }); - } - } - (name, buffer) } fn fat_lto(cgcx: &CodegenContext, diag_handler: &Handler, - mut modules: Vec>, + mut modules: Vec>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule, CString)>, symbol_white_list: &[*const libc::c_char], timeline: &mut Timeline) @@ -216,8 +208,14 @@ fn fat_lto(cgcx: &CodegenContext, // file copy operations in the backend work correctly. The only other kind // of module here should be an allocator one, and if your crate is smaller // than the allocator module then the size doesn't really matter anyway. 
- let (_, costliest_module) = modules.iter() + let costliest_module = modules.iter() .enumerate() + .filter_map(|(i, module)| { + match module { + FatLTOInput::InMemory(m) => Some((i, m)), + FatLTOInput::Serialized { .. } => None, + } + }) .filter(|&(_, module)| module.kind == ModuleKind::Regular) .map(|(i, module)| { let cost = unsafe { @@ -225,9 +223,38 @@ fn fat_lto(cgcx: &CodegenContext, }; (cost, i) }) - .max() - .expect("must be codegen'ing at least one module"); - let module = modules.remove(costliest_module); + .max(); + + // If we found a costliest module, we're good to go. Otherwise all our + // inputs were serialized which could happen in the case, for example, that + // all our inputs were incrementally reread from the cache and we're just + // re-executing the LTO passes. If that's the case deserialize the first + // module and create a linker with it. + let module: ModuleCodegen = match costliest_module { + Some((_cost, i)) => { + match modules.remove(i) { + FatLTOInput::InMemory(m) => m, + FatLTOInput::Serialized { .. } => unreachable!(), + } + } + None => { + let pos = modules.iter().position(|m| { + match m { + FatLTOInput::InMemory(_) => false, + FatLTOInput::Serialized { .. } => true, + } + }).expect("must have at least one serialized module"); + let (name, buffer) = match modules.remove(pos) { + FatLTOInput::Serialized { name, buffer } => (name, buffer), + FatLTOInput::InMemory(_) => unreachable!(), + }; + ModuleCodegen { + module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?, + name, + kind: ModuleKind::Regular, + } + } + }; let mut serialized_bitcode = Vec::new(); { let (llcx, llmod) = { @@ -247,10 +274,20 @@ fn fat_lto(cgcx: &CodegenContext, // way we know of to do that is to serialize them to a string and them parse // them later. Not great but hey, that's why it's "fat" LTO, right? 
serialized_modules.extend(modules.into_iter().map(|module| { - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - let llmod_id = CString::new(&module.name[..]).unwrap(); - - (SerializedModule::Local(buffer), llmod_id) + match module { + FatLTOInput::InMemory(module) => { + let buffer = ModuleBuffer::new(module.module_llvm.llmod()); + let llmod_id = CString::new(&module.name[..]).unwrap(); + (SerializedModule::Local(buffer), llmod_id) + } + FatLTOInput::Serialized { name, buffer } => { + let llmod_id = CString::new(name).unwrap(); + (SerializedModule::Local(buffer), llmod_id) + } + } + })); + serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { + (buffer, CString::new(wp.cgu_name.clone()).unwrap()) })); // For all serialized bitcode files we parse them and link them in as we did @@ -579,6 +616,16 @@ impl ModuleBuffer { llvm::LLVMRustModuleBufferCreate(m) }) } + + pub fn parse<'a>( + &self, + name: &str, + cx: &'a llvm::Context, + handler: &Handler, + ) -> Result<&'a llvm::Module, FatalError> { + let name = CString::new(name).unwrap(); + parse_module(cx, &name, self.data(), handler) + } } impl ModuleBufferMethods for ModuleBuffer { @@ -658,15 +705,12 @@ pub unsafe fn optimize_thin_module( // crates but for locally codegened modules we may be able to reuse // that LLVM Context and Module. let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); - let llmod_raw = llvm::LLVMRustParseBitcodeForThinLTO( + let llmod_raw = parse_module( llcx, - thin_module.data().as_ptr(), - thin_module.data().len(), - thin_module.shared.module_names[thin_module.idx].as_ptr(), - ).ok_or_else(|| { - let msg = "failed to parse bitcode for thin LTO module"; - write::llvm_err(&diag_handler, msg) - })? as *const _; + &thin_module.shared.module_names[thin_module.idx], + thin_module.data(), + &diag_handler, + )? 
as *const _; let module = ModuleCodegen { module_llvm: ModuleLlvm { llmod_raw, @@ -823,3 +867,22 @@ fn module_name_to_str(c_str: &CStr) -> &str { c_str.to_str().unwrap_or_else(|e| bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)) } + +fn parse_module<'a>( + cx: &'a llvm::Context, + name: &CStr, + data: &[u8], + diag_handler: &Handler, +) -> Result<&'a llvm::Module, FatalError> { + unsafe { + llvm::LLVMRustParseBitcodeForLTO( + cx, + data.as_ptr(), + data.len(), + name.as_ptr(), + ).ok_or_else(|| { + let msg = "failed to parse bitcode for LTO module"; + write::llvm_err(&diag_handler, msg) + }) + } +} diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index ad8db25ee95a0..b605badc153f0 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -54,7 +54,7 @@ extern crate tempfile; extern crate memmap; use rustc_codegen_ssa::traits::*; -use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig}; +use rustc_codegen_ssa::back::write::{CodegenContext, ModuleConfig, FatLTOInput}; use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinModule}; use rustc_codegen_ssa::CompiledModule; use errors::{FatalError, Handler}; @@ -165,10 +165,11 @@ impl WriteBackendMethods for LlvmCodegenBackend { } fn run_fat_lto( cgcx: &CodegenContext, - modules: Vec>, + modules: Vec>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, timeline: &mut Timeline ) -> Result, FatalError> { - back::lto::run_fat(cgcx, modules, timeline) + back::lto::run_fat(cgcx, modules, cached_modules, timeline) } fn run_thin_lto( cgcx: &CodegenContext, @@ -204,10 +205,14 @@ impl WriteBackendMethods for LlvmCodegenBackend { back::write::codegen(cgcx, diag_handler, module, config, timeline) } fn prepare_thin( - cgcx: &CodegenContext, module: ModuleCodegen ) -> (String, Self::ThinBuffer) { - back::lto::prepare_thin(cgcx, module) + back::lto::prepare_thin(module) + } + fn serialize_module( + 
module: ModuleCodegen + ) -> (String, Self::ModuleBuffer) { + (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod())) } fn run_lto_pass_manager( cgcx: &CodegenContext, @@ -375,6 +380,31 @@ impl ModuleLlvm { } } + fn parse( + cgcx: &CodegenContext, + name: &str, + buffer: &back::lto::ModuleBuffer, + handler: &Handler, + ) -> Result { + unsafe { + let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); + let llmod_raw = buffer.parse(name, llcx, handler)?; + let tm = match (cgcx.tm_factory.0)() { + Ok(m) => m, + Err(e) => { + handler.struct_err(&e).emit(); + return Err(FatalError) + } + }; + + Ok(ModuleLlvm { + llmod_raw, + llcx, + tm, + }) + } + } + fn llmod(&self) -> &llvm::Module { unsafe { &*self.llmod_raw diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs index 58bdfc47fcaed..ba606f76eb65b 100644 --- a/src/librustc_codegen_llvm/llvm/ffi.rs +++ b/src/librustc_codegen_llvm/llvm/ffi.rs @@ -1804,7 +1804,7 @@ extern "C" { CallbackPayload: *mut c_void, ); pub fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData); - pub fn LLVMRustParseBitcodeForThinLTO( + pub fn LLVMRustParseBitcodeForLTO( Context: &Context, Data: *const u8, len: usize, diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs index eeb191b09e249..01add4fb5fa8e 100644 --- a/src/librustc_codegen_ssa/back/write.rs +++ b/src/librustc_codegen_ssa/back/write.rs @@ -41,7 +41,7 @@ use std::sync::mpsc::{channel, Sender, Receiver}; use std::time::Instant; use std::thread; -const PRE_THIN_LTO_BC_EXT: &str = "pre-thin-lto.bc"; +const PRE_LTO_BC_EXT: &str = "pre-lto.bc"; /// Module-specific configuration for `optimize_and_codegen`. pub struct ModuleConfig { @@ -58,7 +58,7 @@ pub struct ModuleConfig { pub pgo_use: String, // Flags indicating which outputs to produce. 
- pub emit_pre_thin_lto_bc: bool, + pub emit_pre_lto_bc: bool, pub emit_no_opt_bc: bool, pub emit_bc: bool, pub emit_bc_compressed: bool, @@ -96,7 +96,7 @@ impl ModuleConfig { pgo_use: String::new(), emit_no_opt_bc: false, - emit_pre_thin_lto_bc: false, + emit_pre_lto_bc: false, emit_bc: false, emit_bc_compressed: false, emit_lto_bc: false, @@ -258,7 +258,7 @@ impl CodegenContext { fn generate_lto_work( cgcx: &CodegenContext, - needs_fat_lto: Vec>, + needs_fat_lto: Vec>, needs_thin_lto: Vec<(String, B::ThinBuffer)>, import_only_modules: Vec<(SerializedModule, WorkProduct)> ) -> Vec<(WorkItem, u64)> { @@ -270,9 +270,13 @@ fn generate_lto_work( let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() { assert!(needs_thin_lto.is_empty()); - assert!(import_only_modules.is_empty()); - let lto_module = B::run_fat_lto(cgcx, needs_fat_lto, &mut timeline) - .unwrap_or_else(|e| e.raise()); + let lto_module = B::run_fat_lto( + cgcx, + needs_fat_lto, + import_only_modules, + &mut timeline, + ) + .unwrap_or_else(|e| e.raise()); (vec![lto_module], vec![]) } else { assert!(needs_fat_lto.is_empty()); @@ -302,14 +306,14 @@ fn need_crate_bitcode_for_rlib(sess: &Session) -> bool { sess.opts.output_types.contains_key(&OutputType::Exe) } -fn need_pre_thin_lto_bitcode_for_incr_comp(sess: &Session) -> bool { +fn need_pre_lto_bitcode_for_incr_comp(sess: &Session) -> bool { if sess.opts.incremental.is_none() { return false } match sess.lto() { - Lto::Fat | Lto::No => false, + Lto::Fat | Lto::Thin | Lto::ThinLocal => true, } @@ -375,7 +379,7 @@ pub fn start_async_codegen( // Save all versions of the bytecode if we're saving our temporaries. 
if sess.opts.cg.save_temps { modules_config.emit_no_opt_bc = true; - modules_config.emit_pre_thin_lto_bc = true; + modules_config.emit_pre_lto_bc = true; modules_config.emit_bc = true; modules_config.emit_lto_bc = true; metadata_config.emit_bc = true; @@ -390,8 +394,8 @@ pub fn start_async_codegen( allocator_config.emit_bc_compressed = true; } - modules_config.emit_pre_thin_lto_bc = - need_pre_thin_lto_bitcode_for_incr_comp(sess); + modules_config.emit_pre_lto_bc = + need_pre_lto_bitcode_for_incr_comp(sess); modules_config.no_integrated_as = tcx.sess.opts.cg.no_integrated_as || tcx.sess.target.target.options.no_integrated_as; @@ -686,10 +690,18 @@ impl WorkItem { enum WorkItemResult { Compiled(CompiledModule), - NeedsFatLTO(ModuleCodegen), + NeedsFatLTO(FatLTOInput), NeedsThinLTO(String, B::ThinBuffer), } +pub enum FatLTOInput { + Serialized { + name: String, + buffer: B::ModuleBuffer, + }, + InMemory(ModuleCodegen), +} + fn execute_work_item( cgcx: &CodegenContext, work_item: WorkItem, @@ -771,6 +783,15 @@ fn execute_optimize_work_item( } }; + // If we're doing some form of incremental LTO then we need to be sure to + // save our module to disk first. 
+ let bitcode = if cgcx.config(module.kind).emit_pre_lto_bc { + let filename = pre_lto_bitcode_filename(&module.name); + cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename)) + } else { + None + }; + Ok(match lto_type { ComputedLtoType::No => { let module = unsafe { @@ -779,10 +800,30 @@ fn execute_optimize_work_item( WorkItemResult::Compiled(module) } ComputedLtoType::Thin => { - let (name, thin_buffer) = B::prepare_thin(cgcx, module); + let (name, thin_buffer) = B::prepare_thin(module); + if let Some(path) = bitcode { + fs::write(&path, thin_buffer.data()).unwrap_or_else(|e| { + panic!("Error writing pre-lto-bitcode file `{}`: {}", + path.display(), + e); + }); + } WorkItemResult::NeedsThinLTO(name, thin_buffer) } - ComputedLtoType::Fat => WorkItemResult::NeedsFatLTO(module), + ComputedLtoType::Fat => { + match bitcode { + Some(path) => { + let (name, buffer) = B::serialize_module(module); + fs::write(&path, buffer.data()).unwrap_or_else(|e| { + panic!("Error writing pre-lto-bitcode file `{}`: {}", + path.display(), + e); + }); + WorkItemResult::NeedsFatLTO(FatLTOInput::Serialized { name, buffer }) + } + None => WorkItemResult::NeedsFatLTO(FatLTOInput::InMemory(module)), + } + } }) } @@ -866,7 +907,7 @@ fn execute_lto_work_item( pub enum Message { Token(io::Result), NeedsFatLTO { - result: ModuleCodegen, + result: FatLTOInput, worker_id: usize, }, NeedsThinLTO { @@ -1877,7 +1918,7 @@ pub fn submit_pre_lto_module_to_llvm( } pub fn pre_lto_bitcode_filename(module_name: &str) -> String { - format!("{}.{}", module_name, PRE_THIN_LTO_BC_EXT) + format!("{}.{}", module_name, PRE_LTO_BC_EXT) } fn msvc_imps_needed(tcx: TyCtxt) -> bool { diff --git a/src/librustc_codegen_ssa/traits/write.rs b/src/librustc_codegen_ssa/traits/write.rs index e8ef815b32acb..d8fb7c608c8af 100644 --- a/src/librustc_codegen_ssa/traits/write.rs +++ b/src/librustc_codegen_ssa/traits/write.rs @@ -1,5 +1,5 @@ use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule}; -use 
crate::back::write::{CodegenContext, ModuleConfig}; +use crate::back::write::{CodegenContext, ModuleConfig, FatLTOInput}; use crate::{CompiledModule, ModuleCodegen}; use rustc::dep_graph::WorkProduct; @@ -18,7 +18,8 @@ pub trait WriteBackendMethods: 'static + Sized + Clone { /// for further optimization. fn run_fat_lto( cgcx: &CodegenContext, - modules: Vec>, + modules: Vec>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, timeline: &mut Timeline, ) -> Result, FatalError>; /// Performs thin LTO by performing necessary global analysis and returning two @@ -51,9 +52,11 @@ pub trait WriteBackendMethods: 'static + Sized + Clone { timeline: &mut Timeline, ) -> Result; fn prepare_thin( - cgcx: &CodegenContext, module: ModuleCodegen ) -> (String, Self::ThinBuffer); + fn serialize_module( + module: ModuleCodegen + ) -> (String, Self::ModuleBuffer); fn run_lto_pass_manager( cgcx: &CodegenContext, llmod: &ModuleCodegen, diff --git a/src/rustllvm/PassWrapper.cpp b/src/rustllvm/PassWrapper.cpp index 18d277be21a16..25595e14982ae 100644 --- a/src/rustllvm/PassWrapper.cpp +++ b/src/rustllvm/PassWrapper.cpp @@ -1092,10 +1092,10 @@ LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) { // processing. We'll call this once per module optimized through ThinLTO, and // it'll be called concurrently on many threads. 
extern "C" LLVMModuleRef -LLVMRustParseBitcodeForThinLTO(LLVMContextRef Context, - const char *data, - size_t len, - const char *identifier) { +LLVMRustParseBitcodeForLTO(LLVMContextRef Context, + const char *data, + size_t len, + const char *identifier) { StringRef Data(data, len); MemoryBufferRef Buffer(Data, identifier); unwrap(Context)->enableDebugTypeODRUniquing(); diff --git a/src/test/incremental/lto.rs b/src/test/incremental/lto.rs new file mode 100644 index 0000000000000..2a3e3c2467cdc --- /dev/null +++ b/src/test/incremental/lto.rs @@ -0,0 +1,40 @@ +// no-prefer-dynamic +// revisions:rpass1 rpass2 +// compile-flags: -C lto + +mod x { + pub struct X { + x: u32, y: u32, + } + + #[cfg(rpass1)] + fn make() -> X { + X { x: 22, y: 0 } + } + + #[cfg(rpass2)] + fn make() -> X { + X { x: 11, y: 11 } + } + + pub fn new() -> X { + make() + } + + pub fn sum(x: &X) -> u32 { + x.x + x.y + } +} + +mod y { + use x; + + pub fn assert_sum() -> bool { + let x = x::new(); + x::sum(&x) == 22 + } +} + +pub fn main() { + y::assert_sum(); +}