From 21ea254b868b92c5c618ee41451fcf7c18c09cdf Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Apr 2019 09:42:53 -0700 Subject: [PATCH 01/10] Remove the `Vec` from `DependencyQueue` I... don't think this needed any more! Doing some digging looks like this was originally added in 79768eb0d. That was so early it didn't even use a PR and was almost 5 years ago. Since then we've had a huge number of changes to the backend and `Unit` nowadays does all the deduplication we need, so no need to store a `Vec` here and we can just have a mapping of `Key` to `Job` and that's it. --- src/cargo/core/compiler/job_queue.rs | 30 +++++++--------------------- src/cargo/util/dependency_queue.rs | 10 +++------- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs index 1c23c0d2d7f..201b2fe6c79 100644 --- a/src/cargo/core/compiler/job_queue.rs +++ b/src/cargo/core/compiler/job_queue.rs @@ -32,11 +32,10 @@ use crate::util::{Progress, ProgressStyle}; /// actual compilation step of each package. Packages enqueue units of work and /// then later on the entire graph is processed and compiled. pub struct JobQueue<'a, 'cfg> { - queue: DependencyQueue, Vec>, + queue: DependencyQueue, Job>, tx: Sender>, rx: Receiver>, active: Vec>, - pending: HashMap, PendingBuild>, compiled: HashSet, documented: HashSet, counts: HashMap, @@ -44,12 +43,6 @@ pub struct JobQueue<'a, 'cfg> { progress: Progress<'cfg>, } -/// A helper structure for metadata about the state of a building package. -struct PendingBuild { - /// The number of jobs currently active. 
- amt: usize, -} - #[derive(Clone, Copy, Eq, PartialEq, Hash)] struct Key<'a> { pkg: PackageId, @@ -140,7 +133,6 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { tx, rx, active: Vec::new(), - pending: HashMap::new(), compiled: HashSet::new(), documented: HashSet::new(), counts: HashMap::new(), @@ -157,7 +149,7 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { ) -> CargoResult<()> { let key = Key::new(unit); let deps = key.dependencies(cx)?; - self.queue.queue(&key, Vec::new(), &deps).push(job); + self.queue.queue(&key, job, &deps); *self.counts.entry(key.pkg).or_insert(0) += 1; Ok(()) } @@ -236,13 +228,10 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // possible that can run. Note that this is also the point where we // start requesting job tokens. Each job after the first needs to // request a token. - while let Some((key, jobs)) = self.queue.dequeue() { - self.pending.insert(key, PendingBuild { amt: jobs.len() }); - for job in jobs { - queue.push((key, job)); - if !self.active.is_empty() || !queue.is_empty() { - jobserver_helper.request_token(); - } + while let Some((key, job)) = self.queue.dequeue() { + queue.push((key, job)); + if !self.active.is_empty() || !queue.is_empty() { + jobserver_helper.request_token(); } } @@ -463,12 +452,7 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { if key.mode.is_run_custom_build() && cx.bcx.show_warnings(key.pkg) { self.emit_warnings(None, &key, cx)?; } - - let state = self.pending.get_mut(&key).unwrap(); - state.amt -= 1; - if state.amt == 0 { - self.queue.finish(&key); - } + self.queue.finish(&key); Ok(()) } diff --git a/src/cargo/util/dependency_queue.rs b/src/cargo/util/dependency_queue.rs index d86dd7c3671..9df373ee4f0 100644 --- a/src/cargo/util/dependency_queue.rs +++ b/src/cargo/util/dependency_queue.rs @@ -4,7 +4,6 @@ //! This structure is used to store the dependency graph and dynamically update //! it to figure out when a dependency should be built. 
-use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::collections::{HashMap, HashSet}; use std::hash::Hash; @@ -53,11 +52,8 @@ impl DependencyQueue { /// /// It is assumed that any dependencies of this package will eventually also /// be added to the dependency queue. - pub fn queue(&mut self, key: &K, value: V, dependencies: &[K]) -> &mut V { - let slot = match self.dep_map.entry(key.clone()) { - Occupied(v) => return &mut v.into_mut().1, - Vacant(v) => v, - }; + pub fn queue(&mut self, key: &K, value: V, dependencies: &[K]) { + assert!(!self.dep_map.contains_key(key)); let mut my_dependencies = HashSet::new(); for dep in dependencies { @@ -68,7 +64,7 @@ impl DependencyQueue { .or_insert_with(HashSet::new); rev.insert(key.clone()); } - &mut slot.insert((my_dependencies, value)).1 + self.dep_map.insert(key.clone(), (my_dependencies, value)); } /// All nodes have been added, calculate some internal metadata and prepare From b36594db395baa3c9f0c94fb4f065a84070f618b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Apr 2019 09:47:01 -0700 Subject: [PATCH 02/10] Correct a condition to request jobserver tokens This looks like it was a bug ever present from the original implementation of a GNU jobserver in #4110, but we currently unconditionally request a token is allocated for any job we pull off our job queue. Rather we only need to request tokens for everything but the first job because we already have an implicit token for that job. --- src/cargo/core/compiler/job_queue.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs index 201b2fe6c79..4e14c8e77ab 100644 --- a/src/cargo/core/compiler/job_queue.rs +++ b/src/cargo/core/compiler/job_queue.rs @@ -230,7 +230,7 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // request a token. 
while let Some((key, job)) = self.queue.dequeue() { queue.push((key, job)); - if !self.active.is_empty() || !queue.is_empty() { + if self.active.len() + queue.len() > 1 { jobserver_helper.request_token(); } } From 21cae017cdff63c657e2bbea00d6425ec1ab73e7 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Apr 2019 09:57:05 -0700 Subject: [PATCH 03/10] Refactor JobQueue::run slightly Take a `&Context` argument instead of a few component arguments, avoids passing around some extraneous information. --- src/cargo/core/compiler/job_queue.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs index 4e14c8e77ab..b5175d2b999 100644 --- a/src/cargo/core/compiler/job_queue.rs +++ b/src/cargo/core/compiler/job_queue.rs @@ -207,7 +207,6 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { ) -> CargoResult<()> { let mut tokens = Vec::new(); let mut queue = Vec::new(); - let build_plan = cx.bcx.build_config.build_plan; let mut print = DiagnosticPrinter::new(cx.bcx.config); trace!("queue: {:#?}", self.queue); @@ -240,7 +239,7 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // we're able to perform some parallel work. 
while error.is_none() && self.active.len() < tokens.len() + 1 && !queue.is_empty() { let (key, job) = queue.remove(0); - self.run(key, job, cx.bcx.config, scope, build_plan)?; + self.run(key, job, cx, scope)?; } // If after all that we're not actually running anything then we're @@ -358,7 +357,7 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { "{} [{}] target(s) in {}", build_type, opt_type, time_elapsed ); - if !build_plan { + if !cx.bcx.build_config.build_plan { cx.bcx.config.shell().status("Finished", message)?; } Ok(()) @@ -389,9 +388,8 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { &mut self, key: Key<'a>, job: Job, - config: &Config, + cx: &Context<'_, '_>, scope: &Scope<'a>, - build_plan: bool, ) -> CargoResult<()> { info!("start: {:?}", key); @@ -405,9 +403,9 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { my_tx.send(Message::Finish(key, res)).unwrap(); }; - if !build_plan { + if !cx.bcx.build_config.build_plan { // Print out some nice progress information. - self.note_working_on(config, &key, fresh)?; + self.note_working_on(cx.bcx.config, &key, fresh)?; } match fresh { From 12e0ffa6a7f9d132d3465b2c7f6f800924844fa4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Apr 2019 10:27:58 -0700 Subject: [PATCH 04/10] Remove the `Key` type from `JobQueue` This isn't actually necessary with a bit of refactoring, and in general it makes management of `JobQueue` simpler since there's one less type to worry about. The one main usage of `Key` vs `Unit` was that `Key` was `Send` to be placed in `Message`, but this was replaced by just assigning each `Unit` an ever-increasing integer, and then `Finished` contains the integer rather than the `Key` itself which we can map once we get back to the main thread. 
--- src/cargo/core/compiler/job_queue.rs | 265 +++++++++++---------------- 1 file changed, 107 insertions(+), 158 deletions(-) diff --git a/src/cargo/core/compiler/job_queue.rs b/src/cargo/core/compiler/job_queue.rs index b5175d2b999..f7a6a6f7e45 100644 --- a/src/cargo/core/compiler/job_queue.rs +++ b/src/cargo/core/compiler/job_queue.rs @@ -1,8 +1,6 @@ -use std::collections::hash_map::HashMap; -use std::collections::HashSet; -use std::fmt; +use std::collections::{HashMap, HashSet}; use std::io; -use std::mem; +use std::marker; use std::process::Output; use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::Arc; @@ -16,9 +14,8 @@ use super::job::{ Freshness::{self, Dirty, Fresh}, Job, }; -use super::{BuildContext, BuildPlan, CompileMode, Context, Kind, Unit}; -use crate::core::profiles::Profile; -use crate::core::{PackageId, Target, TargetKind}; +use super::{BuildContext, BuildPlan, CompileMode, Context, Unit}; +use crate::core::{PackageId, TargetKind}; use crate::handle_error; use crate::util; use crate::util::diagnostic_server::{self, DiagnosticPrinter}; @@ -32,59 +29,33 @@ use crate::util::{Progress, ProgressStyle}; /// actual compilation step of each package. Packages enqueue units of work and /// then later on the entire graph is processed and compiled. pub struct JobQueue<'a, 'cfg> { - queue: DependencyQueue, Job>, - tx: Sender>, - rx: Receiver>, - active: Vec>, + queue: DependencyQueue, Job>, + tx: Sender, + rx: Receiver, + active: HashMap>, compiled: HashSet, documented: HashSet, counts: HashMap, is_release: bool, progress: Progress<'cfg>, -} - -#[derive(Clone, Copy, Eq, PartialEq, Hash)] -struct Key<'a> { - pkg: PackageId, - target: &'a Target, - profile: Profile, - kind: Kind, - mode: CompileMode, -} - -impl<'a> Key<'a> { - fn name_for_progress(&self) -> String { - let pkg_name = self.pkg.name(); - match self.mode { - CompileMode::Doc { .. 
} => format!("{}(doc)", pkg_name), - CompileMode::RunCustomBuild => format!("{}(build)", pkg_name), - _ => { - let annotation = match self.target.kind() { - TargetKind::Lib(_) => return pkg_name.to_string(), - TargetKind::CustomBuild => return format!("{}(build.rs)", pkg_name), - TargetKind::Bin => "bin", - TargetKind::Test => "test", - TargetKind::Bench => "bench", - TargetKind::ExampleBin | TargetKind::ExampleLib(_) => "example", - }; - format!("{}({})", self.target.name(), annotation) - } - } - } + next_id: u32, } pub struct JobState<'a> { - tx: Sender>, + tx: Sender, + // Historical versions of Cargo made use of the `'a` argument here, so to + // leave the door open to future refactorings keep it here. + _marker: marker::PhantomData<&'a ()>, } -enum Message<'a> { +enum Message { Run(String), BuildPlanMsg(String, ProcessBuilder, Arc>), Stdout(String), Stderr(String), FixDiagnostic(diagnostic_server::Message), Token(io::Result), - Finish(Key<'a>, CargoResult<()>), + Finish(u32, CargoResult<()>), } impl<'a> JobState<'a> { @@ -132,12 +103,13 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { queue: DependencyQueue::new(), tx, rx, - active: Vec::new(), + active: HashMap::new(), compiled: HashSet::new(), documented: HashSet::new(), counts: HashMap::new(), is_release: bcx.build_config.release, progress, + next_id: 0, } } @@ -147,10 +119,18 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { unit: &Unit<'a>, job: Job, ) -> CargoResult<()> { - let key = Key::new(unit); - let deps = key.dependencies(cx)?; - self.queue.queue(&key, job, &deps); - *self.counts.entry(key.pkg).or_insert(0) += 1; + let dependencies = cx.dep_targets(unit); + let dependencies = dependencies + .iter() + .filter(|unit| { + // Binaries aren't actually needed to *compile* tests, just to run + // them, so we don't include this dependency edge in the job graph. 
+ !unit.target.is_test() || !unit.target.is_bin() + }) + .cloned() + .collect::>(); + self.queue.queue(unit, job, &dependencies); + *self.counts.entry(unit.pkg.package_id()).or_insert(0) += 1; Ok(()) } @@ -163,22 +143,8 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { let _p = profile::start("executing the job graph"); self.queue.queue_finished(); - // We need to give a handle to the send half of our message queue to the - // jobserver and (optionally) diagnostic helper thread. Unfortunately - // though we need the handle to be `'static` as that's typically what's - // required when spawning a thread! - // - // To work around this we transmute the `Sender` to a static lifetime. - // we're only sending "longer living" messages and we should also - // destroy all references to the channel before this function exits as - // the destructor for the `helper` object will ensure the associated - // thread is no longer running. - // - // As a result, this `transmute` to a longer lifetime should be safe in - // practice. + // Create a helper thread for acquiring jobserver tokens let tx = self.tx.clone(); - let tx = unsafe { mem::transmute::>, Sender>>(tx) }; - let tx2 = tx.clone(); let helper = cx .jobserver .clone() @@ -186,14 +152,24 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { drop(tx.send(Message::Token(token))); }) .chain_err(|| "failed to create helper thread for jobserver management")?; + + // Create a helper thread to manage the diagnostics for rustfix if + // necessary. + let tx = self.tx.clone(); let _diagnostic_server = cx .bcx .build_config .rustfix_diagnostic_server .borrow_mut() .take() - .map(move |srv| srv.start(move |msg| drop(tx2.send(Message::FixDiagnostic(msg))))); - + .map(move |srv| srv.start(move |msg| drop(tx.send(Message::FixDiagnostic(msg))))); + + // Use `crossbeam` to create a scope in which we can execute scoped + // threads. Note that this isn't currently required by Cargo but it was + // historically required. 
This is left in for now in case we need the + // `'a` ability for child threads in the near future, but if this + // comment has been sitting here for a long time feel free to refactor + // away crossbeam. crossbeam_utils::thread::scope(|scope| self.drain_the_queue(cx, plan, scope, &helper)) .expect("child threads should't panic") } @@ -227,8 +203,8 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // possible that can run. Note that this is also the point where we // start requesting job tokens. Each job after the first needs to // request a token. - while let Some((key, job)) = self.queue.dequeue() { - queue.push((key, job)); + while let Some((unit, job)) = self.queue.dequeue() { + queue.push((unit, job)); if self.active.len() + queue.len() > 1 { jobserver_helper.request_token(); } @@ -238,8 +214,8 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // try to spawn it so long as we've got a jobserver token which says // we're able to perform some parallel work. while error.is_none() && self.active.len() < tokens.len() + 1 && !queue.is_empty() { - let (key, job) = queue.remove(0); - self.run(key, job, cx, scope)?; + let (unit, job) = queue.remove(0); + self.run(&unit, job, cx, scope)?; } // If after all that we're not actually running anything then we're @@ -288,26 +264,19 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { Message::FixDiagnostic(msg) => { print.print(&msg)?; } - Message::Finish(key, result) => { - info!("end: {:?}", key); - - // FIXME: switch to this when stabilized. 
- // self.active.remove_item(&key); - let pos = self - .active - .iter() - .position(|k| *k == key) - .expect("an unrecorded package has finished compiling"); - self.active.remove(pos); + Message::Finish(id, result) => { + let unit = self.active.remove(&id).unwrap(); + info!("end: {:?}", unit); + if !self.active.is_empty() { assert!(!tokens.is_empty()); drop(tokens.pop()); } match result { - Ok(()) => self.finish(key, cx)?, + Ok(()) => self.finish(&unit, cx)?, Err(e) => { let msg = "The following warnings were emitted during compilation:"; - self.emit_warnings(Some(msg), &key, cx)?; + self.emit_warnings(Some(msg), &unit, cx)?; if !self.active.is_empty() { error = Some(failure::format_err!("build failed")); @@ -373,8 +342,8 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { let count = total - self.queue.len(); let active_names = self .active - .iter() - .map(Key::name_for_progress) + .values() + .map(|u| self.name_for_progress(u)) .collect::>(); drop( self.progress @@ -382,30 +351,54 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { ); } + fn name_for_progress(&self, unit: &Unit<'_>) -> String { + let pkg_name = unit.pkg.name(); + match unit.mode { + CompileMode::Doc { .. } => format!("{}(doc)", pkg_name), + CompileMode::RunCustomBuild => format!("{}(build)", pkg_name), + _ => { + let annotation = match unit.target.kind() { + TargetKind::Lib(_) => return pkg_name.to_string(), + TargetKind::CustomBuild => return format!("{}(build.rs)", pkg_name), + TargetKind::Bin => "bin", + TargetKind::Test => "test", + TargetKind::Bench => "bench", + TargetKind::ExampleBin | TargetKind::ExampleLib(_) => "example", + }; + format!("{}({})", unit.target.name(), annotation) + } + } + } + /// Executes a job in the `scope` given, pushing the spawned thread's /// handled onto `threads`. 
fn run( &mut self, - key: Key<'a>, + unit: &Unit<'a>, job: Job, cx: &Context<'_, '_>, scope: &Scope<'a>, ) -> CargoResult<()> { - info!("start: {:?}", key); + info!("start: {:?}", unit); - self.active.push(key); - *self.counts.get_mut(&key.pkg).unwrap() -= 1; + let id = self.next_id; + self.next_id = id.checked_add(1).unwrap(); + assert!(self.active.insert(id, *unit).is_none()); + *self.counts.get_mut(&unit.pkg.package_id()).unwrap() -= 1; let my_tx = self.tx.clone(); let fresh = job.freshness(); let doit = move || { - let res = job.run(&JobState { tx: my_tx.clone() }); - my_tx.send(Message::Finish(key, res)).unwrap(); + let res = job.run(&JobState { + tx: my_tx.clone(), + _marker: marker::PhantomData, + }); + my_tx.send(Message::Finish(id, res)).unwrap(); }; if !cx.bcx.build_config.build_plan { // Print out some nice progress information. - self.note_working_on(cx.bcx.config, &key, fresh)?; + self.note_working_on(cx.bcx.config, unit, fresh)?; } match fresh { @@ -421,12 +414,12 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { fn emit_warnings( &mut self, msg: Option<&str>, - key: &Key<'a>, + unit: &Unit<'a>, cx: &mut Context<'_, '_>, ) -> CargoResult<()> { let output = cx.build_state.outputs.lock().unwrap(); let bcx = &mut cx.bcx; - if let Some(output) = output.get(&(key.pkg, key.kind)) { + if let Some(output) = output.get(&(unit.pkg.package_id(), unit.kind)) { if !output.warnings.is_empty() { if let Some(msg) = msg { writeln!(bcx.config.shell().err(), "{}\n", msg)?; @@ -446,11 +439,11 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { Ok(()) } - fn finish(&mut self, key: Key<'a>, cx: &mut Context<'_, '_>) -> CargoResult<()> { - if key.mode.is_run_custom_build() && cx.bcx.show_warnings(key.pkg) { - self.emit_warnings(None, &key, cx)?; + fn finish(&mut self, unit: &Unit<'a>, cx: &mut Context<'_, '_>) -> CargoResult<()> { + if unit.mode.is_run_custom_build() && cx.bcx.show_warnings(unit.pkg.package_id()) { + self.emit_warnings(None, unit, cx)?; } - self.queue.finish(&key); + 
self.queue.finish(unit); Ok(()) } @@ -466,11 +459,11 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { fn note_working_on( &mut self, config: &Config, - key: &Key<'a>, + unit: &Unit<'a>, fresh: Freshness, ) -> CargoResult<()> { - if (self.compiled.contains(&key.pkg) && !key.mode.is_doc()) - || (self.documented.contains(&key.pkg) && key.mode.is_doc()) + if (self.compiled.contains(&unit.pkg.package_id()) && !unit.mode.is_doc()) + || (self.documented.contains(&unit.pkg.package_id()) && unit.mode.is_doc()) { return Ok(()); } @@ -479,76 +472,32 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> { // Any dirty stage which runs at least one command gets printed as // being a compiled package. Dirty => { - if key.mode.is_doc() { + if unit.mode.is_doc() { // Skip doc test. - if !key.mode.is_any_test() { - self.documented.insert(key.pkg); - config.shell().status("Documenting", key.pkg)?; + if !unit.mode.is_any_test() { + self.documented.insert(unit.pkg.package_id()); + config.shell().status("Documenting", unit.pkg)?; } } else { - self.compiled.insert(key.pkg); - if key.mode.is_check() { - config.shell().status("Checking", key.pkg)?; + self.compiled.insert(unit.pkg.package_id()); + if unit.mode.is_check() { + config.shell().status("Checking", unit.pkg)?; } else { - config.shell().status("Compiling", key.pkg)?; + config.shell().status("Compiling", unit.pkg)?; } } } Fresh => { // If doc test are last, only print "Fresh" if nothing has been printed. 
- if self.counts[&key.pkg] == 0 - && !(key.mode == CompileMode::Doctest && self.compiled.contains(&key.pkg)) + if self.counts[&unit.pkg.package_id()] == 0 + && !(unit.mode == CompileMode::Doctest + && self.compiled.contains(&unit.pkg.package_id())) { - self.compiled.insert(key.pkg); - config.shell().verbose(|c| c.status("Fresh", key.pkg))?; + self.compiled.insert(unit.pkg.package_id()); + config.shell().verbose(|c| c.status("Fresh", unit.pkg))?; } } } Ok(()) } } - -impl<'a> Key<'a> { - fn new(unit: &Unit<'a>) -> Key<'a> { - Key { - pkg: unit.pkg.package_id(), - target: unit.target, - profile: unit.profile, - kind: unit.kind, - mode: unit.mode, - } - } - - fn dependencies<'cfg>(&self, cx: &Context<'a, 'cfg>) -> CargoResult>> { - let unit = Unit { - pkg: cx.get_package(self.pkg)?, - target: self.target, - profile: self.profile, - kind: self.kind, - mode: self.mode, - }; - let targets = cx.dep_targets(&unit); - Ok(targets - .iter() - .filter_map(|unit| { - // Binaries aren't actually needed to *compile* tests, just to run - // them, so we don't include this dependency edge in the job graph. - if self.target.is_test() && unit.target.is_bin() { - None - } else { - Some(Key::new(unit)) - } - }) - .collect()) - } -} - -impl<'a> fmt::Debug for Key<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} => {}/{} => {:?}", - self.pkg, self.target, self.profile, self.kind - ) - } -} From 724602857ccadc83512d5bc3737dd924a03700a4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Apr 2019 13:49:42 -0700 Subject: [PATCH 05/10] Sort units once, not each call to `dep_targets` The code lying around in `dep_targets` is pretty old at this point, but given the current iteraiton there's no need to re-sort on each call to `dep_targets`, only initially during construction! 
--- src/cargo/core/compiler/context/mod.rs | 4 +--- src/cargo/core/compiler/context/unit_dependencies.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index b0ae71e299a..37fd7d28ad7 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -380,9 +380,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> { return Vec::new(); } } - let mut deps = self.unit_dependencies[unit].clone(); - deps.sort(); - deps + self.unit_dependencies[unit].clone() } pub fn is_primary_package(&self, unit: &Unit<'a>) -> bool { diff --git a/src/cargo/core/compiler/context/unit_dependencies.rs b/src/cargo/core/compiler/context/unit_dependencies.rs index 7ba1aaa38d4..1be5a37cd9e 100644 --- a/src/cargo/core/compiler/context/unit_dependencies.rs +++ b/src/cargo/core/compiler/context/unit_dependencies.rs @@ -87,6 +87,14 @@ pub fn build_unit_dependencies<'a, 'cfg>( connect_run_custom_build_deps(&mut state); + // Dependencies are used in tons of places throughout the backend, many of + // which affect the determinism of the build itself. As a result be sure + // that dependency lists are always sorted to ensure we've always got a + // deterministic output. + for list in state.deps.values_mut() { + list.sort(); + } + Ok(()) } From bd8253fdeb8f9e13a05ff64647ac0d3633c3e689 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Apr 2019 14:07:31 -0700 Subject: [PATCH 06/10] Avoid reparsing `env_args` continuously We only need to parse this information once, so calculate it when a `TargetInfo` is created and then just reuse that from then on out. 
--- src/cargo/core/compiler/build_context/mod.rs | 146 +---------------- .../compiler/build_context/target_info.rs | 148 +++++++++++++++++- .../compiler/context/compilation_files.rs | 4 +- src/cargo/core/compiler/context/mod.rs | 4 +- src/cargo/core/compiler/fingerprint.rs | 6 +- src/cargo/core/compiler/mod.rs | 4 +- 6 files changed, 159 insertions(+), 153 deletions(-) diff --git a/src/cargo/core/compiler/build_context/mod.rs b/src/cargo/core/compiler/build_context/mod.rs index febdc65f4b5..07ac1d19e95 100644 --- a/src/cargo/core/compiler/build_context/mod.rs +++ b/src/cargo/core/compiler/build_context/mod.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::env; use std::path::{Path, PathBuf}; use std::str; @@ -9,7 +8,7 @@ use crate::core::profiles::Profiles; use crate::core::{Dependency, Workspace}; use crate::core::{PackageId, PackageSet, Resolve}; use crate::util::errors::CargoResult; -use crate::util::{profile, Cfg, CfgExpr, Config, Rustc}; +use crate::util::{profile, Cfg, Config, Rustc}; use super::{BuildConfig, BuildOutput, Kind, Unit}; @@ -157,26 +156,12 @@ impl<'a, 'cfg> BuildContext<'a, 'cfg> { self.build_config.jobs } - pub fn rustflags_args(&self, unit: &Unit<'_>) -> CargoResult> { - env_args( - self.config, - &self.build_config.requested_target, - self.host_triple(), - self.info(unit.kind).cfg(), - unit.kind, - "RUSTFLAGS", - ) + pub fn rustflags_args(&self, unit: &Unit<'_>) -> &[String] { + &self.info(unit.kind).rustflags } - pub fn rustdocflags_args(&self, unit: &Unit<'_>) -> CargoResult> { - env_args( - self.config, - &self.build_config.requested_target, - self.host_triple(), - self.info(unit.kind).cfg(), - unit.kind, - "RUSTDOCFLAGS", - ) + pub fn rustdocflags_args(&self, unit: &Unit<'_>) -> &[String] { + &self.info(unit.kind).rustdocflags } pub fn show_warnings(&self, pkg: PackageId) -> bool { @@ -292,124 +277,3 @@ impl TargetConfig { Ok(ret) } } - -/// Acquire extra flags to pass to the compiler from various locations. 
-/// -/// The locations are: -/// -/// - the `RUSTFLAGS` environment variable -/// -/// then if this was not found -/// -/// - `target.*.rustflags` from the manifest (Cargo.toml) -/// - `target.cfg(..).rustflags` from the manifest -/// -/// then if neither of these were found -/// -/// - `build.rustflags` from the manifest -/// -/// Note that if a `target` is specified, no args will be passed to host code (plugins, build -/// scripts, ...), even if it is the same as the target. -fn env_args( - config: &Config, - requested_target: &Option, - host_triple: &str, - target_cfg: Option<&[Cfg]>, - kind: Kind, - name: &str, -) -> CargoResult> { - // We *want* to apply RUSTFLAGS only to builds for the - // requested target architecture, and not to things like build - // scripts and plugins, which may be for an entirely different - // architecture. Cargo's present architecture makes it quite - // hard to only apply flags to things that are not build - // scripts and plugins though, so we do something more hacky - // instead to avoid applying the same RUSTFLAGS to multiple targets - // arches: - // - // 1) If --target is not specified we just apply RUSTFLAGS to - // all builds; they are all going to have the same target. - // - // 2) If --target *is* specified then we only apply RUSTFLAGS - // to compilation units with the Target kind, which indicates - // it was chosen by the --target flag. - // - // This means that, e.g., even if the specified --target is the - // same as the host, build scripts in plugins won't get - // RUSTFLAGS. - let compiling_with_target = requested_target.is_some(); - let is_target_kind = kind == Kind::Target; - - if compiling_with_target && !is_target_kind { - // This is probably a build script or plugin and we're - // compiling with --target. In this scenario there are - // no rustflags we can apply. 
- return Ok(Vec::new()); - } - - // First try RUSTFLAGS from the environment - if let Ok(a) = env::var(name) { - let args = a - .split(' ') - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string); - return Ok(args.collect()); - } - - let mut rustflags = Vec::new(); - - let name = name - .chars() - .flat_map(|c| c.to_lowercase()) - .collect::(); - // Then the target.*.rustflags value... - let target = requested_target - .as_ref() - .map(|s| s.as_str()) - .unwrap_or(host_triple); - let key = format!("target.{}.{}", target, name); - if let Some(args) = config.get_list_or_split_string(&key)? { - let args = args.val.into_iter(); - rustflags.extend(args); - } - // ...including target.'cfg(...)'.rustflags - if let Some(target_cfg) = target_cfg { - if let Some(table) = config.get_table("target")? { - let cfgs = table - .val - .keys() - .filter(|key| CfgExpr::matches_key(key, target_cfg)); - - // Note that we may have multiple matching `[target]` sections and - // because we're passing flags to the compiler this can affect - // cargo's caching and whether it rebuilds. Ensure a deterministic - // ordering through sorting for now. We may perhaps one day wish to - // ensure a deterministic ordering via the order keys were defined - // in files perhaps. - let mut cfgs = cfgs.collect::>(); - cfgs.sort(); - - for n in cfgs { - let key = format!("target.{}.{}", n, name); - if let Some(args) = config.get_list_or_split_string(&key)? { - let args = args.val.into_iter(); - rustflags.extend(args); - } - } - } - } - - if !rustflags.is_empty() { - return Ok(rustflags); - } - - // Then the `build.rustflags` value. - let key = format!("build.{}", name); - if let Some(args) = config.get_list_or_split_string(&key)? 
{ - let args = args.val.into_iter(); - return Ok(args.collect()); - } - - Ok(Vec::new()) -} diff --git a/src/cargo/core/compiler/build_context/target_info.rs b/src/cargo/core/compiler/build_context/target_info.rs index 7ae50eab156..59748c5a4f8 100644 --- a/src/cargo/core/compiler/build_context/target_info.rs +++ b/src/cargo/core/compiler/build_context/target_info.rs @@ -1,11 +1,12 @@ use std::cell::RefCell; use std::collections::hash_map::{Entry, HashMap}; +use std::env; use std::path::PathBuf; use std::str::{self, FromStr}; -use super::env_args; use super::Kind; use crate::core::TargetKind; +use crate::util::CfgExpr; use crate::util::{CargoResult, CargoResultExt, Cfg, Config, ProcessBuilder, Rustc}; #[derive(Clone)] @@ -14,6 +15,8 @@ pub struct TargetInfo { crate_types: RefCell>>, cfg: Option>, pub sysroot_libdir: Option, + pub rustflags: Vec, + pub rustdocflags: Vec, } /// Type of each file generated by a Unit. @@ -136,7 +139,7 @@ impl TargetInfo { } let cfg = if has_cfg_and_sysroot { - Some(lines.map(Cfg::from_str).collect::>()?) + Some(lines.map(Cfg::from_str).collect::>>()?) } else { None }; @@ -144,8 +147,26 @@ impl TargetInfo { Ok(TargetInfo { crate_type_process: Some(crate_type_process), crate_types: RefCell::new(map), - cfg, sysroot_libdir, + // recalculate `rustflags` from above now that we have `cfg` + // information + rustflags: env_args( + config, + requested_target, + &rustc.host, + cfg.as_ref().map(|v| v.as_ref()), + kind, + "RUSTFLAGS", + )?, + rustdocflags: env_args( + config, + requested_target, + &rustc.host, + cfg.as_ref().map(|v| v.as_ref()), + kind, + "RUSTDOCFLAGS", + )?, + cfg, }) } @@ -289,3 +310,124 @@ fn parse_crate_type( Ok(Some((prefix.to_string(), suffix.to_string()))) } + +/// Acquire extra flags to pass to the compiler from various locations. 
+/// +/// The locations are: +/// +/// - the `RUSTFLAGS` environment variable +/// +/// then if this was not found +/// +/// - `target.*.rustflags` from the manifest (Cargo.toml) +/// - `target.cfg(..).rustflags` from the manifest +/// +/// then if neither of these were found +/// +/// - `build.rustflags` from the manifest +/// +/// Note that if a `target` is specified, no args will be passed to host code (plugins, build +/// scripts, ...), even if it is the same as the target. +fn env_args( + config: &Config, + requested_target: &Option, + host_triple: &str, + target_cfg: Option<&[Cfg]>, + kind: Kind, + name: &str, +) -> CargoResult> { + // We *want* to apply RUSTFLAGS only to builds for the + // requested target architecture, and not to things like build + // scripts and plugins, which may be for an entirely different + // architecture. Cargo's present architecture makes it quite + // hard to only apply flags to things that are not build + // scripts and plugins though, so we do something more hacky + // instead to avoid applying the same RUSTFLAGS to multiple targets + // arches: + // + // 1) If --target is not specified we just apply RUSTFLAGS to + // all builds; they are all going to have the same target. + // + // 2) If --target *is* specified then we only apply RUSTFLAGS + // to compilation units with the Target kind, which indicates + // it was chosen by the --target flag. + // + // This means that, e.g., even if the specified --target is the + // same as the host, build scripts in plugins won't get + // RUSTFLAGS. + let compiling_with_target = requested_target.is_some(); + let is_target_kind = kind == Kind::Target; + + if compiling_with_target && !is_target_kind { + // This is probably a build script or plugin and we're + // compiling with --target. In this scenario there are + // no rustflags we can apply. 
+ return Ok(Vec::new()); + } + + // First try RUSTFLAGS from the environment + if let Ok(a) = env::var(name) { + let args = a + .split(' ') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string); + return Ok(args.collect()); + } + + let mut rustflags = Vec::new(); + + let name = name + .chars() + .flat_map(|c| c.to_lowercase()) + .collect::(); + // Then the target.*.rustflags value... + let target = requested_target + .as_ref() + .map(|s| s.as_str()) + .unwrap_or(host_triple); + let key = format!("target.{}.{}", target, name); + if let Some(args) = config.get_list_or_split_string(&key)? { + let args = args.val.into_iter(); + rustflags.extend(args); + } + // ...including target.'cfg(...)'.rustflags + if let Some(target_cfg) = target_cfg { + if let Some(table) = config.get_table("target")? { + let cfgs = table + .val + .keys() + .filter(|key| CfgExpr::matches_key(key, target_cfg)); + + // Note that we may have multiple matching `[target]` sections and + // because we're passing flags to the compiler this can affect + // cargo's caching and whether it rebuilds. Ensure a deterministic + // ordering through sorting for now. We may perhaps one day wish to + // ensure a deterministic ordering via the order keys were defined + // in files perhaps. + let mut cfgs = cfgs.collect::>(); + cfgs.sort(); + + for n in cfgs { + let key = format!("target.{}.{}", n, name); + if let Some(args) = config.get_list_or_split_string(&key)? { + let args = args.val.into_iter(); + rustflags.extend(args); + } + } + } + } + + if !rustflags.is_empty() { + return Ok(rustflags); + } + + // Then the `build.rustflags` value. + let key = format!("build.{}", name); + if let Some(args) = config.get_list_or_split_string(&key)? 
{ + let args = args.val.into_iter(); + return Ok(args.collect()); + } + + Ok(Vec::new()) +} diff --git a/src/cargo/core/compiler/context/compilation_files.rs b/src/cargo/core/compiler/context/compilation_files.rs index a8bc75d40bb..02005e2059f 100644 --- a/src/cargo/core/compiler/context/compilation_files.rs +++ b/src/cargo/core/compiler/context/compilation_files.rs @@ -504,9 +504,9 @@ fn compute_metadata<'a, 'cfg>( // This helps when the target directory is a shared cache for projects with different cargo configs, // or if the user is experimenting with different rustflags manually. if unit.mode.is_doc() { - cx.bcx.rustdocflags_args(unit).ok().hash(&mut hasher); + cx.bcx.rustdocflags_args(unit).hash(&mut hasher); } else { - cx.bcx.rustflags_args(unit).ok().hash(&mut hasher); + cx.bcx.rustflags_args(unit).hash(&mut hasher); } // Artifacts compiled for the host should have a different metadata diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 37fd7d28ad7..25bb4d35fbd 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -238,12 +238,12 @@ impl<'a, 'cfg> Context<'a, 'cfg> { .collect() }); } - let rustdocflags = self.bcx.rustdocflags_args(unit)?; + let rustdocflags = self.bcx.rustdocflags_args(unit); if !rustdocflags.is_empty() { self.compilation .rustdocflags .entry(unit.pkg.package_id()) - .or_insert(rustdocflags); + .or_insert(rustdocflags.to_vec()); } super::output_depinfo(&mut self, unit)?; diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 2586a06a295..e7a2b378542 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1042,9 +1042,9 @@ fn calculate_normal<'a, 'cfg>( // hashed to take up less space on disk as we just need to know when things // change. let extra_flags = if unit.mode.is_doc() { - cx.bcx.rustdocflags_args(unit)? 
+ cx.bcx.rustdocflags_args(unit) } else { - cx.bcx.rustflags_args(unit)? + cx.bcx.rustflags_args(unit) }; let profile_hash = util::hash_u64((&unit.profile, unit.mode, cx.bcx.extra_args_for(unit))); // Include metadata since it is exposed as environment variables. @@ -1065,7 +1065,7 @@ fn calculate_normal<'a, 'cfg>( local: Mutex::new(local), memoized_hash: Mutex::new(None), metadata, - rustflags: extra_flags, + rustflags: extra_flags.to_vec(), fs_status: FsStatus::Stale, outputs, }) diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index aea50fcb21b..0de3936f252 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -222,7 +222,7 @@ fn rustc<'a, 'cfg>( .with_extension("d"); let dep_info_loc = fingerprint::dep_info_loc(cx, unit); - rustc.args(&cx.bcx.rustflags_args(unit)?); + rustc.args(cx.bcx.rustflags_args(unit)); let json_messages = cx.bcx.build_config.json_messages(); let package_id = unit.pkg.package_id(); let target = unit.target.clone(); @@ -643,7 +643,7 @@ fn rustdoc<'a, 'cfg>(cx: &mut Context<'a, 'cfg>, unit: &Unit<'a>) -> CargoResult build_deps_args(&mut rustdoc, cx, unit)?; - rustdoc.args(&bcx.rustdocflags_args(unit)?); + rustdoc.args(bcx.rustdocflags_args(unit)); let name = unit.pkg.name().to_string(); let build_state = cx.build_state.clone(); From e45444665c99e3df468a00addee2ce3e239d003f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Apr 2019 15:06:37 -0700 Subject: [PATCH 07/10] Intern units for fast hashing This commit starts to intern `Unit` structures for a few reasons: * This primarily makes equality and hashing much faster. We have tons of hash lookups with units, and they were showing up quite high in profiles. It turns out `Unit` hashes a *ton* of data, and most of it is always redundant. To handle this they're all only hashed once now and hashing/equality are just pointer checks. 
* The size of `Unit` is now drastically reduced to just one pointer, so movement of units throughout the backend should be much more efficient. --- src/cargo/core/compiler/build_context/mod.rs | 7 +- .../compiler/build_context/target_info.rs | 2 +- src/cargo/core/compiler/build_plan.rs | 2 +- .../compiler/context/compilation_files.rs | 3 +- src/cargo/core/compiler/context/mod.rs | 49 +----- .../compiler/context/unit_dependencies.rs | 37 ++--- src/cargo/core/compiler/mod.rs | 6 +- src/cargo/core/compiler/unit.rs | 139 ++++++++++++++++++ src/cargo/ops/cargo_clean.rs | 35 ++--- src/cargo/ops/cargo_compile.rs | 68 ++++----- 10 files changed, 218 insertions(+), 130 deletions(-) create mode 100644 src/cargo/core/compiler/unit.rs diff --git a/src/cargo/core/compiler/build_context/mod.rs b/src/cargo/core/compiler/build_context/mod.rs index 07ac1d19e95..858f7ae9074 100644 --- a/src/cargo/core/compiler/build_context/mod.rs +++ b/src/cargo/core/compiler/build_context/mod.rs @@ -9,8 +9,8 @@ use crate::core::{Dependency, Workspace}; use crate::core::{PackageId, PackageSet, Resolve}; use crate::util::errors::CargoResult; use crate::util::{profile, Cfg, Config, Rustc}; - -use super::{BuildConfig, BuildOutput, Kind, Unit}; +use crate::core::compiler::{Unit, Kind, BuildConfig, BuildOutput}; +use crate::core::compiler::unit::UnitInterner; mod target_info; pub use self::target_info::{FileFlavor, TargetInfo}; @@ -37,6 +37,7 @@ pub struct BuildContext<'a, 'cfg: 'a> { pub target_config: TargetConfig, pub target_info: TargetInfo, pub host_info: TargetInfo, + pub units: &'a UnitInterner<'a>, } impl<'a, 'cfg> BuildContext<'a, 'cfg> { @@ -47,6 +48,7 @@ impl<'a, 'cfg> BuildContext<'a, 'cfg> { config: &'cfg Config, build_config: &'a BuildConfig, profiles: &'a Profiles, + units: &'a UnitInterner<'a>, extra_compiler_args: HashMap, Vec>, ) -> CargoResult> { let mut rustc = config.load_global_rustc(Some(ws))?; @@ -82,6 +84,7 @@ impl<'a, 'cfg> BuildContext<'a, 'cfg> { build_config, profiles, 
extra_compiler_args, + units, }) } diff --git a/src/cargo/core/compiler/build_context/target_info.rs b/src/cargo/core/compiler/build_context/target_info.rs index 59748c5a4f8..82e0d548fc7 100644 --- a/src/cargo/core/compiler/build_context/target_info.rs +++ b/src/cargo/core/compiler/build_context/target_info.rs @@ -4,7 +4,7 @@ use std::env; use std::path::PathBuf; use std::str::{self, FromStr}; -use super::Kind; +use crate::core::compiler::Kind; use crate::core::TargetKind; use crate::util::CfgExpr; use crate::util::{CargoResult, CargoResultExt, Cfg, Config, ProcessBuilder, Rustc}; diff --git a/src/cargo/core/compiler/build_plan.rs b/src/cargo/core/compiler/build_plan.rs index efdfe1153ec..cfdd1a01523 100644 --- a/src/cargo/core/compiler/build_plan.rs +++ b/src/cargo/core/compiler/build_plan.rs @@ -109,7 +109,7 @@ impl BuildPlan { } } - pub fn add(&mut self, cx: &Context<'_, '_>, unit: &Unit<'_>) -> CargoResult<()> { + pub fn add<'a>(&mut self, cx: &Context<'a, '_>, unit: &Unit<'a>) -> CargoResult<()> { let id = self.plan.invocations.len(); self.invocation_map.insert(unit.buildkey(), id); let deps = cx diff --git a/src/cargo/core/compiler/context/compilation_files.rs b/src/cargo/core/compiler/context/compilation_files.rs index 02005e2059f..217022f7132 100644 --- a/src/cargo/core/compiler/context/compilation_files.rs +++ b/src/cargo/core/compiler/context/compilation_files.rs @@ -8,7 +8,8 @@ use std::sync::Arc; use lazycell::LazyCell; use log::info; -use super::{BuildContext, Context, FileFlavor, Kind, Layout, Unit}; +use super::{BuildContext, Context, FileFlavor, Kind, Layout}; +use crate::core::compiler::Unit; use crate::core::{TargetKind, Workspace}; use crate::util::{self, CargoResult}; diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 25bb4d35fbd..3eb20bb4bde 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -8,10 +8,10 @@ use std::sync::Arc; use jobserver::Client; 
use crate::core::compiler::compilation; -use crate::core::profiles::Profile; -use crate::core::{Package, PackageId, Resolve, Target}; +use crate::core::compiler::Unit; +use crate::core::{Package, PackageId, Resolve}; use crate::util::errors::{CargoResult, CargoResultExt}; -use crate::util::{internal, profile, short_hash, Config}; +use crate::util::{internal, profile, Config}; use super::build_plan::BuildPlan; use super::custom_build::{self, BuildDeps, BuildScripts, BuildState}; @@ -27,49 +27,6 @@ mod compilation_files; use self::compilation_files::CompilationFiles; pub use self::compilation_files::{Metadata, OutputFile}; -/// All information needed to define a unit. -/// -/// A unit is an object that has enough information so that cargo knows how to build it. -/// For example, if your package has dependencies, then every dependency will be built as a library -/// unit. If your package is a library, then it will be built as a library unit as well, or if it -/// is a binary with `main.rs`, then a binary will be output. There are also separate unit types -/// for `test`ing and `check`ing, amongst others. -/// -/// The unit also holds information about all possible metadata about the package in `pkg`. -/// -/// A unit needs to know extra information in addition to the type and root source file. For -/// example, it needs to know the target architecture (OS, chip arch etc.) and it needs to know -/// whether you want a debug or release build. There is enough information in this struct to figure -/// all that out. -#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] -pub struct Unit<'a> { - /// Information about available targets, which files to include/exclude, etc. Basically stuff in - /// `Cargo.toml`. - pub pkg: &'a Package, - /// Information about the specific target to build, out of the possible targets in `pkg`. Not - /// to be confused with *target-triple* (or *target architecture* ...), the target arch for a - /// build. 
- pub target: &'a Target, - /// The profile contains information about *how* the build should be run, including debug - /// level, etc. - pub profile: Profile, - /// Whether this compilation unit is for the host or target architecture. - /// - /// For example, when - /// cross compiling and using a custom build script, the build script needs to be compiled for - /// the host architecture so the host rustc can use it (when compiling to the target - /// architecture). - pub kind: Kind, - /// The "mode" this unit is being compiled for. See [`CompileMode`] for more details. - pub mode: CompileMode, -} - -impl<'a> Unit<'a> { - pub fn buildkey(&self) -> String { - format!("{}-{}", self.pkg.name(), short_hash(self)) - } -} - pub struct Context<'a, 'cfg: 'a> { pub bcx: &'a BuildContext<'a, 'cfg>, pub compilation: Compilation<'cfg>, diff --git a/src/cargo/core/compiler/context/unit_dependencies.rs b/src/cargo/core/compiler/context/unit_dependencies.rs index 1be5a37cd9e..618c925b37a 100644 --- a/src/cargo/core/compiler/context/unit_dependencies.rs +++ b/src/cargo/core/compiler/context/unit_dependencies.rs @@ -20,7 +20,8 @@ use std::collections::{HashMap, HashSet}; use log::trace; -use super::{BuildContext, CompileMode, Kind, Unit}; +use super::{BuildContext, CompileMode, Kind}; +use crate::core::compiler::Unit; use crate::core::dependency::Kind as DepKind; use crate::core::package::Downloads; use crate::core::profiles::UnitFor; @@ -350,7 +351,7 @@ fn compute_deps_doc<'a, 'cfg, 'tmp>( fn maybe_lib<'a>( unit: &Unit<'a>, - bcx: &BuildContext<'_, '_>, + bcx: &BuildContext<'a, '_>, unit_for: UnitFor, ) -> Option<(Unit<'a>, UnitFor)> { unit.pkg.targets().iter().find(|t| t.linkable()).map(|t| { @@ -369,7 +370,7 @@ fn maybe_lib<'a>( /// build script. 
fn dep_build_script<'a>( unit: &Unit<'a>, - bcx: &BuildContext<'_, '_>, + bcx: &BuildContext<'a, '_>, ) -> Option<(Unit<'a>, UnitFor)> { unit.pkg .targets() @@ -378,16 +379,15 @@ fn dep_build_script<'a>( .map(|t| { // The profile stored in the Unit is the profile for the thing // the custom build script is running for. - ( - Unit { - pkg: unit.pkg, - target: t, - profile: bcx.profiles.get_profile_run_custom_build(&unit.profile), - kind: unit.kind, - mode: CompileMode::RunCustomBuild, - }, - UnitFor::new_build(), - ) + let unit = bcx.units.intern( + unit.pkg, + t, + bcx.profiles.get_profile_run_custom_build(&unit.profile), + unit.kind, + CompileMode::RunCustomBuild, + ); + + (unit, UnitFor::new_build()) }) } @@ -410,7 +410,7 @@ fn check_or_build_mode(mode: CompileMode, target: &Target) -> CompileMode { } fn new_unit<'a>( - bcx: &BuildContext<'_, '_>, + bcx: &BuildContext<'a, '_>, pkg: &'a Package, target: &'a Target, unit_for: UnitFor, @@ -424,13 +424,8 @@ fn new_unit<'a>( mode, bcx.build_config.release, ); - Unit { - pkg, - target, - profile, - kind, - mode, - } + + bcx.units.intern(pkg, target, profile, kind, mode) } /// Fill in missing dependencies for units of the `RunCustomBuild` diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index 0de3936f252..9c9f6533b92 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -9,6 +9,7 @@ mod job; mod job_queue; mod layout; mod output_depinfo; +mod unit; use std::env; use std::ffi::{OsStr, OsString}; @@ -26,13 +27,14 @@ pub use self::build_config::{BuildConfig, CompileMode, MessageFormat}; pub use self::build_context::{BuildContext, FileFlavor, TargetConfig, TargetInfo}; use self::build_plan::BuildPlan; pub use self::compilation::{Compilation, Doctest}; -pub use self::context::{Context, Unit}; +pub use self::context::Context; pub use self::custom_build::{BuildMap, BuildOutput, BuildScripts}; -use self::job::{Job, Work}; pub use self::job::Freshness; +use 
use crate::core::compiler::{CompileMode, Kind};
use crate::core::{profiles::Profile, Package, Target};
use crate::util::hex::short_hash;
use std::cell::RefCell;
use std::collections::HashSet;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;

/// All information needed to define a unit.
///
/// A unit is an object that has enough information so that cargo knows how to build it.
/// For example, if your package has dependencies, then every dependency will be built as a library
/// unit. If your package is a library, then it will be built as a library unit as well, or if it
/// is a binary with `main.rs`, then a binary will be output. There are also separate unit types
/// for `test`ing and `check`ing, amongst others.
///
/// The unit also holds information about all possible metadata about the package in `pkg`.
///
/// A unit needs to know extra information in addition to the type and root source file. For
/// example, it needs to know the target architecture (OS, chip arch etc.) and it needs to know
/// whether you want a debug or release build. There is enough information in this struct to figure
/// all that out.
//
// NOTE: `Hash`, `PartialEq`, and `Eq` are deliberately *not* derived; they
// are implemented by hand below in terms of the pointer identity of `inner`,
// which is valid because every `Unit` comes out of a `UnitInterner`. The
// derived `PartialOrd`/`Ord`, by contrast, compare through the reference and
// so order by the underlying `UnitInner` values.
#[derive(Clone, Copy, PartialOrd, Ord)]
pub struct Unit<'a> {
    // Pointer into the `UnitInterner` that manufactured this unit; all of
    // the actual fields live behind it and are reachable via `Deref`.
    inner: &'a UnitInner<'a>,
}

/// Internal fields of `Unit` which `Unit` will dereference to.
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnitInner<'a> {
    /// Information about available targets, which files to include/exclude, etc. Basically stuff in
    /// `Cargo.toml`.
    pub pkg: &'a Package,
    /// Information about the specific target to build, out of the possible targets in `pkg`. Not
    /// to be confused with *target-triple* (or *target architecture* ...), the target arch for a
    /// build.
    pub target: &'a Target,
    /// The profile contains information about *how* the build should be run, including debug
    /// level, etc.
    pub profile: Profile,
    /// Whether this compilation unit is for the host or target architecture.
    ///
    /// For example, when
    /// cross compiling and using a custom build script, the build script needs to be compiled for
    /// the host architecture so the host rustc can use it (when compiling to the target
    /// architecture).
    pub kind: Kind,
    /// The "mode" this unit is being compiled for. See [`CompileMode`] for more details.
    pub mode: CompileMode,
}

impl<'a> Unit<'a> {
    /// Returns a key for this unit: the package name followed by a short
    /// hash of the entire unit.
    pub fn buildkey(&self) -> String {
        format!("{}-{}", self.pkg.name(), short_hash(self))
    }
}

// Just hash the pointer for fast hashing. This is sound because the interner
// below hands out exactly one stable allocation per distinct `UnitInner`, so
// equal units always share the same pointer.
impl<'a> Hash for Unit<'a> {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        (self.inner as *const UnitInner<'a>).hash(hasher)
    }
}

// Just equate the pointer since these are interned
impl<'a> PartialEq for Unit<'a> {
    fn eq(&self, other: &Unit<'a>) -> bool {
        self.inner as *const UnitInner<'a> == other.inner as *const UnitInner<'a>
    }
}

impl<'a> Eq for Unit<'a> {}

// Expose all of `UnitInner`'s fields (`unit.pkg`, `unit.mode`, ...) directly
// on `Unit`.
impl<'a> Deref for Unit<'a> {
    type Target = UnitInner<'a>;

    fn deref(&self) -> &UnitInner<'a> {
        self.inner
    }
}

// Debug-print the dereferenced fields rather than the opaque pointer.
impl<'a> fmt::Debug for Unit<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Unit")
            .field("pkg", &self.pkg)
            .field("target", &self.target)
            .field("profile", &self.profile)
            .field("kind", &self.kind)
            .field("mode", &self.mode)
            .finish()
    }
}

/// A small structure used to "intern" `Unit` values.
///
/// A `Unit` is just a thin pointer to an internal `UnitInner`. This is done to
/// ensure that `Unit` itself is quite small as well as enabling a very
/// efficient hash/equality implementation for `Unit`. All units are
/// manufactured through an interner which guarantees that each equivalent
/// value is only produced once.
pub struct UnitInterner<'a> {
    state: RefCell<InternerState<'a>>,
}

struct InternerState<'a> {
    // An owned, boxed copy of every distinct `UnitInner` handed out so far.
    // Boxing keeps each value at a stable heap address even when the
    // `HashSet` itself reallocates (see `intern_inner` for why that matters).
    cache: HashSet<Box<UnitInner<'a>>>,
}

impl<'a> UnitInterner<'a> {
    /// Creates a new blank interner
    pub fn new() -> UnitInterner<'a> {
        UnitInterner {
            state: RefCell::new(InternerState {
                cache: HashSet::new(),
            }),
        }
    }

    /// Creates a new `unit` from its components. The returned `Unit`'s fields
    /// will all be equivalent to the provided arguments, although they may not
    /// be the exact same instance.
    pub fn intern(
        &'a self,
        pkg: &'a Package,
        target: &'a Target,
        profile: Profile,
        kind: Kind,
        mode: CompileMode,
    ) -> Unit<'a> {
        let inner = self.intern_inner(&UnitInner {
            pkg,
            target,
            profile,
            kind,
            mode,
        });
        Unit { inner }
    }

    // Ok so interning here is a little unsafe, hence the usage of `unsafe`
    // internally. The primary issue here is that we've got an internal cache
    // of `UnitInner` instances added so far, but we may need to mutate it to
    // add it, and the mutation for an interner happens behind a shared borrow.
    //
    // Our goal though is to escape the lifetime `borrow_mut` to the same
    // lifetime as the borrow passed into this function. That's where `unsafe`
    // comes into play. What we're subverting here is resizing internally in
    // the `HashSet` as well as overwriting previous keys in the `HashSet`.
    //
    // As a result we store `Box<UnitInner>` internally to have an extra layer
    // of indirection. That way `*const UnitInner` is a stable address that
    // doesn't change with `HashSet` resizing. Furthermore we're careful to
    // never overwrite an entry once inserted.
    //
    // Ideally we'd use an off-the-shelf interner from crates.io which avoids
    // a small amount of unsafety here, but at the time this was written one
    // wasn't obviously available.
    fn intern_inner(&'a self, item: &UnitInner<'a>) -> &'a UnitInner<'a> {
        let mut me = self.state.borrow_mut();
        if let Some(item) = me.cache.get(item) {
            // note that `item` has type `&Box<UnitInner>`. Use `&**` to
            // convert that to `&UnitInner<'a>`, then do some trickery to
            // extend the lifetime to the `'a` on the function here.
            return unsafe { &*(&**item as *const UnitInner<'a>) };
        }
        me.cache.insert(Box::new(item.clone()));
        let item = me.cache.get(item).unwrap();
        return unsafe { &*(&**item as *const UnitInner<'a>) };
    }
}
a/src/cargo/ops/cargo_clean.rs +++ b/src/cargo/ops/cargo_clean.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use std::fs; use std::path::Path; -use crate::core::compiler::{BuildConfig, BuildContext, CompileMode, Context, Kind, Unit}; +use crate::core::compiler::UnitInterner; +use crate::core::compiler::{BuildConfig, BuildContext, CompileMode, Context, Kind}; use crate::core::profiles::UnitFor; use crate::core::Workspace; use crate::ops; @@ -50,6 +51,19 @@ pub fn clean(ws: &Workspace<'_>, opts: &CleanOptions<'_>) -> CargoResult<()> { let (packages, resolve) = ops::resolve_ws(ws)?; let profiles = ws.profiles(); + let interner = UnitInterner::new(); + let mut build_config = BuildConfig::new(config, Some(1), &opts.target, CompileMode::Build)?; + build_config.release = opts.release; + let bcx = BuildContext::new( + ws, + &resolve, + &packages, + opts.config, + &build_config, + profiles, + &interner, + HashMap::new(), + )?; let mut units = Vec::new(); for spec in opts.spec.iter() { @@ -79,30 +93,13 @@ pub fn clean(ws: &Workspace<'_>, opts: &CleanOptions<'_>) -> CargoResult<()> { opts.release, ) }; - units.push(Unit { - pkg, - target, - profile, - kind: *kind, - mode: *mode, - }); + units.push(bcx.units.intern(pkg, target, profile, *kind, *mode)); } } } } } - let mut build_config = BuildConfig::new(config, Some(1), &opts.target, CompileMode::Build)?; - build_config.release = opts.release; - let bcx = BuildContext::new( - ws, - &resolve, - &packages, - opts.config, - &build_config, - profiles, - HashMap::new(), - )?; let mut cx = Context::new(config, &bcx)?; cx.prepare_units(None, &units)?; diff --git a/src/cargo/ops/cargo_compile.rs b/src/cargo/ops/cargo_compile.rs index dbc03005667..3d1719dfc34 100644 --- a/src/cargo/ops/cargo_compile.rs +++ b/src/cargo/ops/cargo_compile.rs @@ -25,10 +25,9 @@ use std::iter::FromIterator; use std::path::PathBuf; use std::sync::Arc; -use crate::core::compiler::{ - BuildConfig, BuildContext, Compilation, Context, DefaultExecutor, 
Executor, -}; +use crate::core::compiler::{BuildConfig, BuildContext, Compilation, Context}; use crate::core::compiler::{CompileMode, Kind, Unit}; +use crate::core::compiler::{DefaultExecutor, Executor, UnitInterner}; use crate::core::profiles::{Profiles, UnitFor}; use crate::core::resolver::{Method, Resolve}; use crate::core::{Package, Target}; @@ -349,6 +348,17 @@ pub fn compile_ws<'a>( let profiles = ws.profiles(); profiles.validate_packages(&mut config.shell(), &packages)?; + let interner = UnitInterner::new(); + let mut bcx = BuildContext::new( + ws, + &resolve_with_overrides, + &packages, + config, + build_config, + profiles, + &interner, + HashMap::new(), + )?; let units = generate_targets( ws, profiles, @@ -356,10 +366,9 @@ pub fn compile_ws<'a>( filter, default_arch_kind, &resolve_with_overrides, - build_config, + &bcx, )?; - let mut extra_compiler_args = HashMap::new(); if let Some(args) = extra_args { if units.len() != 1 { failure::bail!( @@ -369,27 +378,18 @@ pub fn compile_ws<'a>( extra_args_name ); } - extra_compiler_args.insert(units[0], args); + bcx.extra_compiler_args.insert(units[0], args); } if let Some(args) = local_rustdoc_args { for unit in &units { if unit.mode.is_doc() { - extra_compiler_args.insert(*unit, args.clone()); + bcx.extra_compiler_args.insert(*unit, args.clone()); } } } let ret = { let _p = profile::start("compiling"); - let bcx = BuildContext::new( - ws, - &resolve_with_overrides, - &packages, - config, - build_config, - profiles, - extra_compiler_args, - )?; let cx = Context::new(config, &bcx)?; cx.compile(&units, export_dir.clone(), exec)? }; @@ -581,11 +581,11 @@ fn generate_targets<'a>( filter: &CompileFilter, default_arch_kind: Kind, resolve: &Resolve, - build_config: &BuildConfig, + bcx: &BuildContext<'a, '_>, ) -> CargoResult>> { // Helper for creating a `Unit` struct. 
let new_unit = |pkg: &'a Package, target: &'a Target, target_mode: CompileMode| { - let unit_for = if build_config.mode.is_any_test() { + let unit_for = if bcx.build_config.mode.is_any_test() { // NOTE: the `UnitFor` here is subtle. If you have a profile // with `panic` set, the `panic` flag is cleared for // tests/benchmarks and their dependencies. If this @@ -650,15 +650,9 @@ fn generate_targets<'a>( ws.is_member(pkg), unit_for, target_mode, - build_config.release, + bcx.build_config.release, ); - Unit { - pkg, - target, - profile, - kind, - mode: target_mode, - } + bcx.units.intern(pkg, target, profile, kind, target_mode) }; // Create a list of proposed targets. @@ -669,14 +663,14 @@ fn generate_targets<'a>( required_features_filterable, } => { for pkg in packages { - let default = filter_default_targets(pkg.targets(), build_config.mode); + let default = filter_default_targets(pkg.targets(), bcx.build_config.mode); proposals.extend(default.into_iter().map(|target| Proposal { pkg, target, requires_features: !required_features_filterable, - mode: build_config.mode, + mode: bcx.build_config.mode, })); - if build_config.mode == CompileMode::Test { + if bcx.build_config.mode == CompileMode::Test { if let Some(t) = pkg .targets() .iter() @@ -702,9 +696,9 @@ fn generate_targets<'a>( } => { if *lib != LibRule::False { let mut libs = Vec::new(); - for proposal in filter_targets(packages, Target::is_lib, false, build_config.mode) { + for proposal in filter_targets(packages, Target::is_lib, false, bcx.build_config.mode) { let Proposal { target, pkg, .. 
} = proposal; - if build_config.mode == CompileMode::Doctest && !target.doctestable() { + if bcx.build_config.mode == CompileMode::Doctest && !target.doctestable() { ws.config().shell().warn(format!( "doc tests are not supported for crate type(s) `{}` in package `{}`", target.rustc_crate_types().join(", "), @@ -734,10 +728,10 @@ fn generate_targets<'a>( FilterRule::All => Target::tested, FilterRule::Just(_) => Target::is_test, }; - let test_mode = match build_config.mode { + let test_mode = match bcx.build_config.mode { CompileMode::Build => CompileMode::Test, CompileMode::Check { .. } => CompileMode::Check { test: true }, - _ => build_config.mode, + _ => bcx.build_config.mode, }; // If `--benches` was specified, add all targets that would be // generated by `cargo bench`. @@ -745,10 +739,10 @@ fn generate_targets<'a>( FilterRule::All => Target::benched, FilterRule::Just(_) => Target::is_bench, }; - let bench_mode = match build_config.mode { + let bench_mode = match bcx.build_config.mode { CompileMode::Build => CompileMode::Bench, CompileMode::Check { .. } => CompileMode::Check { test: true }, - _ => build_config.mode, + _ => bcx.build_config.mode, }; proposals.extend(list_rule_targets( @@ -756,14 +750,14 @@ fn generate_targets<'a>( bins, "bin", Target::is_bin, - build_config.mode, + bcx.build_config.mode, )?); proposals.extend(list_rule_targets( packages, examples, "example", Target::is_example, - build_config.mode, + bcx.build_config.mode, )?); proposals.extend(list_rule_targets( packages, From f660b3e464f33ff26d931b1c22fac81481ea03a6 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Apr 2019 16:01:58 -0700 Subject: [PATCH 08/10] Turn `Workspace::is_member` into an `O(1)` query This commit moves it from a linear query which does a lot of hashing of `PathBuf` to an `O(1)` query that only hashes `PackageId`. This improves Servo's null build performance by 50ms or so, causing a number of functions to disappear from profiles. 
--- src/cargo/core/workspace.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index 1b0c6bd0deb..7d71afde402 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; use std::collections::hash_map::{Entry, HashMap}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet}; use std::path::{Path, PathBuf}; use std::slice; @@ -10,7 +10,7 @@ use url::Url; use crate::core::profiles::Profiles; use crate::core::registry::PackageRegistry; -use crate::core::{Dependency, PackageIdSpec}; +use crate::core::{Dependency, PackageIdSpec, PackageId}; use crate::core::{EitherManifest, Package, SourceId, VirtualManifest}; use crate::ops; use crate::sources::PathSource; @@ -51,6 +51,7 @@ pub struct Workspace<'cfg> { // paths. The packages themselves can be looked up through the `packages` // set above. members: Vec, + member_ids: HashSet, // The subset of `members` that are used by the // `build`, `check`, `test`, and `bench` subcommands @@ -148,6 +149,7 @@ impl<'cfg> Workspace<'cfg> { root_manifest: None, target_dir, members: Vec::new(), + member_ids: HashSet::new(), default_members: Vec::new(), is_ephemeral: false, require_optional_deps: true, @@ -185,6 +187,7 @@ impl<'cfg> Workspace<'cfg> { root_manifest: None, target_dir: None, members: Vec::new(), + member_ids: HashSet::new(), default_members: Vec::new(), is_ephemeral: true, require_optional_deps, @@ -193,6 +196,7 @@ impl<'cfg> Workspace<'cfg> { }; { let key = ws.current_manifest.parent().unwrap(); + let id = package.package_id(); let package = MaybePackage::Package(package); ws.packages.packages.insert(key.to_path_buf(), package); ws.target_dir = if let Some(dir) = target_dir { @@ -201,6 +205,7 @@ impl<'cfg> Workspace<'cfg> { ws.config.target_dir()? 
}; ws.members.push(ws.current_manifest.clone()); + ws.member_ids.insert(id); ws.default_members.push(ws.current_manifest.clone()); } Ok(ws) @@ -315,7 +320,7 @@ impl<'cfg> Workspace<'cfg> { /// Returns true if the package is a member of the workspace. pub fn is_member(&self, pkg: &Package) -> bool { - self.members().any(|p| p == pkg) + self.member_ids.contains(&pkg.package_id()) } pub fn is_ephemeral(&self) -> bool { @@ -430,6 +435,10 @@ impl<'cfg> Workspace<'cfg> { debug!("find_members - only me as a member"); self.members.push(self.current_manifest.clone()); self.default_members.push(self.current_manifest.clone()); + if let Ok(pkg) = self.current() { + let id = pkg.package_id(); + self.member_ids.insert(id); + } return Ok(()); } }; @@ -515,6 +524,7 @@ impl<'cfg> Workspace<'cfg> { MaybePackage::Package(ref p) => p, MaybePackage::Virtual(_) => return Ok(()), }; + self.member_ids.insert(pkg.package_id()); pkg.dependencies() .iter() .map(|d| d.source_id()) From d274fba070b10b26c96598af2792ef31aba40b87 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Apr 2019 16:08:19 -0700 Subject: [PATCH 09/10] Hoist a workspace membership check out of a loop This commit moves a linear scan which happens once-per-each-dependency to an O(1) lookup which happens only once for each package. This removes another 30ms or so from a null build in Servo. --- src/cargo/ops/resolve.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index 352a9ad824f..0b415518bc5 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -517,6 +517,7 @@ fn register_previous_locks( if !visited.insert(member.package_id()) { continue; } + let is_ws_member = ws.is_member(&member); for dep in member.dependencies() { // If this dependency didn't match anything special then we may want // to poison the source as it may have been added. 
If this path @@ -529,11 +530,7 @@ fn register_previous_locks( // non-workspace member and then simultaneously editing the // dependency on that crate to enable the feature. For now, // this bug is better than the always-updating registry though. - if !ws - .members() - .any(|pkg| pkg.package_id() == member.package_id()) - && (dep.is_optional() || !dep.is_transitive()) - { + if !is_ws_member && (dep.is_optional() || !dep.is_transitive()) { continue; } From 32269f4a34a0a25c4e041807c49bee16e99e9b9c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 23 Apr 2019 14:58:01 -0700 Subject: [PATCH 10/10] Document unsafety in `UnitInterner` --- src/cargo/core/compiler/unit.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/cargo/core/compiler/unit.rs b/src/cargo/core/compiler/unit.rs index 3adc2b2ef03..bf7d6f0871c 100644 --- a/src/cargo/core/compiler/unit.rs +++ b/src/cargo/core/compiler/unit.rs @@ -92,6 +92,13 @@ impl<'a> fmt::Debug for Unit<'a> { } } +/// A small structure used to "intern" `Unit` values. +/// +/// A `Unit` is just a thin pointer to an internal `UnitInner`. This is done to +/// ensure that `Unit` itself is quite small as well as enabling a very +/// efficient hash/equality implementation for `Unit`. All units are +/// manufactured through an interner which guarantees that each equivalent value +/// is only produced once. pub struct UnitInterner<'a> { state: RefCell>, } @@ -101,6 +108,7 @@ struct InternerState<'a> { } impl<'a> UnitInterner<'a> { + /// Creates a new blank interner pub fn new() -> UnitInterner<'a> { UnitInterner { state: RefCell::new(InternerState { @@ -109,6 +117,9 @@ impl<'a> UnitInterner<'a> { } } + /// Creates a new `unit` from its components. The returned `Unit`'s fields + /// will all be equivalent to the provided arguments, although they may not + /// be the exact same instance. 
pub fn intern( &'a self, pkg: &'a Package, @@ -127,9 +138,30 @@ impl<'a> UnitInterner<'a> { Unit { inner } } + // Ok so interning here is a little unsafe, hence the usage of `unsafe` + // internally. The primary issue here is that we've got an internal cache of + // `UnitInner` instances added so far, but we may need to mutate it to add + // it, and the mutation for an interner happens behind a shared borrow. + // + // Our goal though is to escape the lifetime `borrow_mut` to the same + // lifetime as the borrowed passed into this function. That's where `unsafe` + // comes into play. What we're subverting here is resizing internally in the + // `HashSet` as well as overwriting previous keys in the `HashSet`. + // + // As a result we store `Box` internally to have an extra layer + // of indirection. That way `*const UnitInner` is a stable address that + // doesn't change with `HashSet` resizing. Furthermore we're careful to + // never overwrite an entry once inserted. + // + // Ideally we'd use an off-the-shelf interner from crates.io which avoids a + // small amount of unsafety here, but at the time this was written one + // wasn't obviously available. fn intern_inner(&'a self, item: &UnitInner<'a>) -> &'a UnitInner<'a> { let mut me = self.state.borrow_mut(); if let Some(item) = me.cache.get(item) { + // note that `item` has type `&Box`. Use `&**` to + // convert that to `&UnitInner<'a>`, then do some trickery to extend + // the lifetime to the `'a` on the function here. return unsafe { &*(&**item as *const UnitInner<'a>) }; } me.cache.insert(Box::new(item.clone()));