diff --git a/Cargo.toml b/Cargo.toml
index f1d1b61f26a..1f1d0277ef9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -45,6 +45,7 @@ lazycell = "1.2.0"
 libc = "0.2"
 log = "0.4.6"
 libgit2-sys = "0.12.14"
+md-5 = "0.9"
 memchr = "2.1.3"
 num_cpus = "1.0"
 opener = "0.4"
@@ -55,6 +56,8 @@ semver = { version = "0.10", features = ["serde"] }
 serde = { version = "1.0.82", features = ["derive"] }
 serde_ignored = "0.1.0"
 serde_json = { version = "1.0.30", features = ["raw_value"] }
+sha-1 = "0.9"
+sha2 = "0.9"
 shell-escape = "0.1.4"
 strip-ansi-escapes = "0.1.0"
 tar = { version = "0.4.26", default-features = false }
@@ -68,12 +71,18 @@ clap = "2.31.2"
 unicode-width = "0.1.5"
 openssl = { version = '0.10.11', optional = true }
 im-rc = "15.0.0"
+ar = "0.8"
 
 # A noop dependency that changes in the Rust repository, it's a bit of a hack.
 # See the `src/tools/rustc-workspace-hack/README.md` file in `rust-lang/rust`
 # for more information.
 rustc-workspace-hack = "1.0.0"
 
+[dependencies.object]
+version = "0.20.0"
+default-features = false
+features = ['read_core', 'elf', 'macho', 'pe', 'unaligned']
+
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = { version = "0.9.0", features = ["mac_os_10_7_support"] }
diff --git a/src/cargo/core/compiler/content_hash.rs b/src/cargo/core/compiler/content_hash.rs
new file mode 100644
index 00000000000..76b5f104f21
--- /dev/null
+++ b/src/cargo/core/compiler/content_hash.rs
@@ -0,0 +1,327 @@
+use std::fmt;
+use std::fs;
+use std::io::{self, Read};
+use std::num::NonZeroU64;
+use std::path::Path;
+use std::path::PathBuf;
+use std::str::FromStr;
+
+use filetime::FileTime;
+use log::debug;
+use md5::{Digest, Md5};
+use object::Object;
+use serde;
+use serde::{Deserialize, Serialize};
+use sha1::Sha1;
+use sha2::Sha256;
+
+/// A file location with identifying properties: size and hash.
+#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug, Hash, Serialize, Deserialize)]
+pub struct Fileprint {
+    pub path: PathBuf, // TODO: is this field needed here?
+    pub size: Option<FileSize>,
+    pub hash: Option<FileHash>,
+}
+
+impl Fileprint {
+    pub(crate) fn from_md5(path: PathBuf) -> Self {
+        let size = CurrentFileprint::calc_size(&path);
+        let hash = CurrentFileprint::calc_hash(&path, FileHashAlgorithm::Md5);
+        Self { path, size, hash }
+    }
+}
+
+#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug, Serialize, Deserialize, Hash)]
+pub enum FileHashAlgorithm {
+    /// The svh is embedded as a symbol; for rmeta it is in the name of the
+    /// `.rmeta` file inside a `.rlib`.
+    Svh,
+    Md5,
+    Sha1,
+    Sha256,
+}
+
+impl FromStr for FileHashAlgorithm {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "md5" => Ok(FileHashAlgorithm::Md5),
+            "svh" => Ok(FileHashAlgorithm::Svh),
+            "sha1" => Ok(FileHashAlgorithm::Sha1),
+            "sha256" => Ok(FileHashAlgorithm::Sha256),
+            _ => Err(anyhow::Error::msg("Unknown hash type")),
+        }
+    }
+}
+
+impl std::fmt::Display for FileHashAlgorithm {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
+        match self {
+            Self::Md5 => fmt.write_str("md5")?,
+            Self::Svh => fmt.write_str("svh")?,
+            Self::Sha1 => fmt.write_str("sha1")?,
+            Self::Sha256 => fmt.write_str("sha256")?,
+        };
+        Ok(())
+    }
+}
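(The `FromStr`/`Display` pair above is what lets the algorithm tag round-trip through the textual `# size:<size> <kind>:<hash>` dep-info lines this patch emits later. A minimal sketch of that invariant — illustrative only, not part of the patch:)

```rust
use std::str::FromStr;

fn roundtrip(tag: &str) {
    // Display must emit exactly the token that FromStr accepts.
    let algo = FileHashAlgorithm::from_str(tag).unwrap();
    assert_eq!(algo.to_string(), tag);
}

fn main() {
    for tag in ["md5", "svh", "sha1", "sha256"] {
        roundtrip(tag);
    }
}
```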
+// While source files can't currently be > 4Gb, bin files could be.
+pub type FileSize = NonZeroU64;
+
+#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)]
+pub struct FileHash {
+    kind: FileHashAlgorithm,
+    // Derives aren't implemented for arrays longer than 32, so the hash is
+    // split in twain.
+    hash_front: [u8; 32],
+    hash_back: [u8; 32],
+}
+
+impl FileHash {
+    pub fn from_hex_rev(kind: FileHashAlgorithm, hash: &str) -> Option<Self> {
+        let mut decoded = hex::decode(hash).ok()?;
+        decoded.reverse(); // The slice is stored as little endian.
+        Some(Self::from_slice(kind, &decoded[..]))
+    }
+
+    pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option<Self> {
+        let decoded = hex::decode(hash).ok()?;
+        Some(Self::from_slice(kind, &decoded[..]))
+    }
+
+    pub fn from_slice_rev(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash {
+        let mut v = hash.to_vec();
+        v.reverse();
+        Self::from_slice(kind, &v)
+    }
+
+    pub fn from_slice(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash {
+        let mut result = FileHash {
+            kind,
+            hash_front: [0u8; 32],
+            hash_back: [0u8; 32],
+        };
+        let len = hash.len();
+        let front_len = std::cmp::min(len, 32);
+        (&mut result.hash_front[..front_len]).copy_from_slice(&hash[..front_len]);
+        if len > 32 {
+            let back_len = std::cmp::min(len, 64);
+            (&mut result.hash_back[..back_len - 32]).copy_from_slice(&hash[32..back_len]);
+        }
+        result
+    }
+
+    pub fn write_to_vec(&self, vec: &mut Vec<u8>) {
+        vec.push(match self.kind {
+            FileHashAlgorithm::Md5 => 1,
+            FileHashAlgorithm::Sha1 => 2,
+            FileHashAlgorithm::Sha256 => 3,
+            FileHashAlgorithm::Svh => 4,
+        });
+        vec.extend_from_slice(&self.hash_front[..]);
+        vec.extend_from_slice(&self.hash_back[..]);
+    }
+}
+
+impl fmt::Display for FileHash {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        write!(
+            formatter,
+            "{}:{}{}",
+            self.kind,
+            hex::encode(self.hash_front),
+            hex::encode(self.hash_back)
+        )
+    }
+}
+
+fn get_svh_from_ar<R: Read>(reader: R) -> Option<FileHash> {
+    let mut ar = ar::Archive::new(reader);
+    while let Some(file) = ar.next_entry() {
+        match file {
+            Ok(file) => {
+                let s = String::from_utf8_lossy(file.header().identifier());
+                if s.ends_with(".rmeta") {
+                    if let Some(index) = s.rfind('-') {
+                        return FileHash::from_hex_rev(
+                            FileHashAlgorithm::Svh,
+                            &s[index + 1..(s.len() - ".rmeta".len())],
+                        );
+                    }
+                }
+            }
+            Err(err) => debug!("Error reading ar: {}", err),
+        }
+    }
+    debug!("HASH svh not found in archive file.");
+    None
+}
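(A small illustration — not part of the patch — of how `from_slice` spreads a digest across the two fixed arrays and how `Display` renders it:)

```rust
fn main() {
    // A 64-byte input (e.g. an svh) fills both halves...
    let full = FileHash::from_slice(FileHashAlgorithm::Svh, &[7u8; 64]);
    // ...while a 16-byte MD5 digest fills only the front of hash_front;
    // the remaining 48 bytes stay zeroed.
    let md5 = FileHash::from_slice(FileHashAlgorithm::Md5, &[7u8; 16]);
    println!("{}", full); // "svh:" followed by "07" repeated 64 times
    println!("{}", md5);  // "md5:" + "07" x 16, then 96 zero hex digits
}
```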
+// While this looks expensive, this is only invoked for dylibs with an
+// incorrect timestamp where the file is still the expected size.
+fn get_svh_from_object_file<R: Read>(mut reader: R) -> Option<FileHash> {
+    let mut data = vec![];
+    reader.read_to_end(&mut data).ok()?;
+    let obj = object::read::File::parse(&data).ok()?;
+
+    for (_idx, sym) in obj.symbols() {
+        if let Some(name) = sym.name() {
+            if name.starts_with("_rust_svh") {
+                if let Some(index) = name.rfind('_') {
+                    return FileHash::from_hex_rev(FileHashAlgorithm::Svh, &name[index + 1..]);
+                }
+            }
+        }
+    }
+    debug!("HASH svh not found in object file");
+    None
+}
+
+fn get_svh_from_rmeta_file<R: Read>(mut reader: R) -> Option<FileHash> {
+    let mut data = vec![0u8; 128];
+    reader.read_exact(&mut data).ok()?;
+    parse_svh(&data)
+}
+
+fn parse_svh(data: &[u8]) -> Option<FileHash> {
+    debug!("HASHXX {:?}", data);
+    const METADATA_VERSION_LOC: usize = 7;
+
+    if data[METADATA_VERSION_LOC] < 6 {
+        debug!("svh not available as compiler not recent enough.");
+        return None;
+    }
+    let rust_svh_len_pos = 12;
+    assert_eq!(data[rust_svh_len_pos], 64_u8);
+    let data = &data[rust_svh_len_pos + 1..];
+    Some(FileHash::from_slice(FileHashAlgorithm::Svh, &data[..64]))
+}
+
+/// Cache of file properties that we know to be true.
+pub struct CurrentFileprint {
+    pub(crate) mtime: FileTime,
+    /// This will be `None` if not yet looked up.
+    size: Option<FileSize>,
+    /// This will be `None` if not yet calculated for this file.
+    hash: Option<FileHash>,
+}
+
+impl CurrentFileprint {
+    pub(crate) fn new(mtime: FileTime) -> Self {
+        CurrentFileprint {
+            mtime,
+            size: None,
+            hash: None,
+        }
+    }
+
+    pub(crate) fn size(&mut self, file: &Path) -> Option<&FileSize> {
+        if self.size.is_none() {
+            self.size = Self::calc_size(file);
+        }
+        self.size.as_ref()
+    }
+
+    pub(crate) fn calc_size(file: &Path) -> Option<FileSize> {
+        std::fs::metadata(file)
+            .map(|metadata| NonZeroU64::new(metadata.len()))
+            .ok()
+            .flatten()
+    }
+
+    pub(crate) fn file_hash(&mut self, path: &Path, reference: &FileHash) -> Option<&FileHash> {
+        if self.hash.is_none() {
+            self.hash = Self::calc_hash(path, reference.kind);
+        }
+        self.hash.as_ref()
+    }
+
+    fn invoke_digest<D, R>(reader: &mut R, kind: FileHashAlgorithm) -> Option<FileHash>
+    where
+        D: Digest,
+        R: Read,
+    {
+        let mut hasher = D::new();
+        let mut buffer = [0; 1024];
+        loop {
+            let count = reader.read(&mut buffer).ok()?;
+            if count == 0 {
+                break;
+            }
+            hasher.update(&buffer[..count]);
+        }
+        Some(FileHash::from_slice_rev(kind, &hasher.finalize()[..]))
+    }
+
+    pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option<FileHash> {
+        if let Ok(file) = fs::File::open(path) {
+            let mut reader: io::BufReader<fs::File> = io::BufReader::new(file);
+
+            match algo {
+                FileHashAlgorithm::Md5 => Self::invoke_digest::<Md5, _>(&mut reader, algo),
+                FileHashAlgorithm::Sha1 => Self::invoke_digest::<Sha1, _>(&mut reader, algo),
+                FileHashAlgorithm::Sha256 => Self::invoke_digest::<Sha256, _>(&mut reader, algo),
+                FileHashAlgorithm::Svh => {
+                    if path.extension() == Some(std::ffi::OsStr::new("rlib")) {
+                        get_svh_from_ar(reader)
+                    } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) {
+                        get_svh_from_rmeta_file(reader)
+                    } else {
+                        get_svh_from_object_file(reader)
+                    }
+                }
+            }
+        } else {
+            debug!("HASH failed to open path {:?}", path);
+            None
+        }
+    }
+}
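(For reference, the rmeta header layout that `parse_svh` relies on, inferred from its constants and the test vectors below — a sketch, not part of the patch:)

```rust
// bytes 0..4    b"rust" magic
// byte  7       metadata format version (svh only present for >= 6)
// byte  12      length of the svh, asserted to be 64
// bytes 13..77  the svh itself, stored raw (not hex)
fn svh_bytes(header: &[u8]) -> Option<&[u8]> {
    if header.len() < 13 + 64 || &header[..4] != b"rust" {
        return None;
    }
    if header[7] < 6 || header[12] != 64 {
        return None;
    }
    Some(&header[13..13 + 64])
}
```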
+
+#[cfg(test)]
+mod test {
+    use super::{parse_svh, FileHash, FileHashAlgorithm};
+
+    #[test]
+    fn test_no_svh_below_metadata_version_6() {
+        let vec: Vec<u8> = vec![
+            114, 117, 115, 116, 0, 0, 0, 5, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46,
+            52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100,
+            52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1,
+            0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99,
+            111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97,
+            54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108,
+            101, 114, 95, 98, 117, 105, 108,
+        ];
+        // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh-->
+        assert!(parse_svh(&vec).is_none());
+    }
+
+    // TODO: update the bits so the svh is before the rust version!
+    #[test]
+    fn test_svh_in_metadata_version_6() {
+        let vec: Vec<u8> = vec![
+            114, 117, 115, 116, 0, 0, 0, 6, 0, 17, 73, 215, 64, 29, 94, 138, 62, 252, 69, 252, 224,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16,
+            114, 117, 115, 116, 99, 32, 49, 46, 53, 48, 46, 48, 45, 100, 101, 118, 3, 115, 116,
+            100, 220, 173, 135, 163, 173, 242, 162, 182, 228, 1, 0, 2, 17, 45, 48, 55, 56, 97, 54,
+            56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50,
+        ];
+        // r u s t / metadata version | base | size=64 | svh | size_of_version | r u s t c ' ' 1 . 5 0 . 0 - d e v | base_pointer_points_here
+        assert_eq!(
+            parse_svh(&vec),
+            FileHash::from_hex(FileHashAlgorithm::Svh, "1d5e8a3efc45fce0")
+        );
+    }
+
+    #[test]
+    fn file_hash() {
+        let from_str = FileHash::from_hex(FileHashAlgorithm::Svh, "0102030405060708");
+        let from_slice = Some(FileHash::from_slice(
+            FileHashAlgorithm::Svh,
+            &[1, 2, 3, 4, 5, 6, 7, 8],
+        ));
+        assert_eq!(from_str, from_slice);
+    }
+}
diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs
index dc09f7df8f4..03442b4aa2f 100644
--- a/src/cargo/core/compiler/context/mod.rs
+++ b/src/cargo/core/compiler/context/mod.rs
@@ -2,9 +2,10 @@ use std::collections::{BTreeSet, HashMap, HashSet};
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
 
-use filetime::FileTime;
 use jobserver::Client;
 
+use crate::core::compiler::content_hash::CurrentFileprint;
+use crate::core::compiler::fingerprint::RustcDepInfo;
 use crate::core::compiler::{self, compilation, Unit};
 use crate::core::PackageId;
 use crate::util::errors::{CargoResult, CargoResultExt};
@@ -38,7 +39,9 @@ pub struct Context<'a, 'cfg> {
     /// Fingerprints used to detect if a unit is out-of-date.
     pub fingerprints: HashMap<Unit, Arc<Fingerprint>>,
     /// Cache of file mtimes to reduce filesystem hits.
-    pub mtime_cache: HashMap<PathBuf, FileTime>,
+    pub mtime_cache: HashMap<PathBuf, CurrentFileprint>,
+    /// Cache of dep_info to reduce filesystem hits.
+    pub dep_info_cache: HashMap<PathBuf, RustcDepInfo>,
     /// A set used to track which units have been compiled.
     /// A unit may appear in the job graph multiple times as a dependency of
     /// multiple packages, but it only needs to run once.
@@ -107,6 +110,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
             build_script_outputs: Arc::new(Mutex::new(BuildScriptOutputs::default())),
             fingerprints: HashMap::new(),
             mtime_cache: HashMap::new(),
+            dep_info_cache: HashMap::new(),
             compiled: HashSet::new(),
             build_scripts: HashMap::new(),
             build_explicit_deps: HashMap::new(),
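(Both new caches get filled with the same lookup-or-parse-then-insert shape, which recurs in `fingerprint.rs` and `output_depinfo.rs` below. If that pattern grows, it could be factored into one helper along these lines — a sketch only; `cached_dep_info` is a name invented here, and `RustcDepInfo`/`parse_dep_info`/`CargoResult` come from the patched modules:)

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};

/// Parse a dep-info file at most once per build, memoizing into the cache.
fn cached_dep_info<'a>(
    cache: &'a mut HashMap<PathBuf, RustcDepInfo>,
    pkg_root: &Path,
    target_root: &Path,
    loc: &Path,
) -> CargoResult<Option<&'a RustcDepInfo>> {
    if !cache.contains_key(loc) {
        // A file that fails to parse is simply not cached; callers treat
        // `None` as "missing/unreadable", same as the open-coded version.
        if let Some(info) = parse_dep_info(pkg_root, target_root, loc)? {
            cache.insert(loc.to_path_buf(), info);
        }
    }
    Ok(cache.get(loc))
}
```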
diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs
index bc3e43b624c..aa437ae6ce2 100644
--- a/src/cargo/core/compiler/fingerprint.rs
+++ b/src/cargo/core/compiler/fingerprint.rs
@@ -312,29 +312,31 @@
 //! See the `A-rebuild-detection` flag on the issue tracker for more:
 //! <https://github.com/rust-lang/cargo/issues?q=is%3Aissue+is%3Aopen+label%3AA-rebuild-detection>
 
-use std::collections::hash_map::{Entry, HashMap};
+use std::collections::hash_map::HashMap;
 use std::convert::TryInto;
 use std::env;
 use std::hash::{self, Hasher};
+use std::num::NonZeroU64;
 use std::path::{Path, PathBuf};
-use std::str;
+use std::str::{self, FromStr};
 use std::sync::{Arc, Mutex};
 use std::time::SystemTime;
 
 use anyhow::{bail, format_err};
 use filetime::FileTime;
-use log::{debug, info};
+use log::{debug, info, warn};
+use serde;
 use serde::de;
 use serde::ser;
 use serde::{Deserialize, Serialize};
 
+use crate::core::compiler::content_hash::*;
 use crate::core::compiler::unit_graph::UnitDep;
 use crate::core::Package;
 use crate::util;
 use crate::util::errors::{CargoResult, CargoResultExt};
 use crate::util::interning::InternedString;
-use crate::util::paths;
-use crate::util::{internal, profile, ProcessBuilder};
+use crate::util::{internal, paths, profile, Config, ProcessBuilder};
 
 use super::custom_build::BuildDeps;
 use super::job::{
@@ -555,7 +557,7 @@ pub struct Fingerprint {
     /// fingerprint is out of date if this is missing, or if previous
     /// fingerprints output files are regenerated and look newer than this one.
     #[serde(skip)]
-    outputs: Vec<PathBuf>,
+    outputs: Vec<Fileprint>,
 }
 
 /// Indication of the status on the filesystem for a particular unit.
@@ -671,7 +673,7 @@ enum LocalFingerprint {
     /// `output`, otherwise we need to recompile.
     RerunIfChanged {
         output: PathBuf,
-        paths: Vec<PathBuf>,
+        paths: Vec<Fileprint>,
     },
 
     /// This represents a single `rerun-if-env-changed` annotation printed by a
@@ -683,12 +685,24 @@ enum LocalFingerprint {
 
 enum StaleItem {
     MissingFile(PathBuf),
-    ChangedFile {
+    ChangedFileTime {
         reference: PathBuf,
         reference_mtime: FileTime,
         stale: PathBuf,
         stale_mtime: FileTime,
     },
+    ChangedFileSize {
+        reference: PathBuf,
+        reference_size: FileSize,
+        stale: PathBuf,
+        stale_size: Option<FileSize>,
+    },
+    ChangedFileHash {
+        reference: PathBuf,
+        reference_hash: FileHash,
+        stale: PathBuf,
+        stale_hash: Option<FileHash>,
+    },
     ChangedEnv {
         var: String,
         previous: Option<String>,
@@ -710,7 +724,9 @@ impl LocalFingerprint {
     /// is where we'll find whether files have actually changed
     fn find_stale_item(
         &self,
-        mtime_cache: &mut HashMap<PathBuf, FileTime>,
+        config: &Config,
+        mtime_cache: &mut HashMap<PathBuf, CurrentFileprint>,
+        dep_info_cache: &mut HashMap<PathBuf, RustcDepInfo>,
         pkg_root: &Path,
         target_root: &Path,
     ) -> CargoResult<Option<StaleItem>> {
@@ -723,7 +739,13 @@ impl LocalFingerprint {
             // rustc.
             LocalFingerprint::CheckDepInfo { dep_info } => {
                 let dep_info = target_root.join(dep_info);
-                let info = match parse_dep_info(pkg_root, target_root, &dep_info)? {
+                if !dep_info_cache.contains_key(&dep_info) {
+                    if let Some(rustc_dep_info) = parse_dep_info(pkg_root, target_root, &dep_info)?
+                    {
+                        dep_info_cache.insert(dep_info.clone(), rustc_dep_info);
+                    }
+                }
+                let info = match dep_info_cache.get(&dep_info) {
                     Some(info) => info,
                     None => return Ok(Some(StaleItem::MissingFile(dep_info))),
                 };
@@ -738,16 +760,27 @@ impl LocalFingerprint {
                         current,
                     }));
                 }
-                Ok(find_stale_file(mtime_cache, &dep_info, info.files.iter()))
+                Ok(find_stale_file(config, mtime_cache, &dep_info, &info.files))
             }
 
             // We need to verify that no paths listed in `paths` are newer than
             // the `output` path itself, or the last time the build script ran.
-            LocalFingerprint::RerunIfChanged { output, paths } => Ok(find_stale_file(
-                mtime_cache,
-                &target_root.join(output),
-                paths.iter().map(|p| pkg_root.join(p)),
-            )),
+            LocalFingerprint::RerunIfChanged { output, paths } => {
+                let c: Vec<_> = paths
+                    .iter()
+                    .map(|f| {
+                        let mut f = f.clone();
+                        f.path = pkg_root.join(f.path);
+                        f
+                    })
+                    .collect();
+                Ok(find_stale_file(
+                    config,
+                    mtime_cache,
+                    &target_root.join(output),
+                    c.as_slice(),
+                ))
+            }
 
             // These have no dependencies on the filesystem, and their values
             // are included natively in the `Fingerprint` hash so nothing
@@ -962,9 +995,12 @@ impl Fingerprint {
     /// it to `UpToDate` if it can.
     fn check_filesystem(
         &mut self,
-        mtime_cache: &mut HashMap<PathBuf, FileTime>,
+        config: &Config,
+        mtime_cache: &mut HashMap<PathBuf, CurrentFileprint>,
+        dep_info_cache: &mut HashMap<PathBuf, RustcDepInfo>,
         pkg_root: &Path,
         target_root: &Path,
+        dep_info_loc: PathBuf,
     ) -> CargoResult<()> {
         assert!(!self.fs_status.up_to_date());
 
@@ -974,18 +1010,18 @@ impl Fingerprint {
         // afterwards based on the `mtime_on_use` flag. Afterwards we want the
         // minimum mtime as it's the one we'll be comparing to inputs and
         // dependencies.
-        for output in self.outputs.iter() {
-            let mtime = match paths::mtime(output) {
+        for Fileprint { path, .. } in self.outputs.iter() {
+            let mtime = match paths::mtime(path) {
                 Ok(mtime) => mtime,
                 // This path failed to report its `mtime`. It probably doesn't
                 // exist, so leave ourselves as stale and bail out.
                 Err(e) => {
-                    debug!("failed to get mtime of {:?}: {}", output, e);
+                    debug!("failed to get mtime of {:?}: {}", path, e);
                     return Ok(());
                 }
             };
-            assert!(mtimes.insert(output.clone(), mtime).is_none());
+            assert!(mtimes.insert(path.clone(), mtime).is_none());
         }
 
         let opt_max = mtimes.iter().max_by_key(|kv| kv.1);
@@ -1005,6 +1041,9 @@ impl Fingerprint {
             pkg_root, max_path, max_mtime
         );
 
+        let rmeta_ext = std::ffi::OsStr::new("rmeta");
+        let output_dir = std::ffi::OsStr::new("output");
+
         for dep in self.deps.iter() {
             let dep_mtimes = match &dep.fingerprint.fs_status {
                 FsStatus::UpToDate { mtimes } => mtimes,
@@ -1039,15 +1078,164 @@ impl Fingerprint {
            // recompiled previously. We transitively become stale ourselves in
            // that case, so bail out.
            //
-           // Note that this comparison should probably be `>=`, not `>`, but
+           // Note that this comparison should probably be `>=`, not `>`, but
            // for a discussion of why it's `>` see the discussion about #5918
            // below in `find_stale`.
            if dep_mtime > max_mtime {
-               info!(
-                   "dependency on `{}` is newer than we are {} > {} {:?}",
-                   dep.name, dep_mtime, max_mtime, pkg_root
-               );
-               return Ok(());
+               if config.cli_unstable().hash_tracking {
+                   for (dep_in, dep_mtime) in dep_mtimes {
+                       if dep.only_requires_rmeta && dep_in.extension() != Some(rmeta_ext) {
+                           continue;
+                       }
+
+                       if dep_mtime > max_mtime {
+                           let dep_info = dep_info_loc
+                               .strip_prefix(&target_root)
+                               .unwrap()
+                               .to_path_buf();
+
+                           let is_custom_build = dep_info
+                               .to_str()
+                               .unwrap()
+                               .contains("dep-run-build-script-build-script-build");
+
+                           if dep_path.file_name() == Some(output_dir) && is_custom_build {
+                               let stale = if let Some(Fileprint {
+                                   size: Some(size),
+                                   hash: Some(hash),
+                                   ..
+                               }) = &dep
+                                   .fingerprint
+                                   .outputs
+                                   .iter()
+                                   .find(|Fileprint { path, .. }| path == dep_in)
+                               {
+                                   CurrentFileprint::calc_size(dep_in) != Some(*size)
+                                       || CurrentFileprint::calc_hash(dep_in, FileHashAlgorithm::Md5)
+                                           .as_ref()
+                                           != Some(hash)
+                               } else {
+                                   true
+                               };
+
+                               if stale {
+                                   debug!("build.rs output doesn't match previous hash {:?}", dep_in);
+                                   return Ok(());
+                               }
+                           } else {
+                               let dep_info_file = if is_custom_build {
+                                   let mut ddep_info = PathBuf::new();
+                                   for local_dep in (*dep.fingerprint.local.lock().unwrap()).iter()
+                                   {
+                                       if let LocalFingerprint::CheckDepInfo { dep_info } =
+                                           local_dep
+                                       {
+                                           ddep_info = dep_info.to_path_buf();
+                                       }
+                                   }
+                                   target_root.join(&ddep_info)
+                               } else {
+                                   dep_info_loc.clone()
+                               };
+
+                               debug!("reading dep info file: {:?}", &dep_info_file);
+
+                               let rustc_dep_info = dep_info_cache.get(&dep_info_file);
+                               if rustc_dep_info.is_none() {
+                                   match parse_dep_info(pkg_root, target_root, &dep_info_file) {
+                                       Ok(Some(dep)) => {
+                                           dep_info_cache.insert(dep_info_file.clone(), dep);
+                                       }
+                                       Ok(None) => warn!("Dep info file could not be parsed"),
+                                       Err(err) => warn!("Error parsing dep info file {}", err),
+                                   }
+                               }
+
+                               let mut stale = None;
+                               if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) {
+                                   let ref_file = rustc_dep_info
+                                       .files
+                                       .iter()
+                                       .find(|reference| *dep_in == reference.path);
+                                   if let Some(reference) = ref_file {
+                                       if !mtime_cache.contains_key(dep_in) {
+                                           mtime_cache.insert(
+                                               dep_in.clone(),
+                                               CurrentFileprint::new(*dep_mtime),
+                                           );
+                                       }
+                                       let file_facts = mtime_cache.get_mut(dep_in).unwrap();
+
+                                       if let Some(current_size) = file_facts.size(dep_in) {
+                                           if Some(*current_size) != reference.size {
+                                               stale = Some(format!(
+                                                   "file size {:?} doesn't match expected: {:?}",
+                                                   current_size, reference.size
+                                               ));
+                                           }
+                                       } else {
+                                           stale = Some(format!(
+                                               "file size was not obtainable; expected: {:?}",
+                                               reference.size
+                                           ));
+                                       }
+
+                                       if stale.is_none() {
+                                           if let Some(reference_hash) = &reference.hash {
+                                               let current_hash =
+                                                   file_facts.file_hash(dep_in, reference_hash);
+                                               if let Some(file_facts_hash) = current_hash {
+                                                   if reference_hash != file_facts_hash {
+                                                       stale = Some(format!(
+                                                           "hash {:?} doesn't match expected: {:?}",
+                                                           &file_facts_hash, &reference_hash
+                                                       ));
+                                                   }
+                                               } else {
+                                                   stale = Some(format!(
+                                                       "no current hash to compare to the dep info file's {:?}",
+                                                       &reference.hash
+                                                   ));
+                                               }
+                                           } else {
+                                               stale = Some("no reference hash to compare to".into());
+                                           }
+                                       }
+                                   }
+                               } else {
+                                   stale = Some("HASH dep info file could not be found".into());
+                               }
+                               if stale.is_some() {
+                                   info!(
+                                       "dependency on `{}` is newer than we are {} > {} {:?} {:?}",
+                                       dep.name, dep_mtime, max_mtime, pkg_root, dep_path
+                                   );
+                                   info!("HASHMISS also {:?}", stale);
+                                   return Ok(());
+                               }
+                           }
+                       }
+                   }
+               } else {
+                   info!(
+                       "dependency on `{}` is newer than we are {} > {} {:?}",
+                       dep.name, dep_mtime, max_mtime, pkg_root
+                   );
+                   return Ok(());
+               }
            }
        }
 
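(Taken together, the branch above degrades from the cheap mtime comparison to a size check, and only then to a content hash. Condensed to its essentials — a sketch; `is_stale` is an invented name and the types come from `content_hash.rs`:)

```rust
use std::path::Path;

// mtime already said "maybe stale", so fall back to size, then hash.
fn is_stale(path: &Path, reference: &Fileprint, current: &mut CurrentFileprint) -> bool {
    // 1. Size: a single metadata call, no file read. Any mismatch is real.
    if current.size(path).copied() != reference.size {
        return true;
    }
    // 2. Hash: same size, so only now pay for reading the file, using the
    //    same algorithm the reference was recorded with.
    match &reference.hash {
        Some(expected) => current.file_hash(path, expected) != Some(expected),
        // No recorded hash: we cannot prove the file unchanged.
        None => true,
    }
}
```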
@@ -1056,7 +1244,9 @@ impl Fingerprint {
         // files for this package itself. If we do find something log a helpful
         // message and bail out so we stay stale.
         for local in self.local.get_mut().unwrap().iter() {
-            if let Some(item) = local.find_stale_item(mtime_cache, pkg_root, target_root)? {
+            if let Some(item) =
+                local.find_stale_item(config, mtime_cache, dep_info_cache, pkg_root, target_root)?
+            {
                 item.log();
                 return Ok(());
             }
@@ -1180,16 +1370,36 @@ impl StaleItem {
             StaleItem::MissingFile(path) => {
                 info!("stale: missing {:?}", path);
             }
-            StaleItem::ChangedFile {
+            StaleItem::ChangedFileTime {
                 reference,
                 reference_mtime,
                 stale,
                 stale_mtime,
             } => {
-                info!("stale: changed {:?}", stale);
+                info!("stale: time changed {:?}", stale);
                 info!("          (vs) {:?}", reference);
                 info!("               {:?} != {:?}", reference_mtime, stale_mtime);
             }
+            StaleItem::ChangedFileSize {
+                reference,
+                reference_size,
+                stale,
+                stale_size,
+            } => {
+                info!("stale: size changed {:?}", stale);
+                info!("          (vs) {:?}", reference);
+                info!("               {:?} != {:?}", reference_size, stale_size);
+            }
+            StaleItem::ChangedFileHash {
+                reference,
+                reference_hash,
+                stale,
+                stale_hash,
+            } => {
+                info!("stale: hash changed {:?}", stale);
+                info!("          (vs) {:?}", reference);
+                info!("               {:?} != {:?}", reference_hash, stale_hash);
+            }
             StaleItem::ChangedEnv {
                 var,
                 previous,
@@ -1231,7 +1441,15 @@ fn calculate(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Arc<Fingerprint>>
     // After we built the initial `Fingerprint` be sure to update the
     // `fs_status` field of it.
     let target_root = target_root(cx);
-    fingerprint.check_filesystem(&mut cx.mtime_cache, unit.pkg.root(), &target_root)?;
+    let dep_info_loc = dep_info_loc(cx, unit);
+    fingerprint.check_filesystem(
+        cx.bcx.config,
+        &mut cx.mtime_cache,
+        &mut cx.dep_info_cache,
+        unit.pkg.root(),
+        &target_root,
+        dep_info_loc,
+    )?;
@@ ... @@ fn calculate_run_custom_build(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Fingerprint>
         local: Mutex::new(local),
         rustc: util::hash_u64(&cx.bcx.rustc().verbose_version),
         deps,
-        outputs: if overridden { Vec::new() } else { vec![output] },
+        outputs: if overridden {
+            Vec::new()
+        } else {
+            vec![Fileprint::from_md5(output)]
+        },
 
         // Most of the other info is blank here as we don't really include it
         // in the execution of the build script, but... this may be a latent
@@ -1434,6 +1664,7 @@ fn build_script_local_fingerprints(
     // First up, if this build script is entirely overridden, then we just
     // return the hash of what we overrode it with. This is the easy case!
     if let Some(fingerprint) = build_script_override_fingerprint(cx, unit) {
+        debug!("HACK override local fingerprints deps {}", unit.pkg);
         debug!("override local fingerprints deps {}", unit.pkg);
         return (
             Box::new(
@@ -1532,7 +1763,11 @@ fn local_fingerprints_deps(
     let paths = deps
         .rerun_if_changed
         .iter()
-        .map(|p| p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf())
+        .map(|p| Fileprint {
+            path: p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf(),
+            size: CurrentFileprint::calc_size(p),
+            hash: CurrentFileprint::calc_hash(p, FileHashAlgorithm::Md5),
+        })
         .collect();
     local.push(LocalFingerprint::RerunIfChanged { output, paths });
 }
@@ -1652,24 +1887,27 @@ pub fn parse_dep_info(
 ) -> CargoResult<Option<RustcDepInfo>> {
     let data = match paths::read_bytes(dep_info) {
         Ok(data) => data,
-        Err(_) => return Ok(None),
+        Err(err) => {
+            warn!("could not read bytes from dep info file: {}", err);
+            return Ok(None);
+        }
     };
     let info = match EncodedDepInfo::parse(&data) {
         Some(info) => info,
         None => {
-            log::warn!("failed to parse cargo's dep-info at {:?}", dep_info);
+            warn!("failed to parse dep-info file at {:?}", dep_info);
             return Ok(None);
         }
     };
     let mut ret = RustcDepInfo::default();
     ret.env = info.env;
-    for (ty, path) in info.files {
+    for (fileprint, ty) in info.files {
         let path = match ty {
-            DepInfoPathType::PackageRootRelative => pkg_root.join(path),
+            DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path),
             // N.B. path might be absolute here in which case the join will have no effect
-            DepInfoPathType::TargetRootRelative => target_root.join(path),
+            DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path),
        };
-        ret.files.push(path);
+        ret.files.push(Fileprint { path, ..fileprint });
    }
    Ok(Some(ret))
 }
@@ -1684,32 +1922,32 @@ fn pkg_fingerprint(bcx: &BuildContext<'_, '_>, pkg: &Package) -> CargoResult<String>
 
-fn find_stale_file<I>(
-    mtime_cache: &mut HashMap<PathBuf, FileTime>,
+fn find_stale_file(
+    config: &Config,
+    mtime_cache: &mut HashMap<PathBuf, CurrentFileprint>,
     reference: &Path,
-    paths: I,
-) -> Option<StaleItem>
-where
-    I: IntoIterator,
-    I::Item: AsRef<Path>,
-{
+    paths: &[Fileprint],
+) -> Option<StaleItem> {
     let reference_mtime = match paths::mtime(reference) {
         Ok(mtime) => mtime,
         Err(..) => return Some(StaleItem::MissingFile(reference.to_path_buf())),
     };
-    for path in paths {
-        let path = path.as_ref();
-        let path_mtime = match mtime_cache.entry(path.to_path_buf()) {
-            Entry::Occupied(o) => *o.get(),
-            Entry::Vacant(v) => {
-                let mtime = match paths::mtime(path) {
-                    Ok(mtime) => mtime,
-                    Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())),
-                };
-                *v.insert(mtime)
-            }
-        };
+    for Fileprint {
+        path,
+        size: reference_size,
+        hash: reference_hash,
+    } in paths
+    {
+        if !mtime_cache.contains_key(path) {
+            let mtime = match paths::mtime(path) {
+                Ok(mtime) => mtime,
+                Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())),
+            };
+            mtime_cache.insert(path.to_path_buf(), CurrentFileprint::new(mtime));
+        }
+        let current = mtime_cache.get_mut(path).unwrap();
 
         // TODO: fix #5918.
         // Note that equal mtimes should be considered "stale". For filesystems with
@@ -1729,15 +1967,48 @@ where
        // if equal, files were changed just after a previous build finished.
        // Unfortunately this became problematic when (in #6484) cargo switch to more accurately
        // measuring the start time of builds.
-        if path_mtime <= reference_mtime {
+        if current.mtime <= reference_mtime {
            continue;
        }
 
+        if config.cli_unstable().hash_tracking {
+            // The file may still have the expected content.
+            if let (Some(reference_size), Some(reference_hash)) = (reference_size, reference_hash) {
+                let current_size = current.size(path);
+                if current_size != Some(reference_size) {
+                    return Some(StaleItem::ChangedFileSize {
+                        reference: reference.to_path_buf(),
+                        reference_size: *reference_size,
+                        stale: path.to_path_buf(),
+                        stale_size: current_size.map(|s| *s),
+                    });
+                }
+
+                // Same size but mtime is different. Probably there's no change...
+                // compute the hash and compare, to prevent a change cascade.
+                let current_hash = current.file_hash(path, reference_hash);
+                if current_hash != Some(reference_hash) {
+                    // FIXME? We could fail a little faster by checking for size
+                    // discrepancies on _any_ file before checking hashes, but
+                    // it's not clear that's worth the additional complexity.
+                    return Some(StaleItem::ChangedFileHash {
+                        reference: reference.to_path_buf(),
+                        reference_hash: reference_hash.clone(),
+                        stale: path.to_path_buf(),
+                        stale_hash: current_hash.cloned(),
+                    });
+                }
+
+                continue;
+            }
+        }
+
-        return Some(StaleItem::ChangedFile {
+        return Some(StaleItem::ChangedFileTime {
            reference: reference.to_path_buf(),
            reference_mtime,
            stale: path.to_path_buf(),
-            stale_mtime: path_mtime,
+            stale_mtime: current.mtime,
        });
    }
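(The FIXME above suggests a cheaper ordering: compare sizes for every file before hashing any of them. A sketch of that two-pass variant — illustrative; `any_file_stale` is an invented name, and the cache is assumed pre-populated with mtimes as `find_stale_file` does:)

```rust
use std::collections::HashMap;
use std::path::PathBuf;

fn any_file_stale(cache: &mut HashMap<PathBuf, CurrentFileprint>, paths: &[Fileprint]) -> bool {
    // Pass 1: sizes only -- one metadata call per file, no reads.
    for p in paths {
        if let (Some(expected), Some(current)) = (p.size, cache.get_mut(&p.path)) {
            if current.size(&p.path) != Some(&expected) {
                return true;
            }
        }
    }
    // Pass 2: content hashes, reached only when every size matched.
    for p in paths {
        if let (Some(expected), Some(current)) = (&p.hash, cache.get_mut(&p.path)) {
            if current.file_hash(&p.path, expected) != Some(expected) {
                return true;
            }
        }
    }
    false
}
```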
@@ -1823,10 +2094,10 @@ pub fn translate_dep_info(
         .env
         .retain(|(key, _)| !rustc_cmd.get_envs().contains_key(key));
 
-    for file in depinfo.files {
+    for fileprint in depinfo.files {
         // The path may be absolute or relative, canonical or not. Make sure
         // it is canonicalized so we are comparing the same kinds of paths.
-        let abs_file = rustc_cwd.join(file);
+        let abs_file = rustc_cwd.join(fileprint.path);
         // If canonicalization fails, just use the abs path. There is currently
         // a bug where --remap-path-prefix is affecting .d files, causing them
         // to point to non-existent paths.
@@ -1845,7 +2116,13 @@ pub fn translate_dep_info(
             // effect.
             (DepInfoPathType::TargetRootRelative, &*abs_file)
         };
-        on_disk_info.files.push((ty, path.to_owned()));
+        on_disk_info.files.push((
+            Fileprint {
+                path: path.to_path_buf(),
+                ..fileprint
+            },
+            ty,
+        ));
     }
     paths::write(cargo_dep_info, on_disk_info.serialize()?)?;
     Ok(())
@@ -1853,8 +2130,9 @@ pub fn translate_dep_info(
 
 #[derive(Default)]
 pub struct RustcDepInfo {
-    /// The list of files that the main target in the dep-info file depends on.
-    pub files: Vec<PathBuf>,
+    /// The list of files that the main target in the dep-info file depends on,
+    /// plus the size and hash of each of those files.
+    pub files: Vec<Fileprint>, // FIXME: use Option instead?
     /// The list of environment variables we found that the rustc compilation
     /// depends on.
     ///
@@ -1872,23 +2150,51 @@ pub struct RustcDepInfo {
 // Cargo will read it for crates on all future compilations.
 #[derive(Default)]
 struct EncodedDepInfo {
-    files: Vec<(DepInfoPathType, PathBuf)>,
+    files: Vec<(Fileprint, DepInfoPathType)>,
     env: Vec<(String, Option<String>)>,
 }
 
 impl EncodedDepInfo {
     fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
         let bytes = &mut bytes;
         let nfiles = read_usize(bytes)?;
         let mut files = Vec::with_capacity(nfiles as usize);
         for _ in 0..nfiles {
+            let eight_bytes: &[u8; 8] = (&bytes[0..8]).try_into().ok()?;
+            let size = NonZeroU64::new(u64::from_le_bytes(*eight_bytes));
+            *bytes = &bytes[8..];
+
+            let kind = match read_u8(bytes)? {
+                0 => None,
+                1 => Some(FileHashAlgorithm::Md5),
+                2 => Some(FileHashAlgorithm::Sha1),
+                3 => Some(FileHashAlgorithm::Sha256),
+                4 => Some(FileHashAlgorithm::Svh),
+                _ => return None,
+            };
+
+            let hash = if let Some(kind) = kind {
+                let hash = FileHash::from_slice(kind, &bytes[..64]);
+                *bytes = &bytes[64..];
+                Some(hash)
+            } else {
+                None
+            };
+
             let ty = match read_u8(bytes)? {
                 0 => DepInfoPathType::PackageRootRelative,
                 1 => DepInfoPathType::TargetRootRelative,
                 _ => return None,
             };
             let bytes = read_bytes(bytes)?;
-            files.push((ty, util::bytes2path(bytes).ok()?));
+            files.push((
+                Fileprint {
+                    path: util::bytes2path(bytes).ok()?,
+                    size,
+                    hash,
+                },
+                ty,
+            ));
         }
 
         let nenv = read_usize(bytes)?;
@@ -1926,14 +2232,23 @@ impl EncodedDepInfo {
     fn serialize(&self) -> CargoResult<Vec<u8>> {
         let mut ret = Vec::new();
-        let dst = &mut ret;
+        let mut dst = &mut ret;
         write_usize(dst, self.files.len());
-        for (ty, file) in self.files.iter() {
+        for (Fileprint { path, size, hash }, ty) in self.files.iter() {
+            write_u64(dst, size.map(u64::from).unwrap_or(0));
+            if let Some(hash) = hash {
+                hash.write_to_vec(&mut dst);
+            } else {
+                dst.push(0); // None
+            }
             match ty {
                 DepInfoPathType::PackageRootRelative => dst.push(0),
                 DepInfoPathType::TargetRootRelative => dst.push(1),
             }
-            write_bytes(dst, util::path2bytes(file)?);
+
+            write_bytes(dst, util::path2bytes(path)?);
         }
 
         write_usize(dst, self.env.len());
@@ -1958,6 +2273,10 @@ impl EncodedDepInfo {
     fn write_usize(dst: &mut Vec<u8>, val: usize) {
         dst.extend(&u32::to_le_bytes(val as u32));
     }
+
+    fn write_u64(dst: &mut Vec<u8>, val: u64) {
+        dst.extend_from_slice(&u64::to_le_bytes(val));
+    }
 }
 }
 
@@ -1967,8 +2286,13 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
     let mut ret = RustcDepInfo::default();
     let mut found_deps = false;
 
+    let mut prev_line: Option<&str> = None;
     for line in contents.lines() {
-        if let Some(rest) = line.strip_prefix("# env-dep:") {
+        let env_dep_prefix = "# env-dep:";
+        let size_dep_prefix = "# size:";
+        if line.starts_with(env_dep_prefix) {
+            let rest = &line[env_dep_prefix.len()..];
             let mut parts = rest.splitn(2, '=');
             let env_var = match parts.next() {
                 Some(s) => s,
@@ -1979,6 +2303,22 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
                 None => None,
             };
             ret.env.push((unescape_env(env_var)?, env_val));
+        } else if line.starts_with(size_dep_prefix) {
+            if let Some(prev) = prev_line {
+                // The previous line names the file (`<path>:`); strip the colon.
+                let file = &prev[0..prev.len() - 1];
+                for i in 0..ret.files.len() {
+                    if ret.files[i].path.to_string_lossy() == file {
+                        let size_and_hash: Vec<_> =
+                            line[size_dep_prefix.len()..].split(' ').collect(); // TODO: use find/rfind
+                        ret.files[i].size = size_and_hash[0].parse().ok();
+                        let kind_hash: Vec<_> = size_and_hash[1].split(':').collect();
+                        let hash = kind_hash[1];
+                        let kind = FileHashAlgorithm::from_str(kind_hash[0])?;
+                        ret.files[i].hash = FileHash::from_hex_rev(kind, hash);
+                        break;
+                    }
+                }
+                prev_line = None;
+            }
         } else if let Some(pos) = line.find(": ") {
             if found_deps {
                 continue;
@@ -1995,8 +2335,14 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
                     internal("malformed dep-info format, trailing \\".to_string())
                 })?);
             }
-            ret.files.push(file.into());
+            ret.files.push(Fileprint {
+                path: file.into(),
+                size: None,
+                hash: None,
+            });
         }
+        } else {
+            prev_line = Some(line);
         }
     }
     return Ok(ret);
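(For reference, each file record that `EncodedDepInfo::serialize` emits — and `parse` consumes — is laid out as follows. All integers are little-endian; `encode_record` is a documentation sketch, not in the patch:)

```rust
// [0..8]   u64       file size (0 when unknown; NonZeroU64 on the Rust side)
// [8]      u8        hash kind: 0=none, 1=md5, 2=sha1, 3=sha256, 4=svh
// if kind != 0:
// [9..73]  [u8; 64]  hash bytes (hash_front ++ hash_back, zero-padded)
// next     u8        path type: 0=package-root-relative, 1=target-root-relative
// next     u32 + N   path length, then that many path bytes
fn encode_record(dst: &mut Vec<u8>, size: u64, kind: u8, hash: &[u8; 64], ty: u8, path: &[u8]) {
    dst.extend_from_slice(&size.to_le_bytes());
    dst.push(kind);
    if kind != 0 {
        dst.extend_from_slice(hash);
    }
    dst.push(ty);
    dst.extend_from_slice(&(path.len() as u32).to_le_bytes());
    dst.extend_from_slice(path);
}
```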
diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs
index 53849e300e1..00b48e01fc2 100644
--- a/src/cargo/core/compiler/mod.rs
+++ b/src/cargo/core/compiler/mod.rs
@@ -3,6 +3,7 @@ mod build_context;
 mod build_plan;
 mod compilation;
 mod compile_kind;
+mod content_hash;
 mod context;
 mod crate_type;
 mod custom_build;
diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs
index c8c10a516ee..9971ad08fa4 100644
--- a/src/cargo/core/compiler/output_depinfo.rs
+++ b/src/cargo/core/compiler/output_depinfo.rs
@@ -24,13 +24,13 @@
 use std::collections::{BTreeSet, HashSet};
 use std::io::{BufWriter, Write};
-use std::path::{Path, PathBuf};
+use std::path::Path;
 
 use log::debug;
 
 use super::{fingerprint, Context, FileFlavor, Unit};
-use crate::util::paths;
-use crate::util::{internal, CargoResult};
+use crate::core::compiler::content_hash::Fileprint;
+use crate::util::{internal, paths, CargoResult};
 
 fn render_filename<P: AsRef<Path>>(path: P, basedir: Option<&str>) -> CargoResult<String> {
     let path = path.as_ref();
@@ -48,7 +48,7 @@ fn render_filename<P: AsRef<Path>>(path: P, basedir: Option<&str>) -> CargoResult<String>
 }
 
 fn add_deps_for_unit(
-    deps: &mut BTreeSet<PathBuf>,
+    deps: &mut BTreeSet<Fileprint>,
     cx: &mut Context<'_, '_>,
     unit: &Unit,
     visited: &mut HashSet<Unit>,
@@ -62,11 +62,21 @@ fn add_deps_for_unit(
     if !unit.mode.is_run_custom_build() {
         // Add dependencies from rustc dep-info output (stored in fingerprint directory)
         let dep_info_loc = fingerprint::dep_info_loc(cx, unit);
-        if let Some(paths) =
-            fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)?
-        {
-            for path in paths.files {
-                deps.insert(path);
+
+        let mut dep_info = cx.dep_info_cache.get(&dep_info_loc);
+        if dep_info.is_none() {
+            if let Some(parsed_dep_info) =
+                fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)?
+            {
+                cx.dep_info_cache
+                    .insert(dep_info_loc.clone(), parsed_dep_info);
+                dep_info = cx.dep_info_cache.get(&dep_info_loc);
+            }
+        }
+
+        if let Some(paths) = dep_info {
+            for path in &paths.files {
+                deps.insert(path.clone());
             }
         } else {
             debug!(
@@ -87,7 +97,7 @@ fn add_deps_for_unit(
         .get(unit.pkg.package_id(), metadata)
     {
         for path in &output.rerun_if_changed {
-            deps.insert(path.into());
+            deps.insert(Fileprint::from_md5(path.to_path_buf()));
         }
     }
 
@@ -107,7 +117,7 @@ fn add_deps_for_unit(
 /// This only saves files for uplifted artifacts.
 pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> {
     let bcx = cx.bcx;
-    let mut deps = BTreeSet::new();
+    let mut deps: BTreeSet<Fileprint> = BTreeSet::new();
     let mut visited = HashSet::new();
     let success = add_deps_for_unit(&mut deps, cx, unit, &mut visited).is_ok();
     let basedir_string;
@@ -125,7 +135,7 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()>
     };
     let deps = deps
         .iter()
-        .map(|f| render_filename(f, basedir))
+        .map(|f| render_filename(&f.path, basedir).map(|rendered| (rendered, f)))
         .collect::<CargoResult<Vec<_>>>()?;
 
     for output in cx
@@ -141,7 +151,7 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()>
             // If nothing changed don't recreate the file which could alter
             // its mtime
             if let Ok(previous) = fingerprint::parse_rustc_dep_info(&output_path) {
-                if previous.files.iter().eq(deps.iter().map(Path::new)) {
+                if previous.files.iter().eq(deps.iter().map(|(_, dep)| *dep)) {
                     continue;
                 }
             }
 
             // Otherwise write it all out
             let mut outfile = BufWriter::new(paths::create(output_path)?);
             write!(outfile, "{}:", target_fn)?;
-            for dep in &deps {
-                write!(outfile, " {}", dep)?;
+            for (rendered_dep, _) in &deps {
+                write!(outfile, " {}", rendered_dep)?;
             }
             writeln!(outfile)?;
 
+            // Emit a fake target for each input file to the compilation. This
+            // prevents `make` from spitting out an error if a file is later
+            // deleted. For more info see #28735
+            for (
+                rendered_dep,
+                Fileprint {
+                    path: _dep,
+                    size,
+                    hash,
+                },
+            ) in &deps
+            {
+                writeln!(outfile, "{}:", rendered_dep)?;
+                if let (Some(size), Some(hash)) = (size, hash) {
+                    writeln!(outfile, "# size:{} {}", size, hash)?;
+                }
+            }
+
             // dep-info generation failed, so delete output file. This will
             // usually cause the build system to always rerun the build
             // rule, which is correct if inefficient.
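(With `-Z hash-tracking` enabled, an emitted `.d` file now ends with a fake target per input plus its recorded size and hash, along these lines — hypothetical paths, hashes shortened:)

```make
target/debug/foo: src/main.rs src/util.rs

src/main.rs:
# size:2301 md5:ab12…
src/util.rs:
# size:977 md5:c0ff…
```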
diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs
index f2e5d957728..774cc212113 100644
--- a/src/cargo/core/features.rs
+++ b/src/cargo/core/features.rs
@@ -358,6 +358,7 @@ pub struct CliUnstable {
     pub rustdoc_map: bool,
     pub terminal_width: Option<Option<usize>>,
     pub namespaced_features: bool,
+    pub hash_tracking: bool,
     pub weak_dep_features: bool,
 }
 
@@ -465,6 +466,7 @@ impl CliUnstable {
             "rustdoc-map" => self.rustdoc_map = parse_empty(k, v)?,
             "terminal-width" => self.terminal_width = Some(parse_usize_opt(v)?),
             "namespaced-features" => self.namespaced_features = parse_empty(k, v)?,
+            "hash-tracking" => self.hash_tracking = parse_empty(k, v)?,
             "weak-dep-features" => self.weak_dep_features = parse_empty(k, v)?,
             _ => bail!("unknown `-Z` flag specified: {}", k),
         }
diff --git a/tests/testsuite/dep_info.rs b/tests/testsuite/dep_info.rs
index d63d09652b7..11b07ba42bf 100644
--- a/tests/testsuite/dep_info.rs
+++ b/tests/testsuite/dep_info.rs
@@ -27,6 +27,16 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[(
     let dep_info = &mut &dep_info[..];
     let deps = (0..read_usize(dep_info))
         .map(|_| {
+            // FIXME: rather than discarding these we could check them?
+            let eight_bytes: &[u8; 8] = (&dep_info[0..8]).try_into().unwrap();
+            let _size = u64::from_le_bytes(*eight_bytes);
+            *dep_info = &dep_info[8..];
+
+            let hash_kind = read_u8(dep_info);
+            if hash_kind != 0 {
+                // Skip the 64 raw hash bytes that follow a non-zero kind.
+                *dep_info = &dep_info[64..];
+            }
             (
                 read_u8(dep_info),
                 str::from_utf8(read_bytes(dep_info)).unwrap(),