From 68c94e8eb809b75b936b735371f38d5bfa73a444 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sun, 16 Aug 2020 17:26:36 +0100 Subject: [PATCH 01/39] mtime+content tracking --- Cargo.toml | 1 + src/cargo/core/compiler/context/mod.rs | 2 +- src/cargo/core/compiler/fingerprint.rs | 187 ++++++++++++++++++---- src/cargo/core/compiler/output_depinfo.rs | 10 +- src/cargo/core/features.rs | 2 + 5 files changed, 164 insertions(+), 38 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f1d1b61f26a..2508f16aee9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ anyhow = "1.0" filetime = "0.2.9" flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] } git2 = "0.13.12" +fxhash = "0.2.1" git2-curl = "0.14.0" glob = "0.3.0" hex = "0.4" diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index dc09f7df8f4..9db67039c36 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -38,7 +38,7 @@ pub struct Context<'a, 'cfg> { /// Fingerprints used to detect if a unit is out-of-date. pub fingerprints: HashMap>, /// Cache of file mtimes to reduce filesystem hits. - pub mtime_cache: HashMap, + pub mtime_cache: HashMap, /// A set used to track which units have been compiled. /// A unit may appear in the job graph multiple times as a dependency of /// multiple packages, but it only needs to run once. diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 3889ee84d8d..bd124b654ca 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -314,7 +314,9 @@ use std::collections::hash_map::{Entry, HashMap}; use std::env; +use std::fs; use std::hash::{self, Hasher}; +use std::io::{self, Read}; use std::path::{Path, PathBuf}; use std::str; use std::sync::{Arc, Mutex}; @@ -322,6 +324,7 @@ use std::time::SystemTime; use anyhow::{bail, format_err}; use filetime::FileTime; +use fxhash::FxHasher; use log::{debug, info}; use serde::de; use serde::ser; @@ -332,8 +335,7 @@ use crate::core::Package; use crate::util; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; -use crate::util::paths; -use crate::util::{internal, profile, ProcessBuilder}; +use crate::util::{internal, paths, profile, Config, ProcessBuilder}; use super::custom_build::BuildDeps; use super::job::{ @@ -342,6 +344,9 @@ use super::job::{ }; use super::{BuildContext, Context, FileFlavor, Unit}; +type FileSize = u32; +type FileHash = u64; + /// Determines if a `unit` is up-to-date, and if not prepares necessary work to /// update the persisted fingerprint. /// @@ -709,7 +714,8 @@ impl LocalFingerprint { /// is where we'll find whether files have actually changed fn find_stale_item( &self, - mtime_cache: &mut HashMap, + config: &Config, + mtime_cache: &mut HashMap, pkg_root: &Path, target_root: &Path, ) -> CargoResult> { @@ -737,16 +743,23 @@ impl LocalFingerprint { current, })); } - Ok(find_stale_file(mtime_cache, &dep_info, info.files.iter())) + Ok(find_stale_file(config, mtime_cache, &dep_info, &info.files)) } // We need to verify that no paths listed in `paths` are newer than // the `output` path itself, or the last time the build script ran. 
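 // As of this patch the staleness check below also carries a recorded size and
 // content hash per path; `rerun-if-changed` paths have neither recorded, so
 // they are padded with `(path, 0u32, 0u64)` placeholders and effectively keep
 // the old mtime-only behaviour.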
- LocalFingerprint::RerunIfChanged { output, paths } => Ok(find_stale_file( - mtime_cache, - &target_root.join(output), - paths.iter().map(|p| pkg_root.join(p)), - )), + LocalFingerprint::RerunIfChanged { output, paths } => { + let c: Vec<_> = paths + .iter() + .map(|p| (pkg_root.join(p), 0u32, 0u64)) + .collect(); + Ok(find_stale_file( + config, + mtime_cache, + &target_root.join(output), + &c, + )) + } // These have no dependencies on the filesystem, and their values // are included natively in the `Fingerprint` hash so nothing @@ -961,12 +974,12 @@ impl Fingerprint { /// it to `UpToDate` if it can. fn check_filesystem( &mut self, - mtime_cache: &mut HashMap, + config: &Config, + mtime_cache: &mut HashMap, pkg_root: &Path, target_root: &Path, ) -> CargoResult<()> { assert!(!self.fs_status.up_to_date()); - let mut mtimes = HashMap::new(); // Get the `mtime` of all outputs. Optionally update their mtime @@ -1055,7 +1068,7 @@ impl Fingerprint { // files for this package itself. If we do find something log a helpful // message and bail out so we stay stale. for local in self.local.get_mut().unwrap().iter() { - if let Some(item) = local.find_stale_item(mtime_cache, pkg_root, target_root)? { + if let Some(item) = local.find_stale_item(config, mtime_cache, pkg_root, target_root)? { item.log(); return Ok(()); } @@ -1230,7 +1243,12 @@ fn calculate(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult pkg_root.join(path), // N.B. path might be absolute here in which case the join will have no effect DepInfoPathType::TargetRootRelative => target_root.join(path), }; - ret.files.push(path); + ret.files.push((path, size, hash)); } Ok(Some(ret)) } @@ -1683,30 +1701,37 @@ fn pkg_fingerprint(bcx: &BuildContext<'_, '_>, pkg: &Package) -> CargoResult( - mtime_cache: &mut HashMap, +//type It = ; +fn find_stale_file( + config: &Config, + mtime_cache: &mut HashMap, reference: &Path, - paths: I, -) -> Option -where - I: IntoIterator, - I::Item: AsRef, -{ + paths: &[(PathBuf, u32, u64)], +) -> Option { let reference_mtime = match paths::mtime(reference) { Ok(mtime) => mtime, Err(..) => return Some(StaleItem::MissingFile(reference.to_path_buf())), }; - for path in paths { - let path = path.as_ref(); - let path_mtime = match mtime_cache.entry(path.to_path_buf()) { + for (path, reference_size, reference_hash) in paths { + let path = &path; + let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { let mtime = match paths::mtime(path) { Ok(mtime) => mtime, Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), }; - *v.insert(mtime) + let current_size = if config.cli_unstable().hash_tracking { + match std::fs::metadata(path) { + // For file difference checking just check the lower bits of file size + Ok(metadata) => metadata.len() as u32, + Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), //todo + } + } else { + 0 + }; + *v.insert((mtime, current_size, 0u64)) // Hash calculated only if needed later. } }; @@ -1728,10 +1753,45 @@ where // if equal, files were changed just after a previous build finished. // Unfortunately this became problematic when (in #6484) cargo switch to more accurately // measuring the start time of builds. + + // Has size changed? 
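+ // Only the lower 32 bits of the size are recorded (see the dep-info
+ // encoding below), so this is a cheap first-pass filter: a recorded
+ // size of 0 means "nothing recorded" and the check is skipped entirely.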
+ if config.cli_unstable().hash_tracking + && *reference_size > 0 + && path_size != *reference_size + { + return Some(StaleItem::ChangedFile { + reference: reference.to_path_buf(), + reference_mtime, + stale: path.to_path_buf(), + stale_mtime: path_mtime, + }); + } + if path_mtime <= reference_mtime { continue; } + // Same size but mtime is different. Probably there's no change... + // compute hash and compare to prevent change cascade... + if config.cli_unstable().hash_tracking && *reference_hash > 0 { + //FIXME put the result in the mtime_cache rather than hashing each time! + let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME + let mut hasher = FxHasher::default(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).unwrap(); //FIXME + if count == 0 { + break; + } + hasher.write(&buffer[..count]); + } + let hash = hasher.finish(); + + if hash == *reference_hash { + continue; + } + } + return Some(StaleItem::ChangedFile { reference: reference.to_path_buf(), reference_mtime, @@ -1822,7 +1882,7 @@ pub fn translate_dep_info( .env .retain(|(key, _)| !rustc_cmd.get_envs().contains_key(key)); - for file in depinfo.files { + for (file, size, hash) in depinfo.files { // The path may be absolute or relative, canonical or not. Make sure // it is canonicalized so we are comparing the same kinds of paths. let abs_file = rustc_cwd.join(file); @@ -1844,7 +1904,7 @@ pub fn translate_dep_info( // effect. (DepInfoPathType::TargetRootRelative, &*abs_file) }; - on_disk_info.files.push((ty, path.to_owned())); + on_disk_info.files.push((size, hash, ty, path.to_owned())); } paths::write(cargo_dep_info, on_disk_info.serialize()?)?; Ok(()) @@ -1853,7 +1913,8 @@ pub fn translate_dep_info( #[derive(Default)] pub struct RustcDepInfo { /// The list of files that the main target in the dep-info file depends on. - pub files: Vec, + /// and lower 32bits of size and hash (or 0 if not there). + pub files: Vec<(PathBuf, u32, u64)>, //FIXME use Option instead? /// The list of environment variables we found that the rustc compilation /// depends on. /// @@ -1871,7 +1932,7 @@ pub struct RustcDepInfo { // Cargo will read it for crates on all future compilations. #[derive(Default)] struct EncodedDepInfo { - files: Vec<(DepInfoPathType, PathBuf)>, + files: Vec<(FileSize, FileHash, DepInfoPathType, PathBuf)>, env: Vec<(String, Option)>, } @@ -1881,13 +1942,18 @@ impl EncodedDepInfo { let nfiles = read_usize(bytes)?; let mut files = Vec::with_capacity(nfiles as usize); for _ in 0..nfiles { + //FIXME: backward compatibility!!! + let size = read_usize(bytes)? as FileSize; + //debug!("read size as {}", size); + let hash = read_u64(bytes)?; + //debug!("read hash as {}", hash); let ty = match read_u8(bytes)? 
{ 0 => DepInfoPathType::PackageRootRelative, 1 => DepInfoPathType::TargetRootRelative, _ => return None, }; let bytes = read_bytes(bytes)?; - files.push((ty, util::bytes2path(bytes).ok()?)); + files.push((size, hash, ty, util::bytes2path(bytes).ok()?)); } let nenv = read_usize(bytes)?; @@ -1914,6 +1980,21 @@ impl EncodedDepInfo { ) } + fn read_u64(bytes: &mut &[u8]) -> Option { + let ret = bytes.get(..8)?; + *bytes = &bytes[8..]; + Some( + ((ret[0] as u64) << 0) + | ((ret[1] as u64) << 8) + | ((ret[2] as u64) << 16) + | ((ret[3] as u64) << 24) + | ((ret[4] as u64) << 32) + | ((ret[5] as u64) << 40) + | ((ret[6] as u64) << 48) + | ((ret[7] as u64) << 56), + ) + } + fn read_u8(bytes: &mut &[u8]) -> Option { let ret = *bytes.get(0)?; *bytes = &bytes[1..]; @@ -1932,7 +2013,11 @@ impl EncodedDepInfo { let mut ret = Vec::new(); let dst = &mut ret; write_usize(dst, self.files.len()); - for (ty, file) in self.files.iter() { + for (size, hash, ty, file) in self.files.iter() { + //debug!("writing depinfo size as {} ", *size as usize); + write_usize(dst, *size as usize); + //debug!("writing depinfo hash as {} ", *hash); + write_u64(dst, *hash); match ty { DepInfoPathType::PackageRootRelative => dst.push(0), DepInfoPathType::TargetRootRelative => dst.push(1), @@ -1965,6 +2050,17 @@ impl EncodedDepInfo { dst.push((val >> 16) as u8); dst.push((val >> 24) as u8); } + + fn write_u64(dst: &mut Vec, val: u64) { + dst.push(val as u8); + dst.push((val >> 8) as u8); + dst.push((val >> 16) as u8); + dst.push((val >> 24) as u8); + dst.push((val >> 32) as u8); + dst.push((val >> 40) as u8); + dst.push((val >> 48) as u8); + dst.push((val >> 56) as u8); + } } } @@ -1974,8 +2070,13 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult let mut ret = RustcDepInfo::default(); let mut found_deps = false; + let mut prev_line: Option<&str> = None; for line in contents.lines() { - if let Some(rest) = line.strip_prefix("# env-dep:") { + //if let Some(rest) = line.strip_prefix("# env-dep:") { + let env_dep_prefix = "# env-dep:"; + let size_dep_prefix = "# size:"; + if line.starts_with(env_dep_prefix) { + let rest = &line[env_dep_prefix.len()..]; let mut parts = rest.splitn(2, '='); let env_var = match parts.next() { Some(s) => s, @@ -1986,6 +2087,20 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult None => None, }; ret.env.push((unescape_env(env_var)?, env_val)); + } else if line.starts_with(size_dep_prefix) { + if let Some(prev) = prev_line { + let file = &prev[0..prev.len() - 1]; + for i in 0..ret.files.len() { + if ret.files[i].0.to_string_lossy() == file { + let parts: Vec<_> = line["# size:".len()..].split(" ").collect(); + ret.files[i].1 = parts[0].trim().parse()?; //FIXME do we need trims? 
+ let hash = &parts[1]["hash:".len()..].trim(); + ret.files[i].2 = hash.parse()?; + break; + } + } + prev_line = None; + } } else if let Some(pos) = line.find(": ") { if found_deps { continue; @@ -2002,8 +2117,10 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult internal("malformed dep-info format, trailing \\".to_string()) })?); } - ret.files.push(file.into()); + ret.files.push((file.into(), 0, 0)); } + } else { + prev_line = Some(line); } } return Ok(ret); diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index c8c10a516ee..b93a7a0795c 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -66,7 +66,7 @@ fn add_deps_for_unit( fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)? { for path in paths.files { - deps.insert(path); + deps.insert(path.0); //FIXME can we track size/hash of custom builds? } } else { debug!( @@ -141,7 +141,13 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> // If nothing changed don't recreate the file which could alter // its mtime if let Ok(previous) = fingerprint::parse_rustc_dep_info(&output_path) { - if previous.files.iter().eq(deps.iter().map(Path::new)) { + if previous + .files + .iter() + .map(|(path, _size, _hash)| path) + .eq(deps.iter().map(Path::new)) + { + //FIXME we could check for size differences here? continue; } } diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs index 5d3aa4db8cf..a6afbc91231 100644 --- a/src/cargo/core/features.rs +++ b/src/cargo/core/features.rs @@ -358,6 +358,7 @@ pub struct CliUnstable { pub rustdoc_map: bool, pub terminal_width: Option>, pub namespaced_features: bool, + pub hash_tracking: bool, } fn deserialize_build_std<'de, D>(deserializer: D) -> Result>, D::Error> @@ -464,6 +465,7 @@ impl CliUnstable { "rustdoc-map" => self.rustdoc_map = parse_empty(k, v)?, "terminal-width" => self.terminal_width = Some(parse_usize_opt(v)?), "namespaced-features" => self.namespaced_features = parse_empty(k, v)?, + "hash-tracking" => self.hash_tracking = parse_empty(k, v)?, _ => bail!("unknown `-Z` flag specified: {}", k), } From df16720436eacc47c25d40625380190199edeb89 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 21 Aug 2020 10:42:53 +0100 Subject: [PATCH 02/39] v2: Take advantage of rustc's precalulated src hashes. 
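
Rather than cargo re-hashing every source file itself, rustc already knows the
source hashes and can emit them into the dep-info file. A sketch of the
annotated dep-info shape the parser below expects, with the digest value
purely illustrative:

    src/lib.rs:
    # size:1842 md5:2f7b4a02816e7b8a8d4da36daa76f352

Each `# size:<bytes> <kind>:<hex>` comment line is paired with the dependency
line just above it (`prev_line` in the parser), so older dep-info files
without these comments keep working and simply fall back to mtimes.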
--- Cargo.toml | 2 + src/cargo/core/compiler/context/mod.rs | 6 +- src/cargo/core/compiler/fingerprint.rs | 155 ++++++++++++++++++++----- 3 files changed, 129 insertions(+), 34 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2508f16aee9..ec1aacd4ffb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ lazycell = "1.2.0" libc = "0.2" log = "0.4.6" libgit2-sys = "0.12.14" +md-5 = "0.8" memchr = "2.1.3" num_cpus = "1.0" opener = "0.4" @@ -56,6 +57,7 @@ semver = { version = "0.10", features = ["serde"] } serde = { version = "1.0.82", features = ["derive"] } serde_ignored = "0.1.0" serde_json = { version = "1.0.30", features = ["raw_value"] } +sha-1 = "0.8" shell-escape = "0.1.4" strip-ansi-escapes = "0.1.0" tar = { version = "0.4.26", default-features = false } diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 9db67039c36..3ca4684e232 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -5,7 +5,9 @@ use std::sync::{Arc, Mutex}; use filetime::FileTime; use jobserver::Client; -use crate::core::compiler::{self, compilation, Unit}; +use crate::core::compiler::{ + self, compilation, fingerprint::FileHash, fingerprint::FileSize, Unit, +}; use crate::core::PackageId; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::profile; @@ -38,7 +40,7 @@ pub struct Context<'a, 'cfg> { /// Fingerprints used to detect if a unit is out-of-date. pub fingerprints: HashMap>, /// Cache of file mtimes to reduce filesystem hits. - pub mtime_cache: HashMap, + pub mtime_cache: HashMap, /// A set used to track which units have been compiled. /// A unit may appear in the job graph multiple times as a dependency of /// multiple packages, but it only needs to run once. diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index bd124b654ca..75c3a19fe60 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -318,17 +318,18 @@ use std::fs; use std::hash::{self, Hasher}; use std::io::{self, Read}; use std::path::{Path, PathBuf}; -use std::str; +use std::str::{self, FromStr}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; use anyhow::{bail, format_err}; use filetime::FileTime; -use fxhash::FxHasher; use log::{debug, info}; +use md5::{Digest, Md5}; use serde::de; use serde::ser; use serde::{Deserialize, Serialize}; +use sha1::Sha1; use crate::core::compiler::unit_graph::UnitDep; use crate::core::Package; @@ -344,8 +345,14 @@ use super::job::{ }; use super::{BuildContext, Context, FileFlavor, Unit}; -type FileSize = u32; -type FileHash = u64; +// While source files can't currently be > 4Gb, bin files could be. +pub type FileSize = u64; + +#[derive(Clone)] +pub struct FileHash { + kind: SourceFileHashAlgorithm, + hash: String, +} /// Determines if a `unit` is up-to-date, and if not prepares necessary work to /// update the persisted fingerprint. 
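
(A quick orientation note between hunks: `FileHash` keeps the digest as a
`String` of hex text because that is how it arrives in rustc's dep-info
output, and it is only ever compared for equality. A minimal sketch of
building one from a `<kind>:<hex>` annotation; the helper name is
illustrative, not part of the patch:)

    use std::str::FromStr;

    fn file_hash_from_annotation(s: &str) -> Option<FileHash> {
        // e.g. "md5:2f7b4a02..." -> (SourceFileHashAlgorithm::Md5, "2f7b4a02...")
        let mut parts = s.splitn(2, ':');
        let kind = SourceFileHashAlgorithm::from_str(parts.next()?).ok()?;
        Some(FileHash {
            kind,
            hash: parts.next()?.to_string(),
        })
    }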
@@ -751,13 +758,22 @@ impl LocalFingerprint { LocalFingerprint::RerunIfChanged { output, paths } => { let c: Vec<_> = paths .iter() - .map(|p| (pkg_root.join(p), 0u32, 0u64)) + .map(|p| { + ( + pkg_root.join(p), + 0u64, + FileHash { + kind: SourceFileHashAlgorithm::Md5, + hash: String::new(), + }, + ) + }) .collect(); Ok(find_stale_file( config, mtime_cache, &target_root.join(output), - &c, + c.as_slice(), )) } @@ -1706,7 +1722,7 @@ fn find_stale_file( config: &Config, mtime_cache: &mut HashMap, reference: &Path, - paths: &[(PathBuf, u32, u64)], + paths: &[(PathBuf, FileSize, FileHash)], ) -> Option { let reference_mtime = match paths::mtime(reference) { Ok(mtime) => mtime, @@ -1716,7 +1732,7 @@ fn find_stale_file( for (path, reference_size, reference_hash) in paths { let path = &path; let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) { - Entry::Occupied(o) => *o.get(), + Entry::Occupied(o) => o.get().clone(), //FIXME? do we need to clone here? Entry::Vacant(v) => { let mtime = match paths::mtime(path) { Ok(mtime) => mtime, @@ -1725,13 +1741,21 @@ fn find_stale_file( let current_size = if config.cli_unstable().hash_tracking { match std::fs::metadata(path) { // For file difference checking just check the lower bits of file size - Ok(metadata) => metadata.len() as u32, + Ok(metadata) => metadata.len(), Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), //todo } } else { 0 }; - *v.insert((mtime, current_size, 0u64)) // Hash calculated only if needed later. + v.insert(( + mtime, + current_size, + FileHash { + kind: SourceFileHashAlgorithm::Md5, + hash: String::new(), + }, + )) + .clone() // Hash calculated only if needed later. } }; @@ -1773,21 +1797,40 @@ fn find_stale_file( // Same size but mtime is different. Probably there's no change... // compute hash and compare to prevent change cascade... - if config.cli_unstable().hash_tracking && *reference_hash > 0 { + if config.cli_unstable().hash_tracking && reference_hash.hash.len() > 0 { + // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. + // but not sure it's worth the additional complexity. //FIXME put the result in the mtime_cache rather than hashing each time! 
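+ // The buffered reader keeps memory use flat: the file is consumed in 1kb
+ // blocks below and fed to the digest incrementally. This branch is only
+ // reached when the mtime looks newer but the recorded size (if any) still
+ // matches, so the hash is the final arbiter of staleness.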
let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME - let mut hasher = FxHasher::default(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).unwrap(); //FIXME - if count == 0 { - break; + + let hash = match reference_hash.kind { + SourceFileHashAlgorithm::Md5 => { + let mut hasher = Md5::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).unwrap(); //FIXME + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + format!("{:?}", hasher.result()) } - hasher.write(&buffer[..count]); - } - let hash = hasher.finish(); + SourceFileHashAlgorithm::Sha1 => { + let mut hasher = Sha1::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).unwrap(); //FIXME + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + format!("{:?}", hasher.result()) + } + }; - if hash == *reference_hash { + if hash == reference_hash.hash { continue; } } @@ -1807,6 +1850,24 @@ fn find_stale_file( None } +#[derive(Clone, Copy, Eq, PartialEq)] +pub enum SourceFileHashAlgorithm { + Md5, + Sha1, +} + +impl FromStr for SourceFileHashAlgorithm { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "md5" => Ok(SourceFileHashAlgorithm::Md5), + "sha1" => Ok(SourceFileHashAlgorithm::Sha1), + _ => Err(()), + } + } +} + enum DepInfoPathType { // src/, e.g. src/lib.rs PackageRootRelative, @@ -1914,7 +1975,7 @@ pub fn translate_dep_info( pub struct RustcDepInfo { /// The list of files that the main target in the dep-info file depends on. /// and lower 32bits of size and hash (or 0 if not there). - pub files: Vec<(PathBuf, u32, u64)>, //FIXME use Option instead? + pub files: Vec<(PathBuf, FileSize, FileHash)>, //FIXME use Option instead? /// The list of environment variables we found that the rustc compilation /// depends on. /// @@ -1939,21 +2000,34 @@ struct EncodedDepInfo { impl EncodedDepInfo { fn parse(mut bytes: &[u8]) -> Option { let bytes = &mut bytes; - let nfiles = read_usize(bytes)?; + let nfiles = read_usize(bytes).unwrap(); let mut files = Vec::with_capacity(nfiles as usize); for _ in 0..nfiles { //FIXME: backward compatibility!!! - let size = read_usize(bytes)? as FileSize; + let size = read_u64(bytes)? as FileSize; //debug!("read size as {}", size); - let hash = read_u64(bytes)?; + let hash_buf = read_bytes(bytes)?; + + let hash = String::from_utf8(hash_buf.to_vec()).unwrap(); + //debug!("read hash as {}", hash); + let kind = match read_u8(bytes)? { + 0 => SourceFileHashAlgorithm::Md5, + 1 => SourceFileHashAlgorithm::Sha1, + _ => return None, + }; let ty = match read_u8(bytes)? 
{ 0 => DepInfoPathType::PackageRootRelative, 1 => DepInfoPathType::TargetRootRelative, _ => return None, }; let bytes = read_bytes(bytes)?; - files.push((size, hash, ty, util::bytes2path(bytes).ok()?)); + files.push(( + size, + FileHash { kind, hash }, + ty, + util::bytes2path(bytes).ok()?, + )); } let nenv = read_usize(bytes)?; @@ -2015,9 +2089,14 @@ impl EncodedDepInfo { write_usize(dst, self.files.len()); for (size, hash, ty, file) in self.files.iter() { //debug!("writing depinfo size as {} ", *size as usize); - write_usize(dst, *size as usize); - //debug!("writing depinfo hash as {} ", *hash); - write_u64(dst, *hash); + write_u64(dst, *size); + //debug!("writing depinfo hash as {} ", hash.hash.len()); + write_bytes(dst, hash.hash.as_bytes()); + //write(dst, hash.hash); + match hash.kind { + SourceFileHashAlgorithm::Md5 => dst.push(0), + SourceFileHashAlgorithm::Sha1 => dst.push(1), + } match ty { DepInfoPathType::PackageRootRelative => dst.push(0), DepInfoPathType::TargetRootRelative => dst.push(1), @@ -2094,8 +2173,13 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult if ret.files[i].0.to_string_lossy() == file { let parts: Vec<_> = line["# size:".len()..].split(" ").collect(); ret.files[i].1 = parts[0].trim().parse()?; //FIXME do we need trims? - let hash = &parts[1]["hash:".len()..].trim(); - ret.files[i].2 = hash.parse()?; + let kind_hash: Vec<_> = parts[1].split(":").collect(); + let hash = kind_hash[1]; + ret.files[i].2 = FileHash { + kind: SourceFileHashAlgorithm::from_str(kind_hash[0]) + .expect("unknown hashing algo"), + hash: hash.to_string(), + }; break; } } @@ -2117,7 +2201,14 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult internal("malformed dep-info format, trailing \\".to_string()) })?); } - ret.files.push((file.into(), 0, 0)); + ret.files.push(( + file.into(), + 0, + FileHash { + kind: SourceFileHashAlgorithm::Md5, + hash: String::new(), + }, + )); } } else { prev_line = Some(line); From 642cf9c9ee232937f331832453a7f1db7f292b53 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 21 Aug 2020 19:19:18 +0100 Subject: [PATCH 03/39] Put hash back in cache. --- src/cargo/core/compiler/context/mod.rs | 2 +- src/cargo/core/compiler/fingerprint.rs | 81 +++++++++++++------------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 3ca4684e232..ec2114a25db 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -40,7 +40,7 @@ pub struct Context<'a, 'cfg> { /// Fingerprints used to detect if a unit is out-of-date. pub fingerprints: HashMap>, /// Cache of file mtimes to reduce filesystem hits. - pub mtime_cache: HashMap, + pub mtime_cache: HashMap)>, /// A set used to track which units have been compiled. /// A unit may appear in the job graph multiple times as a dependency of /// multiple packages, but it only needs to run once. 
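
(A note on the cache shape change in the next file: the third tuple slot
becomes an `Option<FileHash>`, so "not hashed yet" is distinguishable from a
real digest and each file is hashed at most once per build. The access pattern
reduces to this shape; a sketch, not code from the patch:)

    fn memoized_hash(slot: &mut Option<String>, compute: impl FnOnce() -> String) -> String {
        // First caller pays for the hash; everyone after reuses it.
        slot.get_or_insert_with(compute).clone()
    }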
diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 75c3a19fe60..9eed25c20a1 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -722,7 +722,7 @@ impl LocalFingerprint { fn find_stale_item( &self, config: &Config, - mtime_cache: &mut HashMap, + mtime_cache: &mut HashMap)>, pkg_root: &Path, target_root: &Path, ) -> CargoResult> { @@ -991,7 +991,7 @@ impl Fingerprint { fn check_filesystem( &mut self, config: &Config, - mtime_cache: &mut HashMap, + mtime_cache: &mut HashMap)>, pkg_root: &Path, target_root: &Path, ) -> CargoResult<()> { @@ -1720,7 +1720,7 @@ fn pkg_fingerprint(bcx: &BuildContext<'_, '_>, pkg: &Package) -> CargoResult, + mtime_cache: &mut HashMap)>, reference: &Path, paths: &[(PathBuf, FileSize, FileHash)], ) -> Option { @@ -1747,15 +1747,7 @@ fn find_stale_file( } else { 0 }; - v.insert(( - mtime, - current_size, - FileHash { - kind: SourceFileHashAlgorithm::Md5, - hash: String::new(), - }, - )) - .clone() // Hash calculated only if needed later. + v.insert((mtime, current_size, None)).clone() // Hash calculated only if needed later. } }; @@ -1797,37 +1789,48 @@ fn find_stale_file( // Same size but mtime is different. Probably there's no change... // compute hash and compare to prevent change cascade... - if config.cli_unstable().hash_tracking && reference_hash.hash.len() > 0 { - // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. - // but not sure it's worth the additional complexity. - //FIXME put the result in the mtime_cache rather than hashing each time! - let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME - - let hash = match reference_hash.kind { - SourceFileHashAlgorithm::Md5 => { - let mut hasher = Md5::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).unwrap(); //FIXME - if count == 0 { - break; + if config.cli_unstable().hash_tracking && !reference_hash.hash.is_empty() { + let hash = if let Some(path_hash) = path_hash { + //FIXME use unwrap_or + path_hash.hash + } else { + // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. + // but not sure it's worth the additional complexity. + //FIXME put the result in the mtime_cache rather than hashing each time! 
+ let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME + + let hash = match reference_hash.kind { + SourceFileHashAlgorithm::Md5 => { + let mut hasher = Md5::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).unwrap(); //FIXME + if count == 0 { + break; + } + hasher.input(&buffer[..count]); } - hasher.input(&buffer[..count]); + format!("{:?}", hasher.result()) } - format!("{:?}", hasher.result()) - } - SourceFileHashAlgorithm::Sha1 => { - let mut hasher = Sha1::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).unwrap(); //FIXME - if count == 0 { - break; + SourceFileHashAlgorithm::Sha1 => { + let mut hasher = Sha1::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).unwrap(); //FIXME + if count == 0 { + break; + } + hasher.input(&buffer[..count]); } - hasher.input(&buffer[..count]); + format!("{:?}", hasher.result()) } - format!("{:?}", hasher.result()) - } + }; + let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); + cached.2 = Some(FileHash { + kind: reference_hash.kind, + hash: hash.clone(), + }); + hash }; if hash == reference_hash.hash { From fecc6dabe7f005d0b393abe7d383082148f000cf Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 22 Aug 2020 09:24:50 +0100 Subject: [PATCH 04/39] Fix existing tests. --- tests/testsuite/dep_info.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/testsuite/dep_info.rs b/tests/testsuite/dep_info.rs index ffc3bccc0db..dea7dbe9f43 100644 --- a/tests/testsuite/dep_info.rs +++ b/tests/testsuite/dep_info.rs @@ -26,6 +26,10 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( let dep_info = &mut &dep_info[..]; let deps = (0..read_usize(dep_info)) .map(|_| { + //FIXME rather than discarding these we could check them? + read_u64(dep_info); //filesize + str::from_utf8(read_bytes(dep_info)).unwrap(); //hash + read_u8(dep_info); //hashkind ( read_u8(dep_info), str::from_utf8(read_bytes(dep_info)).unwrap(), @@ -49,6 +53,21 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( ret } + fn read_u64(bytes: &mut &[u8]) -> Option { + let ret = bytes.get(..8)?; + *bytes = &bytes[8..]; + Some( + ((ret[0] as u64) << 0) + | ((ret[1] as u64) << 8) + | ((ret[2] as u64) << 16) + | ((ret[3] as u64) << 24) + | ((ret[4] as u64) << 32) + | ((ret[5] as u64) << 40) + | ((ret[6] as u64) << 48) + | ((ret[7] as u64) << 56), + ) + } + fn read_bytes<'a>(bytes: &mut &'a [u8]) -> &'a [u8] { let n = read_usize(bytes) as usize; let ret = &bytes[..n]; From 0675c0e4020dcebd084e0f37293084c4db34c8fe Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 5 Sep 2020 12:51:54 +0100 Subject: [PATCH 05/39] Optimisation: No need to figure out if bin files are up to date if they have svh hashes in their filenames. 
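
Cargo names many artifacts with `-C extra-filename=-<metadata hash>`, so for
those files the name itself already pins the contents. The new `Filename` kind
leans on that: existence is enough. Roughly, as a sketch (helper and file name
illustrative):

    fn filename_hashed_is_fresh(path: &std::path::Path) -> bool {
        // e.g. deps/libfoo-8f3a91c2d4b7e6a1.rlib -- stale contents would live
        // under a different name, so presence implies freshness.
        path.exists()
    }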
--- src/cargo/core/compiler/fingerprint.rs | 39 ++++++++++++++++---------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 9eed25c20a1..76c8a4b4310 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -350,7 +350,7 @@ pub type FileSize = u64; #[derive(Clone)] pub struct FileHash { - kind: SourceFileHashAlgorithm, + kind: FileHashAlgorithm, hash: String, } @@ -763,7 +763,7 @@ impl LocalFingerprint { pkg_root.join(p), 0u64, FileHash { - kind: SourceFileHashAlgorithm::Md5, + kind: FileHashAlgorithm::Md5, hash: String::new(), }, ) @@ -1800,7 +1800,7 @@ fn find_stale_file( let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME let hash = match reference_hash.kind { - SourceFileHashAlgorithm::Md5 => { + FileHashAlgorithm::Md5 => { let mut hasher = Md5::new(); let mut buffer = [0; 1024]; loop { @@ -1812,7 +1812,7 @@ fn find_stale_file( } format!("{:?}", hasher.result()) } - SourceFileHashAlgorithm::Sha1 => { + FileHashAlgorithm::Sha1 => { let mut hasher = Sha1::new(); let mut buffer = [0; 1024]; loop { @@ -1824,6 +1824,9 @@ fn find_stale_file( } format!("{:?}", hasher.result()) } + FileHashAlgorithm::Filename => { + "0".to_string() + } }; let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); cached.2 = Some(FileHash { @@ -1854,18 +1857,22 @@ fn find_stale_file( } #[derive(Clone, Copy, Eq, PartialEq)] -pub enum SourceFileHashAlgorithm { +pub enum FileHashAlgorithm { Md5, Sha1, + /// If the hash is in the filename then as long as the file exists we can + /// assume it is up to date. + Filename, } -impl FromStr for SourceFileHashAlgorithm { +impl FromStr for FileHashAlgorithm { type Err = (); - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { match s { - "md5" => Ok(SourceFileHashAlgorithm::Md5), - "sha1" => Ok(SourceFileHashAlgorithm::Sha1), + "md5" => Ok(FileHashAlgorithm::Md5), + "sha1" => Ok(FileHashAlgorithm::Sha1), + "hash_in_filename" => Ok(FileHashAlgorithm::Filename), _ => Err(()), } } @@ -2015,8 +2022,9 @@ impl EncodedDepInfo { //debug!("read hash as {}", hash); let kind = match read_u8(bytes)? { - 0 => SourceFileHashAlgorithm::Md5, - 1 => SourceFileHashAlgorithm::Sha1, + 0 => FileHashAlgorithm::Md5, + 1 => FileHashAlgorithm::Sha1, + 2 => FileHashAlgorithm::Filename, _ => return None, }; let ty = match read_u8(bytes)? 
{ @@ -2097,8 +2105,9 @@ impl EncodedDepInfo { write_bytes(dst, hash.hash.as_bytes()); //write(dst, hash.hash); match hash.kind { - SourceFileHashAlgorithm::Md5 => dst.push(0), - SourceFileHashAlgorithm::Sha1 => dst.push(1), + FileHashAlgorithm::Md5 => dst.push(0), + FileHashAlgorithm::Sha1 => dst.push(1), + FileHashAlgorithm::Filename => dst.push(2), } match ty { DepInfoPathType::PackageRootRelative => dst.push(0), @@ -2179,7 +2188,7 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult let kind_hash: Vec<_> = parts[1].split(":").collect(); let hash = kind_hash[1]; ret.files[i].2 = FileHash { - kind: SourceFileHashAlgorithm::from_str(kind_hash[0]) + kind: FileHashAlgorithm::from_str(kind_hash[0]) .expect("unknown hashing algo"), hash: hash.to_string(), }; @@ -2208,7 +2217,7 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult file.into(), 0, FileHash { - kind: SourceFileHashAlgorithm::Md5, + kind: FileHashAlgorithm::Md5, hash: String::new(), }, )); From 7ca4fbd167339254b73d080cdd432fa56e98f0dd Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Thu, 29 Oct 2020 13:00:39 +0000 Subject: [PATCH 06/39] WIP object reading --- Cargo.toml | 5 +++++ src/cargo/core/compiler/fingerprint.rs | 14 ++++++++++++++ src/cargo/core/compiler/mod.rs | 1 + tests/testsuite/tool_paths.rs | 24 ++++++++++++++++++++++++ 4 files changed, 44 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index ec1aacd4ffb..dd6126b5e3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,11 @@ im-rc = "15.0.0" # for more information. rustc-workspace-hack = "1.0.0" +[dependencies.object] +version = "0.20.0" +default-features = false +features = ['read_core', 'elf', 'macho', 'pe', 'unaligned'] + [target.'cfg(target_os = "macos")'.dependencies] core-foundation = { version = "0.9.0", features = ["mac_os_10_7_support"] } diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 76c8a4b4310..1793abef833 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1824,6 +1824,16 @@ fn find_stale_file( } format!("{:?}", hasher.result()) } + FileHashAlgorithm::SvhInBin => { + debug!("found! got here"); + use object::Object; + let v : Vec = vec![]; + let obj = object::read::File::parse(&v).unwrap(); + for sym in obj.symbols() { + println!("{:#?}", sym); + } + "todo!".to_string() + } FileHashAlgorithm::Filename => { "0".to_string() } @@ -1859,6 +1869,7 @@ fn find_stale_file( #[derive(Clone, Copy, Eq, PartialEq)] pub enum FileHashAlgorithm { Md5, + SvhInBin, Sha1, /// If the hash is in the filename then as long as the file exists we can /// assume it is up to date. @@ -1871,6 +1882,7 @@ impl FromStr for FileHashAlgorithm { fn from_str(s: &str) -> Result { match s { "md5" => Ok(FileHashAlgorithm::Md5), + "svh_in_bin" => Ok(FileHashAlgorithm::SvhInBin), "sha1" => Ok(FileHashAlgorithm::Sha1), "hash_in_filename" => Ok(FileHashAlgorithm::Filename), _ => Err(()), @@ -2025,6 +2037,7 @@ impl EncodedDepInfo { 0 => FileHashAlgorithm::Md5, 1 => FileHashAlgorithm::Sha1, 2 => FileHashAlgorithm::Filename, + 3 => FileHashAlgorithm::SvhInBin, _ => return None, }; let ty = match read_u8(bytes)? 
{ @@ -2108,6 +2121,7 @@ impl EncodedDepInfo { FileHashAlgorithm::Md5 => dst.push(0), FileHashAlgorithm::Sha1 => dst.push(1), FileHashAlgorithm::Filename => dst.push(2), + FileHashAlgorithm::SvhInBin => dst.push(3), } match ty { DepInfoPathType::PackageRootRelative => dst.push(0), diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index 53849e300e1..e0739fbe4d9 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -860,6 +860,7 @@ fn build_base_args( match cx.files().metadata(unit) { Some(m) => { + println!("extra filename called {}", m); cmd.arg("-C").arg(&format!("metadata={}", m)); cmd.arg("-C").arg(&format!("extra-filename=-{}", m)); } diff --git a/tests/testsuite/tool_paths.rs b/tests/testsuite/tool_paths.rs index 26c8335f0b1..8cfae6b60ac 100644 --- a/tests/testsuite/tool_paths.rs +++ b/tests/testsuite/tool_paths.rs @@ -32,6 +32,30 @@ fn pathless_tools() { .run(); } +#[cargo_test] +fn obj_test() { + use object::Object; + use object::ObjectSegment; + use std::io::Read; + use std::fs::File; + +// let mut file = File::open("/Users/gilescope/projects/tst/target/debug/deps/libmydep2-244330a37db7aca2.rlib").unwrap(); + let mut file = File::open("/Users/gilescope/projects/tst/target/debug/deps/tst").unwrap(); + + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let obj = object::read::File::parse(&data).unwrap(); + for sym in obj.symbols() { + println!("{:#?}", sym); + } + for seg in obj.segments() {//will be in __DATA seg for mach-o + if let Ok(Some("__DATA")) = seg.name() + { + println!("{:#?}", seg); + } + } +} + #[cargo_test] fn absolute_tools() { let target = rustc_host(); From 8073aac62ff199eab7cff507b2b34575c8d77537 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 30 Oct 2020 14:32:42 +0000 Subject: [PATCH 07/39] Read SVH in bin object files and rlibs --- Cargo.toml | 1 + src/cargo/core/compiler/fingerprint.rs | 86 +++++++++++++++++++------- 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd6126b5e3a..68006bf0e55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,6 +71,7 @@ clap = "2.31.2" unicode-width = "0.1.5" openssl = { version = '0.10.11', optional = true } im-rc = "15.0.0" +ar="0.8" # A noop dependency that changes in the Rust repository, it's a bit of a hack. # See the `src/tools/rustc-workspace-hack/README.md` file in `rust-lang/rust` diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 1793abef833..e58c32f6dd5 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1792,12 +1792,13 @@ fn find_stale_file( if config.cli_unstable().hash_tracking && !reference_hash.hash.is_empty() { let hash = if let Some(path_hash) = path_hash { //FIXME use unwrap_or - path_hash.hash + Some(path_hash.hash.clone()) } else { // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. // but not sure it's worth the additional complexity. //FIXME put the result in the mtime_cache rather than hashing each time! 
- let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME + let mut reader: io::BufReader = + io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME let hash = match reference_hash.kind { FileHashAlgorithm::Md5 => { @@ -1810,7 +1811,7 @@ fn find_stale_file( } hasher.input(&buffer[..count]); } - format!("{:?}", hasher.result()) + Some(format!("{:?}", hasher.result())) } FileHashAlgorithm::Sha1 => { let mut hasher = Sha1::new(); @@ -1822,32 +1823,32 @@ fn find_stale_file( } hasher.input(&buffer[..count]); } - format!("{:?}", hasher.result()) + Some(format!("{:?}", hasher.result())) } FileHashAlgorithm::SvhInBin => { debug!("found! got here"); - use object::Object; - let v : Vec = vec![]; - let obj = object::read::File::parse(&v).unwrap(); - for sym in obj.symbols() { - println!("{:#?}", sym); + if path.ends_with(".rmeta") { + get_svh_from_ar(reader) + } else { + get_svh_from_object_file(reader) } - "todo!".to_string() - } - FileHashAlgorithm::Filename => { - "0".to_string() } + FileHashAlgorithm::Filename => Some("0".to_string()), }; - let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); - cached.2 = Some(FileHash { - kind: reference_hash.kind, - hash: hash.clone(), - }); + if let Some(ref hash) = hash { + let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); + cached.2 = Some(FileHash { + kind: reference_hash.kind, + hash: hash.clone(), + }); + } hash }; - if hash == reference_hash.hash { - continue; + if let Some(hash) = hash { + if reference_hash.hash == hash { + continue; + } } } @@ -1866,10 +1867,53 @@ fn find_stale_file( None } +type Svh = String; +fn get_svh_from_ar(reader: R) -> Option { + // use std::fs::File; + // let file = File::open(path).ok()?; + + let mut ar = ar::Archive::new(reader); + while let Some(file) = ar.next_entry() { + if let Ok(file) = file { + let s = String::from_utf8_lossy(&file.header().identifier()); + if s.ends_with(".rmeta") { + if let Some(index) = s.rfind('-') { + return Some(s[index + 1..(s.len() - ".rmeta".len())].to_string()); + } + } + } + } + None +} + +fn get_svh_from_object_file(mut reader: R) -> Option { + use object::Object; + // use std::fs::File; + // use std::io::Read; + + //let mut file = File::open(path).ok()?; + let mut data = vec![]; + reader.read_to_end(&mut data).ok()?; //TODO: looks expensive! + let obj = object::read::File::parse(&data).ok()?; + + for (_idx, sym) in obj.symbols() { + //TODO: symbol is at the end typically. + if let Some(name) = sym.name() { + if name.starts_with("_rust_svh_") { + if let Some(index) = name.rfind('_') { + return Some(name[index + 1..].to_string()); + } + } + } + } + None +} + #[derive(Clone, Copy, Eq, PartialEq)] pub enum FileHashAlgorithm { - Md5, + /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename. SvhInBin, + Md5, Sha1, /// If the hash is in the filename then as long as the file exists we can /// assume it is up to date. From 5e94d25111711bcb08d9ed525d458d9510593870 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 30 Oct 2020 17:12:14 +0000 Subject: [PATCH 08/39] SvhInBin => svh. No need for svh in filename. 
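
With the rename the dep-info side only needs three tag strings: `md5`, `sha1`
and `svh`. For example (sizes and digests illustrative):

    # size:1842 md5:2f7b4a02816e7b8a8d4da36daa76f352
    # size:40208 svh:1d6fae68f6d4ccbf

`svh` now covers rlibs and linked binaries alike, since the SVH can be read
back out of the artifact itself (via the object symbols or the archive member
name added in the previous patch) instead of being smuggled through file
names.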
--- src/cargo/core/compiler/fingerprint.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index e58c32f6dd5..a495239ccf1 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1825,7 +1825,7 @@ fn find_stale_file( } Some(format!("{:?}", hasher.result())) } - FileHashAlgorithm::SvhInBin => { + FileHashAlgorithm::Svh => { debug!("found! got here"); if path.ends_with(".rmeta") { get_svh_from_ar(reader) @@ -1833,7 +1833,6 @@ fn find_stale_file( get_svh_from_object_file(reader) } } - FileHashAlgorithm::Filename => Some("0".to_string()), }; if let Some(ref hash) = hash { let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); @@ -1912,12 +1911,9 @@ fn get_svh_from_object_file(mut reader: R) -> Option { #[derive(Clone, Copy, Eq, PartialEq)] pub enum FileHashAlgorithm { /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename. - SvhInBin, + Svh, Md5, Sha1, - /// If the hash is in the filename then as long as the file exists we can - /// assume it is up to date. - Filename, } impl FromStr for FileHashAlgorithm { @@ -1926,9 +1922,8 @@ impl FromStr for FileHashAlgorithm { fn from_str(s: &str) -> Result { match s { "md5" => Ok(FileHashAlgorithm::Md5), - "svh_in_bin" => Ok(FileHashAlgorithm::SvhInBin), + "svh" => Ok(FileHashAlgorithm::Svh), "sha1" => Ok(FileHashAlgorithm::Sha1), - "hash_in_filename" => Ok(FileHashAlgorithm::Filename), _ => Err(()), } } @@ -2080,8 +2075,7 @@ impl EncodedDepInfo { let kind = match read_u8(bytes)? { 0 => FileHashAlgorithm::Md5, 1 => FileHashAlgorithm::Sha1, - 2 => FileHashAlgorithm::Filename, - 3 => FileHashAlgorithm::SvhInBin, + 2 => FileHashAlgorithm::Svh, _ => return None, }; let ty = match read_u8(bytes)? { @@ -2164,8 +2158,7 @@ impl EncodedDepInfo { match hash.kind { FileHashAlgorithm::Md5 => dst.push(0), FileHashAlgorithm::Sha1 => dst.push(1), - FileHashAlgorithm::Filename => dst.push(2), - FileHashAlgorithm::SvhInBin => dst.push(3), + FileHashAlgorithm::Svh => dst.push(2), } match ty { DepInfoPathType::PackageRootRelative => dst.push(0), From 29cef89043fa3e72f55087cdddade0e5845f3cae Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 31 Oct 2020 11:49:27 +0000 Subject: [PATCH 09/39] Use hex encoding for src hashes --- Cargo.toml | 1 - src/cargo/core/compiler/fingerprint.rs | 52 +++++++++++++++----------- src/cargo/core/compiler/mod.rs | 1 - 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 68006bf0e55..bcaa455eaf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,6 @@ anyhow = "1.0" filetime = "0.2.9" flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] } git2 = "0.13.12" -fxhash = "0.2.1" git2-curl = "0.14.0" glob = "0.3.0" hex = "0.4" diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index a495239ccf1..3fa2f70e344 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -326,6 +326,7 @@ use anyhow::{bail, format_err}; use filetime::FileTime; use log::{debug, info}; use md5::{Digest, Md5}; +use object::Object; use serde::de; use serde::ser; use serde::{Deserialize, Serialize}; @@ -348,7 +349,7 @@ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. 
pub type FileSize = u64; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct FileHash { kind: FileHashAlgorithm, hash: String, @@ -1790,7 +1791,7 @@ fn find_stale_file( // Same size but mtime is different. Probably there's no change... // compute hash and compare to prevent change cascade... if config.cli_unstable().hash_tracking && !reference_hash.hash.is_empty() { - let hash = if let Some(path_hash) = path_hash { + let new_hash = if let Some(path_hash) = path_hash { //FIXME use unwrap_or Some(path_hash.hash.clone()) } else { @@ -1800,7 +1801,7 @@ fn find_stale_file( let mut reader: io::BufReader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME - let hash = match reference_hash.kind { + let new_hash = match reference_hash.kind { FileHashAlgorithm::Md5 => { let mut hasher = Md5::new(); let mut buffer = [0; 1024]; @@ -1811,7 +1812,7 @@ fn find_stale_file( } hasher.input(&buffer[..count]); } - Some(format!("{:?}", hasher.result())) + Some(to_hex(&hasher.result())) } FileHashAlgorithm::Sha1 => { let mut hasher = Sha1::new(); @@ -1823,7 +1824,7 @@ fn find_stale_file( } hasher.input(&buffer[..count]); } - Some(format!("{:?}", hasher.result())) + Some(to_hex(&hasher.result())) } FileHashAlgorithm::Svh => { debug!("found! got here"); @@ -1834,20 +1835,29 @@ fn find_stale_file( } } }; - if let Some(ref hash) = hash { + if let Some(ref hash) = new_hash { let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); cached.2 = Some(FileHash { kind: reference_hash.kind, hash: hash.clone(), }); } - hash + new_hash }; - if let Some(hash) = hash { - if reference_hash.hash == hash { + if let Some(new_hash) = new_hash { + if reference_hash.hash == new_hash { continue; } + debug!( + "Hash check failed for {:?}: {} (ref) != {}", + &path, reference_hash.hash, new_hash + ); + } else { + debug!( + "Hash unavalable for {:?} to compare with ref {}", + &path, reference_hash.hash + ); } } @@ -1866,11 +1876,16 @@ fn find_stale_file( None } +fn to_hex(bytes: &[u8]) -> String { + let mut result = String::with_capacity(bytes.len() * 2); + for byte in bytes { + result.push_str(&format!("{:x}", byte)); + } + result +} + type Svh = String; fn get_svh_from_ar(reader: R) -> Option { - // use std::fs::File; - // let file = File::open(path).ok()?; - let mut ar = ar::Archive::new(reader); while let Some(file) = ar.next_entry() { if let Ok(file) = file { @@ -1886,11 +1901,6 @@ fn get_svh_from_ar(reader: R) -> Option { } fn get_svh_from_object_file(mut reader: R) -> Option { - use object::Object; - // use std::fs::File; - // use std::io::Read; - - //let mut file = File::open(path).ok()?; let mut data = vec![]; reader.read_to_end(&mut data).ok()?; //TODO: looks expensive! let obj = object::read::File::parse(&data).ok()?; @@ -1908,7 +1918,7 @@ fn get_svh_from_object_file(mut reader: R) -> Option { None } -#[derive(Clone, Copy, Eq, PartialEq)] +#[derive(Clone, Copy, Eq, PartialEq, Debug)] pub enum FileHashAlgorithm { /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename. Svh, @@ -2234,9 +2244,9 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult let file = &prev[0..prev.len() - 1]; for i in 0..ret.files.len() { if ret.files[i].0.to_string_lossy() == file { - let parts: Vec<_> = line["# size:".len()..].split(" ").collect(); - ret.files[i].1 = parts[0].trim().parse()?; //FIXME do we need trims? 
- let kind_hash: Vec<_> = parts[1].split(":").collect(); + let size_and_hash: Vec<_> = line["# size:".len()..].split(' ').collect(); + ret.files[i].1 = size_and_hash[0].parse()?; + let kind_hash: Vec<_> = size_and_hash[1].split(":").collect(); let hash = kind_hash[1]; ret.files[i].2 = FileHash { kind: FileHashAlgorithm::from_str(kind_hash[0]) diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index e0739fbe4d9..53849e300e1 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -860,7 +860,6 @@ fn build_base_args( match cx.files().metadata(unit) { Some(m) => { - println!("extra filename called {}", m); cmd.arg("-C").arg(&format!("metadata={}", m)); cmd.arg("-C").arg(&format!("extra-filename=-{}", m)); } From e3c98e4bf0a154ad1d03b202213d61ddcc12a278 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 31 Oct 2020 12:28:52 +0000 Subject: [PATCH 10/39] cargo fmt --- src/cargo/core/compiler/fingerprint.rs | 2 +- tests/testsuite/tool_paths.rs | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 3fa2f70e344..5e830cf6fa4 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1920,7 +1920,7 @@ fn get_svh_from_object_file(mut reader: R) -> Option { #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub enum FileHashAlgorithm { - /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename. + /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. Svh, Md5, Sha1, diff --git a/tests/testsuite/tool_paths.rs b/tests/testsuite/tool_paths.rs index 8cfae6b60ac..26c8335f0b1 100644 --- a/tests/testsuite/tool_paths.rs +++ b/tests/testsuite/tool_paths.rs @@ -32,30 +32,6 @@ fn pathless_tools() { .run(); } -#[cargo_test] -fn obj_test() { - use object::Object; - use object::ObjectSegment; - use std::io::Read; - use std::fs::File; - -// let mut file = File::open("/Users/gilescope/projects/tst/target/debug/deps/libmydep2-244330a37db7aca2.rlib").unwrap(); - let mut file = File::open("/Users/gilescope/projects/tst/target/debug/deps/tst").unwrap(); - - let mut data = Vec::new(); - file.read_to_end(&mut data).unwrap(); - let obj = object::read::File::parse(&data).unwrap(); - for sym in obj.symbols() { - println!("{:#?}", sym); - } - for seg in obj.segments() {//will be in __DATA seg for mach-o - if let Ok(Some("__DATA")) = seg.name() - { - println!("{:#?}", seg); - } - } -} - #[cargo_test] fn absolute_tools() { let target = rustc_host(); From 1f369a2d28f479e581fe815041bf87c1e07df6b0 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 31 Oct 2020 14:58:07 +0000 Subject: [PATCH 11/39] use from le bytes --- src/cargo/core/compiler/fingerprint.rs | 21 +++++---------------- tests/testsuite/dep_info.rs | 21 +++++---------------- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 5e830cf6fa4..ddfd7ea3620 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -313,6 +313,7 @@ //! use std::collections::hash_map::{Entry, HashMap}; +use std::convert::TryInto; use std::env; use std::fs; use std::hash::{self, Hasher}; @@ -2075,7 +2076,10 @@ impl EncodedDepInfo { let mut files = Vec::with_capacity(nfiles as usize); for _ in 0..nfiles { //FIXME: backward compatibility!!! - let size = read_u64(bytes)? 
as FileSize; + let eight_bytes: &[u8; 8] = (bytes[0..8]).try_into().ok()?; + let size = u64::from_le_bytes(*eight_bytes) as FileSize; + *bytes = &bytes[8..]; + //debug!("read size as {}", size); let hash_buf = read_bytes(bytes)?; @@ -2126,21 +2130,6 @@ impl EncodedDepInfo { ) } - fn read_u64(bytes: &mut &[u8]) -> Option { - let ret = bytes.get(..8)?; - *bytes = &bytes[8..]; - Some( - ((ret[0] as u64) << 0) - | ((ret[1] as u64) << 8) - | ((ret[2] as u64) << 16) - | ((ret[3] as u64) << 24) - | ((ret[4] as u64) << 32) - | ((ret[5] as u64) << 40) - | ((ret[6] as u64) << 48) - | ((ret[7] as u64) << 56), - ) - } - fn read_u8(bytes: &mut &[u8]) -> Option { let ret = *bytes.get(0)?; *bytes = &bytes[1..]; diff --git a/tests/testsuite/dep_info.rs b/tests/testsuite/dep_info.rs index dea7dbe9f43..287a8284116 100644 --- a/tests/testsuite/dep_info.rs +++ b/tests/testsuite/dep_info.rs @@ -7,6 +7,7 @@ use cargo_test_support::{ basic_bin_manifest, basic_manifest, is_nightly, main_file, project, rustc_host, Project, }; use filetime::FileTime; +use std::convert::TryInto; use std::fs; use std::path::Path; use std::str; @@ -27,7 +28,10 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( let deps = (0..read_usize(dep_info)) .map(|_| { //FIXME rather than discarding these we could check them? - read_u64(dep_info); //filesize + let eight_bytes: &[u8; 8] = (dep_info[0..8]).try_into().unwrap(); + let _size = u64::from_le_bytes(*eight_bytes); + *bytes = &bytes[8..]; + str::from_utf8(read_bytes(dep_info)).unwrap(); //hash read_u8(dep_info); //hashkind ( @@ -53,21 +57,6 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( ret } - fn read_u64(bytes: &mut &[u8]) -> Option { - let ret = bytes.get(..8)?; - *bytes = &bytes[8..]; - Some( - ((ret[0] as u64) << 0) - | ((ret[1] as u64) << 8) - | ((ret[2] as u64) << 16) - | ((ret[3] as u64) << 24) - | ((ret[4] as u64) << 32) - | ((ret[5] as u64) << 40) - | ((ret[6] as u64) << 48) - | ((ret[7] as u64) << 56), - ) - } - fn read_bytes<'a>(bytes: &mut &'a [u8]) -> &'a [u8] { let n = read_usize(bytes) as usize; let ret = &bytes[..n]; From 3a47d6bdbff5e5af2facb1ee85cb0ef57cc1da70 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sat, 31 Oct 2020 17:19:59 +0000 Subject: [PATCH 12/39] rlib rather than rmeta --- src/cargo/core/compiler/fingerprint.rs | 14 ++++++++++---- tests/testsuite/dep_info.rs | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index ddfd7ea3620..7fe9756aef4 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1069,9 +1069,11 @@ impl Fingerprint { // recompiled previously. We transitively become stale ourselves in // that case, so bail out. // - // Note that this comparison should probably be `>=`, not `>`, but + // Note that this comparison should probably be `>=`, not `>`, but // for a discussion of why it's `>` see the discussion about #5918 // below in `find_stale`. + + //todo: need to do raw .rmeta files if dep_mtime > max_mtime { info!( "dependency on `{}` is newer than we are {} > {} {:?}", @@ -1829,7 +1831,7 @@ fn find_stale_file( } FileHashAlgorithm::Svh => { debug!("found! 
got here"); - if path.ends_with(".rmeta") { + if path.ends_with(".rlib") { get_svh_from_ar(reader) } else { get_svh_from_object_file(reader) @@ -1848,15 +1850,19 @@ fn find_stale_file( if let Some(new_hash) = new_hash { if reference_hash.hash == new_hash { + debug!( + "HASH: Hash hit: mtime mismatch but contents match for {:?}", + &path + ); continue; } debug!( - "Hash check failed for {:?}: {} (ref) != {}", + "HASH: Hash miss for {:?}: {} (ref) != {}", &path, reference_hash.hash, new_hash ); } else { debug!( - "Hash unavalable for {:?} to compare with ref {}", + "HASH: Hash miss (unavalable) for {:?} to compare with ref {}", &path, reference_hash.hash ); } diff --git a/tests/testsuite/dep_info.rs b/tests/testsuite/dep_info.rs index 287a8284116..9cdebe49ac8 100644 --- a/tests/testsuite/dep_info.rs +++ b/tests/testsuite/dep_info.rs @@ -30,7 +30,7 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( //FIXME rather than discarding these we could check them? let eight_bytes: &[u8; 8] = (dep_info[0..8]).try_into().unwrap(); let _size = u64::from_le_bytes(*eight_bytes); - *bytes = &bytes[8..]; + *dep_info = &dep_info[8..]; str::from_utf8(read_bytes(dep_info)).unwrap(); //hash read_u8(dep_info); //hashkind From edc38a9d6dcb99c1d1730286049cb6c93f9e366c Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sun, 1 Nov 2020 12:24:02 +0000 Subject: [PATCH 13/39] bugfix --- src/cargo/core/compiler/fingerprint.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 7fe9756aef4..21dc2e289fe 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1830,8 +1830,7 @@ fn find_stale_file( Some(to_hex(&hasher.result())) } FileHashAlgorithm::Svh => { - debug!("found! 
got here"); - if path.ends_with(".rlib") { + if path.extension() == Some(std::ffi::OsStr::new("rlib")) { get_svh_from_ar(reader) } else { get_svh_from_object_file(reader) From d2875fb86a7890f9a8e5bb0e655811523083505f Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Sun, 1 Nov 2020 17:41:09 +0000 Subject: [PATCH 14/39] Read svh from .rmeta --- src/cargo/core/compiler/fingerprint.rs | 60 +++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 21dc2e289fe..687d086ff0d 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1832,6 +1832,8 @@ fn find_stale_file( FileHashAlgorithm::Svh => { if path.extension() == Some(std::ffi::OsStr::new("rlib")) { get_svh_from_ar(reader) + } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { + get_svh_from_rmeta_file(reader) } else { get_svh_from_object_file(reader) } @@ -1894,25 +1896,29 @@ type Svh = String; fn get_svh_from_ar(reader: R) -> Option { let mut ar = ar::Archive::new(reader); while let Some(file) = ar.next_entry() { - if let Ok(file) = file { - let s = String::from_utf8_lossy(&file.header().identifier()); - if s.ends_with(".rmeta") { - if let Some(index) = s.rfind('-') { - return Some(s[index + 1..(s.len() - ".rmeta".len())].to_string()); + match file { + Ok(file) => { + let s = String::from_utf8_lossy(&file.header().identifier()); + if s.ends_with(".rmeta") { + if let Some(index) = s.rfind('-') { + return Some(s[index + 1..(s.len() - ".rmeta".len())].to_string()); + } } } + Err(err) => debug!("Error reading ar: {}", err), } } None } +// While this looks expensive this is only invoked when dylibs are compiled against +// and the timestamp is too recent and the file is the expected size. fn get_svh_from_object_file(mut reader: R) -> Option { let mut data = vec![]; - reader.read_to_end(&mut data).ok()?; //TODO: looks expensive! + reader.read_to_end(&mut data).ok()?; let obj = object::read::File::parse(&data).ok()?; for (_idx, sym) in obj.symbols() { - //TODO: symbol is at the end typically. if let Some(name) = sym.name() { if name.starts_with("_rust_svh_") { if let Some(index) = name.rfind('_') { @@ -1924,6 +1930,27 @@ fn get_svh_from_object_file(mut reader: R) -> Option { None } +fn get_svh_from_rmeta_file(mut reader: R) -> Option { + let mut data = Vec::with_capacity(128); + data.resize(128, 0); + reader.read_exact(&mut data).ok()?; + parse_svh(&data) +} + +fn parse_svh(data: &[u8]) -> Option { + let rust_version_len_pos = 12; + let data = &mut &data[rust_version_len_pos..]; + let rust_version_len = data[0] as usize; + let data = &mut &data[1..]; + //println!("rust version='{}'", String::from_utf8_lossy(&data[..rust_version_len])); + + let data = &data[rust_version_len..]; + let svh_len = data[0] as usize; + let data = &mut &data[1..]; + + Some(String::from_utf8_lossy(&data[..svh_len]).to_string()) +} + #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub enum FileHashAlgorithm { /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. 
@@ -2305,3 +2332,22 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult Ok(ret) } } + +#[cfg(test)] +mod test { + use super::parse_svh; + #[test] + fn test() { + let vec: Vec = vec![ + 114, 117, 115, 116, 0, 0, 0, 5, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46, + 52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100, + 52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1, + 0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99, + 111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97, + 54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108, + 101, 114, 95, 98, 117, 105, 108, + ]; + // r u s t / version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh--> + assert!(parse_svh(&vec).is_some()); + } +} From 88869b99e542fad78b1b33a9d40408fa2abfc617 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 3 Nov 2020 08:58:33 +0000 Subject: [PATCH 15/39] First cut --- src/cargo/core/compiler/fingerprint.rs | 255 +++++++++++++++++-------- 1 file changed, 177 insertions(+), 78 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 687d086ff0d..751be291f8c 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -350,10 +350,10 @@ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. pub type FileSize = u64; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] pub struct FileHash { - kind: FileHashAlgorithm, - hash: String, + pub kind: FileHashAlgorithm, + pub hash: String, } /// Determines if a `unit` is up-to-date, and if not prepares necessary work to @@ -761,14 +761,14 @@ impl LocalFingerprint { let c: Vec<_> = paths .iter() .map(|p| { - ( - pkg_root.join(p), - 0u64, - FileHash { + Fileprint { + path: pkg_root.join(p), + size: 0u64, + hash: FileHash { kind: FileHashAlgorithm::Md5, - hash: String::new(), + hash: String::new(), // TO DO }, - ) + } }) .collect(); Ok(find_stale_file( @@ -996,6 +996,7 @@ impl Fingerprint { mtime_cache: &mut HashMap)>, pkg_root: &Path, target_root: &Path, + dep_info_loc: PathBuf, ) -> CargoResult<()> { assert!(!self.fs_status.up_to_date()); let mut mtimes = HashMap::new(); @@ -1053,6 +1054,9 @@ impl Fingerprint { }) .expect("failed to find rmeta") } else { + for (dep, _dep2) in dep_mtimes { + debug!("HASH had to look at all the files {:?}", &dep); + } match dep_mtimes.iter().max_by_key(|kv| kv.1) { Some(dep_mtime) => dep_mtime, // If our dependencies is up to date and has no filesystem @@ -1075,11 +1079,79 @@ impl Fingerprint { //todo: need to do raw .rmeta files if dep_mtime > max_mtime { - info!( - "dependency on `{}` is newer than we are {} > {} {:?}", - dep.name, dep_mtime, max_mtime, pkg_root - ); - return Ok(()); + // let (dep_path, dep_mtime) = if dep.only_requires_rmeta { + // dep_mtimes + // .iter() + // .find(|(path, _mtime)| { + // path.extension().and_then(|s| s.to_str()) == Some("rmeta") + // }) + // .expect("failed to find rmeta") + // } else { @todo here + for (dep_in, dep_mtime) in dep_mtimes { + if dep_mtime > max_mtime { + let dep_info = dep_info_loc + .strip_prefix(&target_root) + .unwrap() + .to_path_buf(); + let dep_info = target_root.join(dep_info); + println!("HASH dep info file {:?}", &dep_info); + + let dep_info = parse_dep_info(pkg_root, target_root, &dep_info)?; + 
let mut stale = false; + if let Some(dep_info) = dep_info { + for file in dep_info.files { + //println!("HASH dep info {:?}", file); + //TODO hideiously inefficient! + if *dep_in == file.path { + match std::fs::metadata(dep_in) { + // For file difference checking just check the lower bits of file size + Ok(metadata) => { + if file.size != metadata.len() || file.size == 0 { + stale = true; + println!("HASH file size discrepency {:?}", file); + break; + } + } + Err(..) => { + stale = true; + println!("HASH couldn't read file {:?}", file); + break; + } + } + + if let Some(hash) = get_hash(dep_in, file.hash.kind) { + println!("HASH got hash file!!!! {:?}", hash); + if file.hash.hash != hash || hash == "" { + stale = true; + } else { + println!("HASH hit - same hash! {:?}", hash); + } + } else { + stale = true; + } + break; + } + } + } else { + println!("HASH dep info not found!"); + } + // if let Some(item) = local.find_stale_item(config, mtime_cache, pkg_root, dep)? { + + // } + if stale { + info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path); + info!( + "HASH dependency on `{}` is newer than we are {} > {} {:?} {:?}", + dep.name, dep_mtime, max_mtime, pkg_root, dep_path + ); + return Ok(()); + } + } else { + println!("HASH dep skipped as up to date"); + } + // debug!("HASH had to look at all the files {:?}", &dep); + } + // what's our own output's dependency hash - what are we expecting it to be? } } @@ -1263,11 +1335,13 @@ fn calculate(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult pkg_root.join(path), + DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path), // N.B. path might be absolute here in which case the join will have no effect - DepInfoPathType::TargetRootRelative => target_root.join(path), + DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path), }; - ret.files.push((path, size, hash)); + ret.files.push(Fileprint { path, ..fileprint }); } Ok(Some(ret)) } @@ -1726,15 +1800,21 @@ fn find_stale_file( config: &Config, mtime_cache: &mut HashMap)>, reference: &Path, - paths: &[(PathBuf, FileSize, FileHash)], + paths: &[Fileprint], ) -> Option { let reference_mtime = match paths::mtime(reference) { Ok(mtime) => mtime, Err(..) => return Some(StaleItem::MissingFile(reference.to_path_buf())), }; - for (path, reference_size, reference_hash) in paths { + for Fileprint { + path, + size: reference_size, + hash: reference_hash, + } in paths + { let path = &path; + let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) { Entry::Occupied(o) => o.get().clone(), //FIXME? do we need to clone here? Entry::Vacant(v) => { @@ -1801,44 +1881,7 @@ fn find_stale_file( // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. // but not sure it's worth the additional complexity. //FIXME put the result in the mtime_cache rather than hashing each time! 
- let mut reader: io::BufReader = - io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME - - let new_hash = match reference_hash.kind { - FileHashAlgorithm::Md5 => { - let mut hasher = Md5::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).unwrap(); //FIXME - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } - FileHashAlgorithm::Sha1 => { - let mut hasher = Sha1::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).unwrap(); //FIXME - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } - FileHashAlgorithm::Svh => { - if path.extension() == Some(std::ffi::OsStr::new("rlib")) { - get_svh_from_ar(reader) - } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { - get_svh_from_rmeta_file(reader) - } else { - get_svh_from_object_file(reader) - } - } - }; + let new_hash = get_hash(&path, reference_hash.kind); if let Some(ref hash) = new_hash { let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); cached.2 = Some(FileHash { @@ -1884,6 +1927,46 @@ fn find_stale_file( None } +fn get_hash(path: &Path, algo: FileHashAlgorithm) -> Option { + let mut reader: io::BufReader = io::BufReader::new(fs::File::open(path).ok()?); + + match algo { + FileHashAlgorithm::Md5 => { + let mut hasher = Md5::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Sha1 => { + let mut hasher = Sha1::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Svh => { + if path.extension() == Some(std::ffi::OsStr::new("rlib")) { + get_svh_from_ar(reader) + } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { + get_svh_from_rmeta_file(reader) + } else { + get_svh_from_object_file(reader) + } + } + } +} + fn to_hex(bytes: &[u8]) -> String { let mut result = String::with_capacity(bytes.len() * 2); for byte in bytes { @@ -1951,7 +2034,7 @@ fn parse_svh(data: &[u8]) -> Option { Some(String::from_utf8_lossy(&data[..svh_len]).to_string()) } -#[derive(Clone, Copy, Eq, PartialEq, Debug)] +#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug)] pub enum FileHashAlgorithm { /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. Svh, @@ -2047,10 +2130,10 @@ pub fn translate_dep_info( .env .retain(|(key, _)| !rustc_cmd.get_envs().contains_key(key)); - for (file, size, hash) in depinfo.files { + for fileprint in depinfo.files { // The path may be absolute or relative, canonical or not. Make sure // it is canonicalized so we are comparing the same kinds of paths. - let abs_file = rustc_cwd.join(file); + let abs_file = rustc_cwd.join(fileprint.path); // If canonicalization fails, just use the abs path. There is currently // a bug where --remap-path-prefix is affecting .d files, causing them // to point to non-existent paths. @@ -2069,7 +2152,13 @@ pub fn translate_dep_info( // effect. 
(DepInfoPathType::TargetRootRelative, &*abs_file) }; - on_disk_info.files.push((size, hash, ty, path.to_owned())); + on_disk_info.files.push(( + Fileprint { + path: path.to_path_buf(), + ..fileprint + }, + ty, + )); } paths::write(cargo_dep_info, on_disk_info.serialize()?)?; Ok(()) @@ -2079,7 +2168,7 @@ pub fn translate_dep_info( pub struct RustcDepInfo { /// The list of files that the main target in the dep-info file depends on. /// and lower 32bits of size and hash (or 0 if not there). - pub files: Vec<(PathBuf, FileSize, FileHash)>, //FIXME use Option instead? + pub files: Vec, //FIXME use Option instead? /// The list of environment variables we found that the rustc compilation /// depends on. /// @@ -2090,6 +2179,14 @@ pub struct RustcDepInfo { pub env: Vec<(String, Option)>, } +/// A file location with identifying properties: size and hash. +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug)] +pub struct Fileprint { + pub path: PathBuf, + pub size: FileSize, + pub hash: FileHash, +} + // Same as `RustcDepInfo` except avoids absolute paths as much as possible to // allow moving around the target directory. // @@ -2097,7 +2194,7 @@ pub struct RustcDepInfo { // Cargo will read it for crates on all future compilations. #[derive(Default)] struct EncodedDepInfo { - files: Vec<(FileSize, FileHash, DepInfoPathType, PathBuf)>, + files: Vec<(Fileprint, DepInfoPathType)>, env: Vec<(String, Option)>, } @@ -2131,10 +2228,12 @@ impl EncodedDepInfo { }; let bytes = read_bytes(bytes)?; files.push(( - size, - FileHash { kind, hash }, + Fileprint { + path: util::bytes2path(bytes).ok()?, + size, + hash: FileHash { kind, hash }, + }, ty, - util::bytes2path(bytes).ok()?, )); } @@ -2180,7 +2279,7 @@ impl EncodedDepInfo { let mut ret = Vec::new(); let dst = &mut ret; write_usize(dst, self.files.len()); - for (size, hash, ty, file) in self.files.iter() { + for (Fileprint { path, size, hash }, ty) in self.files.iter() { //debug!("writing depinfo size as {} ", *size as usize); write_u64(dst, *size); //debug!("writing depinfo hash as {} ", hash.hash.len()); @@ -2195,7 +2294,7 @@ impl EncodedDepInfo { DepInfoPathType::PackageRootRelative => dst.push(0), DepInfoPathType::TargetRootRelative => dst.push(1), } - write_bytes(dst, util::path2bytes(file)?); + write_bytes(dst, util::path2bytes(path)?); } write_usize(dst, self.env.len()); @@ -2264,12 +2363,12 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult if let Some(prev) = prev_line { let file = &prev[0..prev.len() - 1]; for i in 0..ret.files.len() { - if ret.files[i].0.to_string_lossy() == file { + if ret.files[i].path.to_string_lossy() == file { let size_and_hash: Vec<_> = line["# size:".len()..].split(' ').collect(); - ret.files[i].1 = size_and_hash[0].parse()?; + ret.files[i].size = size_and_hash[0].parse()?; let kind_hash: Vec<_> = size_and_hash[1].split(":").collect(); let hash = kind_hash[1]; - ret.files[i].2 = FileHash { + ret.files[i].hash = FileHash { kind: FileHashAlgorithm::from_str(kind_hash[0]) .expect("unknown hashing algo"), hash: hash.to_string(), @@ -2295,14 +2394,14 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult internal("malformed dep-info format, trailing \\".to_string()) })?); } - ret.files.push(( - file.into(), - 0, - FileHash { + ret.files.push(Fileprint { + path: file.into(), + size: 0, + hash: FileHash { kind: FileHashAlgorithm::Md5, - hash: String::new(), + hash: String::new(), //TO DO }, - )); + }); } } else { prev_line = Some(line); From 36fd73d0f756ab70eaf68cde5a4df07eff18e169 Mon Sep 17 
00:00:00 2001 From: Giles Cope Date: Tue, 3 Nov 2020 09:03:23 +0000 Subject: [PATCH 16/39] Use existing cache. Introduce dep_info_cache to stop parsing the same file multiple times. --- src/cargo/core/compiler/context/mod.rs | 11 +- src/cargo/core/compiler/fingerprint.rs | 324 +++++++++++----------- src/cargo/core/compiler/output_depinfo.rs | 40 ++- 3 files changed, 204 insertions(+), 171 deletions(-) diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index ec2114a25db..0a9c3899d33 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -2,12 +2,10 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::path::PathBuf; use std::sync::{Arc, Mutex}; -use filetime::FileTime; use jobserver::Client; -use crate::core::compiler::{ - self, compilation, fingerprint::FileHash, fingerprint::FileSize, Unit, -}; +use crate::core::compiler::fingerprint::{CurrentFileprint, RustcDepInfo}; +use crate::core::compiler::{self, compilation, Unit}; use crate::core::PackageId; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::profile; @@ -40,7 +38,9 @@ pub struct Context<'a, 'cfg> { /// Fingerprints used to detect if a unit is out-of-date. pub fingerprints: HashMap>, /// Cache of file mtimes to reduce filesystem hits. - pub mtime_cache: HashMap)>, + pub mtime_cache: HashMap, + /// Cache of dep_info to reduce filesystem hits. + pub dep_info_cache: HashMap, /// A set used to track which units have been compiled. /// A unit may appear in the job graph multiple times as a dependency of /// multiple packages, but it only needs to run once. @@ -109,6 +109,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> { build_script_outputs: Arc::new(Mutex::new(BuildScriptOutputs::default())), fingerprints: HashMap::new(), mtime_cache: HashMap::new(), + dep_info_cache: HashMap::new(), compiled: HashSet::new(), build_scripts: HashMap::new(), build_explicit_deps: HashMap::new(), diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 751be291f8c..15bc3f79efa 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -312,7 +312,7 @@ //! See the `A-rebuild-detection` flag on the issue tracker for more: //! -use std::collections::hash_map::{Entry, HashMap}; +use std::collections::hash_map::HashMap; use std::convert::TryInto; use std::env; use std::fs; @@ -694,6 +694,80 @@ enum LocalFingerprint { RerunIfEnvChanged { var: String, val: Option }, } +/// Cache of file properties that we know to be true. +/// @todo currentfileprint +pub struct CurrentFileprint { + pub(crate) mtime: FileTime, + /// This will be None if not yet looked up. + size: Option, + /// This will be None if not yet calculated for this file. 
+ hash: Option, +} + +impl CurrentFileprint { + pub(crate) fn new(mtime: FileTime) -> Self { + CurrentFileprint { + mtime, + size: None, + hash: None, + } + } + + pub(crate) fn size(&mut self, file: &Path) -> Option<&FileSize> { + if self.size.is_none() { + self.size = std::fs::metadata(file).map(|metadata| metadata.len()).ok(); + } + self.size.as_ref() + } + + pub(crate) fn hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { + if self.hash.is_none() { + if let Ok(file) = fs::File::open(path) { + let mut reader: io::BufReader = io::BufReader::new(file); + + let hash = match algo { + FileHashAlgorithm::Md5 => { + let mut hasher = Md5::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Sha1 => { + let mut hasher = Sha1::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; + } + hasher.input(&buffer[..count]); + } + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Svh => { + if path.extension() == Some(std::ffi::OsStr::new("rlib")) { + get_svh_from_ar(reader) + } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { + get_svh_from_rmeta_file(reader) + } else { + get_svh_from_object_file(reader) + } + } + }; + + self.hash = hash.map(|hash| FileHash { kind: algo, hash }) + } + } + self.hash.as_ref() + } +} + enum StaleItem { MissingFile(PathBuf), ChangedFile { @@ -724,7 +798,8 @@ impl LocalFingerprint { fn find_stale_item( &self, config: &Config, - mtime_cache: &mut HashMap)>, + mtime_cache: &mut HashMap, + dep_info_cache: &mut HashMap, pkg_root: &Path, target_root: &Path, ) -> CargoResult> { @@ -737,7 +812,13 @@ impl LocalFingerprint { // rustc. LocalFingerprint::CheckDepInfo { dep_info } => { let dep_info = target_root.join(dep_info); - let info = match parse_dep_info(pkg_root, target_root, &dep_info)? { + if !dep_info_cache.contains_key(&dep_info) { + if let Some(rustc_dep_info) = parse_dep_info(pkg_root, target_root, &dep_info)? 
+ { + dep_info_cache.insert(dep_info.clone(), rustc_dep_info); + } + } + let info = match dep_info_cache.get(&dep_info) { Some(info) => info, None => return Ok(Some(StaleItem::MissingFile(dep_info))), }; @@ -993,7 +1074,8 @@ impl Fingerprint { fn check_filesystem( &mut self, config: &Config, - mtime_cache: &mut HashMap)>, + mtime_cache: &mut HashMap, + dep_info_cache: &mut HashMap, pkg_root: &Path, target_root: &Path, dep_info_loc: PathBuf, @@ -1079,75 +1161,85 @@ impl Fingerprint { //todo: need to do raw .rmeta files if dep_mtime > max_mtime { - // let (dep_path, dep_mtime) = if dep.only_requires_rmeta { - // dep_mtimes - // .iter() - // .find(|(path, _mtime)| { - // path.extension().and_then(|s| s.to_str()) == Some("rmeta") - // }) - // .expect("failed to find rmeta") // } else { @todo here for (dep_in, dep_mtime) in dep_mtimes { + if dep.only_requires_rmeta + && dep_in.extension().and_then(|s| s.to_str()) != Some("rmeta") + { + continue; + } + if dep_mtime > max_mtime { let dep_info = dep_info_loc .strip_prefix(&target_root) .unwrap() .to_path_buf(); - let dep_info = target_root.join(dep_info); println!("HASH dep info file {:?}", &dep_info); + let dep_info_file = target_root.join(dep_info); + + let rustc_dep_info = dep_info_cache.get(&dep_info_file); + if rustc_dep_info.is_none() { + let dep = parse_dep_info(pkg_root, target_root, &dep_info_file)?; + if let Some(dep) = dep { + dep_info_cache.insert(dep_info_file.clone(), dep); + } + } - let dep_info = parse_dep_info(pkg_root, target_root, &dep_info)?; let mut stale = false; - if let Some(dep_info) = dep_info { - for file in dep_info.files { + if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { + for reference in &rustc_dep_info.files { //println!("HASH dep info {:?}", file); - //TODO hideiously inefficient! - if *dep_in == file.path { - match std::fs::metadata(dep_in) { - // For file difference checking just check the lower bits of file size - Ok(metadata) => { - if file.size != metadata.len() || file.size == 0 { - stale = true; - println!("HASH file size discrepency {:?}", file); - break; - } - } - Err(..) => { + if *dep_in == reference.path { + let mut file_facts = mtime_cache.get_mut(dep_in); + if file_facts.is_none() { + mtime_cache.insert( + dep_in.clone(), + CurrentFileprint::new(*dep_mtime), + ); + file_facts = mtime_cache.get_mut(dep_in); + } + let file_facts = file_facts.unwrap(); + + if let Some(current_size) = file_facts.size(dep_in) { + if *current_size != reference.size { stale = true; - println!("HASH couldn't read file {:?}", file); break; } + } else { + stale = true; + break; } - if let Some(hash) = get_hash(dep_in, file.hash.kind) { - println!("HASH got hash file!!!! {:?}", hash); - if file.hash.hash != hash || hash == "" { - stale = true; + let current_hash = file_facts.hash(dep_in, reference.hash.kind); + + //println!("HASH got hash file!!!! {:?}", hash); + if let Some(file_facts_hash) = current_hash { + if reference.hash == *file_facts_hash { + println!("HASH hit - same hash! {:?}", file_facts.hash); } else { - println!("HASH hit - same hash! {:?}", hash); + // println!("HASH s {:?}", file_facts.hash); + stale = true; + break; } } else { stale = true; + break; } - break; } } } else { - println!("HASH dep info not found!"); + stale = true; } - // if let Some(item) = local.find_stale_item(config, mtime_cache, pkg_root, dep)? 
{ - - // } if stale { info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path); info!( - "HASH dependency on `{}` is newer than we are {} > {} {:?} {:?}", + "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", dep.name, dep_mtime, max_mtime, pkg_root, dep_path ); return Ok(()); } } else { - println!("HASH dep skipped as up to date"); + // debug!("HASH dep skipped as up to date"); } // debug!("HASH had to look at all the files {:?}", &dep); } @@ -1160,7 +1252,9 @@ impl Fingerprint { // files for this package itself. If we do find something log a helpful // message and bail out so we stay stale. for local in self.local.get_mut().unwrap().iter() { - if let Some(item) = local.find_stale_item(config, mtime_cache, pkg_root, target_root)? { + if let Some(item) = + local.find_stale_item(config, mtime_cache, dep_info_cache, pkg_root, target_root)? + { item.log(); return Ok(()); } @@ -1339,6 +1433,7 @@ fn calculate(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult, pkg: &Package) -> CargoResult)>, + mtime_cache: &mut HashMap, reference: &Path, paths: &[Fileprint], ) -> Option { @@ -1813,27 +1908,14 @@ fn find_stale_file( hash: reference_hash, } in paths { - let path = &path; - - let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) { - Entry::Occupied(o) => o.get().clone(), //FIXME? do we need to clone here? - Entry::Vacant(v) => { - let mtime = match paths::mtime(path) { - Ok(mtime) => mtime, - Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), - }; - let current_size = if config.cli_unstable().hash_tracking { - match std::fs::metadata(path) { - // For file difference checking just check the lower bits of file size - Ok(metadata) => metadata.len(), - Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), //todo - } - } else { - 0 - }; - v.insert((mtime, current_size, None)).clone() // Hash calculated only if needed later. - } - }; + if !mtime_cache.contains_key(path) { + let mtime = match paths::mtime(path) { + Ok(mtime) => mtime, + Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), + }; + mtime_cache.insert(path.to_path_buf(), CurrentFileprint::new(mtime)); + } + let current = mtime_cache.get_mut(path).unwrap(); // TODO: fix #5918. // Note that equal mtimes should be considered "stale". For filesystems with @@ -1854,69 +1936,39 @@ fn find_stale_file( // Unfortunately this became problematic when (in #6484) cargo switch to more accurately // measuring the start time of builds. - // Has size changed? - if config.cli_unstable().hash_tracking - && *reference_size > 0 - && path_size != *reference_size - { - return Some(StaleItem::ChangedFile { - reference: reference.to_path_buf(), - reference_mtime, - stale: path.to_path_buf(), - stale_mtime: path_mtime, - }); - } - - if path_mtime <= reference_mtime { + if current.mtime <= reference_mtime { continue; } - // Same size but mtime is different. Probably there's no change... - // compute hash and compare to prevent change cascade... - if config.cli_unstable().hash_tracking && !reference_hash.hash.is_empty() { - let new_hash = if let Some(path_hash) = path_hash { - //FIXME use unwrap_or - Some(path_hash.hash.clone()) - } else { - // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. - // but not sure it's worth the additional complexity. - //FIXME put the result in the mtime_cache rather than hashing each time! 
- let new_hash = get_hash(&path, reference_hash.kind); - if let Some(ref hash) = new_hash { - let cached = mtime_cache.get_mut(&path.to_path_buf()).unwrap(); - cached.2 = Some(FileHash { - kind: reference_hash.kind, - hash: hash.clone(), - }); - } - new_hash - }; - - if let Some(new_hash) = new_hash { - if reference_hash.hash == new_hash { - debug!( - "HASH: Hash hit: mtime mismatch but contents match for {:?}", - &path - ); - continue; + if config.cli_unstable().hash_tracking { + if let Some(current_size) = current.size(path) { + if *current_size == *reference_size { + // Same size but mtime is different. Probably there's no change... + // compute hash and compare to prevent change cascade... + if let Some(current_hash) = current.hash(path, reference_hash.kind) { + // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. + // but not sure it's worth the additional complexity. + if *reference_hash == *current_hash { + debug!( + "HASH: Hash hit: mtime mismatch but contents match for {:?}", + &path + ); + continue; + } + debug!( + "HASH: Hash miss for {:?}: {} (ref) != {}", + &path, reference_hash.hash, current_hash.hash + ); + } } - debug!( - "HASH: Hash miss for {:?}: {} (ref) != {}", - &path, reference_hash.hash, new_hash - ); - } else { - debug!( - "HASH: Hash miss (unavalable) for {:?} to compare with ref {}", - &path, reference_hash.hash - ); } - } + }; return Some(StaleItem::ChangedFile { reference: reference.to_path_buf(), reference_mtime, stale: path.to_path_buf(), - stale_mtime: path_mtime, + stale_mtime: current.mtime, }); } @@ -1927,46 +1979,6 @@ fn find_stale_file( None } -fn get_hash(path: &Path, algo: FileHashAlgorithm) -> Option { - let mut reader: io::BufReader = io::BufReader::new(fs::File::open(path).ok()?); - - match algo { - FileHashAlgorithm::Md5 => { - let mut hasher = Md5::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } - FileHashAlgorithm::Sha1 => { - let mut hasher = Sha1::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } - FileHashAlgorithm::Svh => { - if path.extension() == Some(std::ffi::OsStr::new("rlib")) { - get_svh_from_ar(reader) - } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { - get_svh_from_rmeta_file(reader) - } else { - get_svh_from_object_file(reader) - } - } - } -} - fn to_hex(bytes: &[u8]) -> String { let mut result = String::with_capacity(bytes.len() * 2); for byte in bytes { diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index b93a7a0795c..cf832b557c4 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -29,6 +29,7 @@ use std::path::{Path, PathBuf}; use log::debug; use super::{fingerprint, Context, FileFlavor, Unit}; +use crate::core::compiler::fingerprint::{FileHash, Fileprint}; use crate::util::paths; use crate::util::{internal, CargoResult}; @@ -48,7 +49,7 @@ fn render_filename>(path: P, basedir: Option<&str>) -> CargoResul } fn add_deps_for_unit( - deps: &mut BTreeSet, + deps: &mut BTreeSet, cx: &mut Context<'_, '_>, unit: &Unit, visited: &mut HashSet, @@ -62,11 +63,12 @@ fn add_deps_for_unit( if !unit.mode.is_run_custom_build() { // Add dependencies from rustc dep-info 
output (stored in fingerprint directory)
         let dep_info_loc = fingerprint::dep_info_loc(cx, unit);
+        //TODO: can we use the dep info cache here?
         if let Some(paths) =
             fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)?
         {
             for path in paths.files {
-                deps.insert(path.0); //FIXME can we track size/hash of custom builds?
+                deps.insert(path);
             }
         } else {
             debug!(
@@ -87,7 +89,14 @@ fn add_deps_for_unit(
         .get(unit.pkg.package_id(), metadata)
     {
         for path in &output.rerun_if_changed {
-            deps.insert(path.into());
+            deps.insert(Fileprint {
+                path: path.into(),
+                size: 0,
+                hash: FileHash {
+                    kind: fingerprint::FileHashAlgorithm::Md5,
+                    hash: String::new(),
+                },
+            }); //TODO
         }
     }
 }
@@ -107,7 +116,7 @@
 /// This only saves files for uplifted artifacts.
 pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> {
     let bcx = cx.bcx;
-    let mut deps = BTreeSet::new();
+    let mut deps: BTreeSet<Fileprint> = BTreeSet::new();
     let mut visited = HashSet::new();
     let success = add_deps_for_unit(&mut deps, cx, unit, &mut visited).is_ok();
     let basedir_string;
@@ -125,7 +134,12 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()>
     };
     let deps = deps
         .iter()
-        .map(|f| render_filename(f, basedir))
+        .map(|f| {
+            render_filename(&f.path, basedir).map(|path| Fileprint {
+                path: PathBuf::from(path),
+                ..(*f).clone()
+            })
+        })
         .collect::<CargoResult<Vec<_>>>()?;

     for output in cx
@@ -144,19 +158,25 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()>
             if previous
                 .files
                 .iter()
-                .map(|(path, _size, _hash)| path)
-                .eq(deps.iter().map(Path::new))
+                // .map(|(path, _size, _hash)| path)
+                .eq(deps.iter())
+                //.map(|f| (Path::new(p), size, hash)))
             {
-                //FIXME we could check for size differences here?
                 continue;
             }
         }

         // Otherwise write it all out
+        debug!("HASH: detected change in dependencies file!!!");
         let mut outfile = BufWriter::new(paths::create(output_path)?);
         write!(outfile, "{}:", target_fn)?;
-        for dep in &deps {
-            write!(outfile, " {}", dep)?;
+        for Fileprint {
+            path: dep,
+            size: _,
+            hash: _,
+        } in &deps
+        {
+            write!(outfile, " {}", dep.to_string_lossy())?; //TO DO - should we be writing out hash/filesize here?
         }
         writeln!(outfile)?;

         // dep-info generation failed, so delete output file. This will
         // usually cause the build system to always rerun the build

From c25d8208d3de039e48b866522f95c8130d565329 Mon Sep 17 00:00:00 2001
From: Giles Cope
Date: Sat, 7 Nov 2020 11:38:12 +0000
Subject: [PATCH 17/39] Signs that the caching of build.rs might be working but
 now output of running it has no fallback aside from mtime... last hurdle?

---
 src/cargo/core/compiler/custom_build.rs   |   1 +
 src/cargo/core/compiler/fingerprint.rs    | 149 +++++++++++++++++-----
 src/cargo/core/compiler/output_depinfo.rs |  39 ++++--
 3 files changed, 144 insertions(+), 45 deletions(-)

diff --git a/src/cargo/core/compiler/custom_build.rs b/src/cargo/core/compiler/custom_build.rs
index 7e02008c8a1..16c9f28aa77 100644
--- a/src/cargo/core/compiler/custom_build.rs
+++ b/src/cargo/core/compiler/custom_build.rs
@@ -344,6 +344,7 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Job> {
     // And now finally, run the build command itself!
state.running(&cmd); let timestamp = paths::set_invocation_time(&script_run_dir)?; + println!("HASH EXEC {:?}", &script_run_dir); let prefix = format!("[{} {}] ", id.name(), id.version()); let mut warnings_in_case_of_panic = Vec::new(); let output = cmd diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 15bc3f79efa..13a4fc93946 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -350,7 +350,8 @@ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. pub type FileSize = u64; -#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +//TODO: implement hash yourself +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct FileHash { pub kind: FileHashAlgorithm, pub hash: String, @@ -684,7 +685,7 @@ enum LocalFingerprint { /// `output`, otherwise we need to recompile. RerunIfChanged { output: PathBuf, - paths: Vec, + paths: Vec<(PathBuf, FileSize, FileHash)>, }, /// This represents a single `rerun-if-env-changed` annotation printed by a @@ -841,15 +842,10 @@ impl LocalFingerprint { LocalFingerprint::RerunIfChanged { output, paths } => { let c: Vec<_> = paths .iter() - .map(|p| { - Fileprint { - path: pkg_root.join(p), - size: 0u64, - hash: FileHash { - kind: FileHashAlgorithm::Md5, - hash: String::new(), // TO DO - }, - } + .map(|(p, size, hash)| Fileprint { + path: pkg_root.join(p), + size: *size, + hash: hash.clone(), }) .collect(); Ok(find_stale_file( @@ -1136,9 +1132,9 @@ impl Fingerprint { }) .expect("failed to find rmeta") } else { - for (dep, _dep2) in dep_mtimes { - debug!("HASH had to look at all the files {:?}", &dep); - } + // for (dep, _dep2) in dep_mtimes { + // debug!("HASH had to look at all the files {:?}", &dep); + // } match dep_mtimes.iter().max_by_key(|kv| kv.1) { Some(dep_mtime) => dep_mtime, // If our dependencies is up to date and has no filesystem @@ -1175,20 +1171,64 @@ impl Fingerprint { .unwrap() .to_path_buf(); println!("HASH dep info file {:?}", &dep_info); - let dep_info_file = target_root.join(dep_info); + + let dep_info_file = if dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build") + { + // // let dir_hash = dep_path + // // .to_str() + // // .unwrap() + // // .split('-') + // // .last() + // // .unwrap_or_default(); + // let mut d = dep_path.parent().unwrap().to_path_buf(); + // d = d.join("dep-run-build-script-build-script-build"); + // // d.set_extension("d"); + // d + // //join(format!("build_script_build-{}.d", dir_hash)) + let mut ddep_info = PathBuf::new(); + let x = dep.fingerprint.local.lock().unwrap(); + for local_dep in (*x).iter() { + match local_dep { + LocalFingerprint::CheckDepInfo { dep_info } => { + ddep_info = dep_info.to_path_buf() + } + _ => {} + } + // println!("{:#?}", local_dep.pa); + } + target_root.join(&ddep_info).to_path_buf() + } else { + target_root.join(&dep_info) + }; + + //let dep_info_file = target_root.join(dep_info); + println!("dep info file: {:?}", &dep_info_file); let rustc_dep_info = dep_info_cache.get(&dep_info_file); if rustc_dep_info.is_none() { - let dep = parse_dep_info(pkg_root, target_root, &dep_info_file)?; - if let Some(dep) = dep { - dep_info_cache.insert(dep_info_file.clone(), dep); + let dep_result = parse_dep_info(pkg_root, target_root, &dep_info_file); + match dep_result { + Ok(dep) => { + if let Some(dep) = dep { + println!("HASH dep info file parsed"); + 
dep_info_cache.insert(dep_info_file.clone(), dep); + } else { + println!("HASH dep info file could not be parsed"); + } + } + Err(err) => println!("HASH error loading dep info file {}", err), } + } else { + println!("HASH CACHE hit on dep info file"); } - let mut stale = false; + let mut stale = None; if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { for reference in &rustc_dep_info.files { - //println!("HASH dep info {:?}", file); + //println!("HASH dep info ref {:?}", &reference); if *dep_in == reference.path { let mut file_facts = mtime_cache.get_mut(dep_in); if file_facts.is_none() { @@ -1202,11 +1242,17 @@ impl Fingerprint { if let Some(current_size) = file_facts.size(dep_in) { if *current_size != reference.size { - stale = true; + stale = Some(format!( + "File sizes don't match {:?} expected: {}", + current_size, reference.size + )); break; } } else { - stale = true; + stale = Some(format!( + "File sizes was not obtainable expected: {}", + reference.size + )); break; } @@ -1218,24 +1264,35 @@ impl Fingerprint { println!("HASH hit - same hash! {:?}", file_facts.hash); } else { // println!("HASH s {:?}", file_facts.hash); - stale = true; + stale = Some(format!( + "Hash {:?} doesn't match expected: {:?}", + &file_facts_hash, &reference.hash + )); break; } } else { - stale = true; + stale = Some(format!( + "No hash found in the dep info file to compare to {:?}", + &reference.hash + )); break; } } } } else { - stale = true; + stale = Some("HASH dep info file could not be found".into()); } - if stale { - info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path); + if stale.is_some() { + let x = dep.fingerprint.local.lock().unwrap(); + for local_dep in (*x).iter() { + println!("{:#?}", local_dep); + } + info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path,); info!( "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", dep.name, dep_mtime, max_mtime, pkg_root, dep_path ); + info!("HASHMISS also {:?}", stale); return Ok(()); } } else { @@ -1256,6 +1313,7 @@ impl Fingerprint { local.find_stale_item(config, mtime_cache, dep_info_cache, pkg_root, target_root)? 
{ item.log(); + println!("HASHMISS we are failing here"); return Ok(()); } } @@ -1738,7 +1796,20 @@ fn local_fingerprints_deps( let paths = deps .rerun_if_changed .iter() - .map(|p| p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf()) + .map(|p| { + let mut f = CurrentFileprint::new(FileTime::zero()); + let hash = (*f.hash(p, FileHashAlgorithm::Md5).unwrap_or(&FileHash { + kind: FileHashAlgorithm::Md5, + hash: "".into(), + })) + .clone(); + let size = *f.size(p).unwrap_or(&0); + ( + p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf(), + size, + hash, + ) + }) .collect(); local.push(LocalFingerprint::RerunIfChanged { output, paths }); } @@ -1858,12 +1929,15 @@ pub fn parse_dep_info( ) -> CargoResult> { let data = match paths::read_bytes(dep_info) { Ok(data) => data, - Err(_) => return Ok(None), + Err(err) => { + println!("HASH Couldn't read bytes from dep info file: {}", err); + return Ok(None); + } }; let info = match EncodedDepInfo::parse(&data) { Some(info) => info, None => { - log::warn!("failed to parse cargo's dep-info at {:?}", dep_info); + println!("HASH failed to parse cargo's dep-info at {:?}", dep_info); return Ok(None); } }; @@ -2046,7 +2120,7 @@ fn parse_svh(data: &[u8]) -> Option { Some(String::from_utf8_lossy(&data[..svh_len]).to_string()) } -#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug)] +#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug, Serialize, Deserialize, Hash)] pub enum FileHashAlgorithm { /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. Svh, @@ -2067,6 +2141,17 @@ impl FromStr for FileHashAlgorithm { } } +impl std::fmt::Display for FileHashAlgorithm { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + match self { + Self::Md5 => fmt.write_fmt(format_args!("md5"))?, + Self::Svh => fmt.write_fmt(format_args!("svh"))?, + Self::Sha1 => fmt.write_fmt(format_args!("sha1"))?, + }; + Ok(()) + } +} + enum DepInfoPathType { // src/, e.g. src/lib.rs PackageRootRelative, @@ -2194,7 +2279,7 @@ pub struct RustcDepInfo { /// A file location with identifying properties: size and hash. #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug)] pub struct Fileprint { - pub path: PathBuf, + pub path: PathBuf, //TODO is this field needed on here? 
pub size: FileSize,
     pub hash: FileHash,
 }

diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs
index cf832b557c4..43e0a8304b6 100644
--- a/src/cargo/core/compiler/output_depinfo.rs
+++ b/src/cargo/core/compiler/output_depinfo.rs
@@ -29,9 +29,10 @@ use std::path::{Path, PathBuf};
 use log::debug;

 use super::{fingerprint, Context, FileFlavor, Unit};
-use crate::core::compiler::fingerprint::{FileHash, Fileprint};
+use crate::core::compiler::fingerprint::{CurrentFileprint, Fileprint};
 use crate::util::paths;
 use crate::util::{internal, CargoResult};
+use filetime::FileTime;

 fn render_filename<P: AsRef<Path>>(path: P, basedir: Option<&str>) -> CargoResult<String> {
     let path = path.as_ref();
@@ -89,14 +90,15 @@ fn add_deps_for_unit(
         .get(unit.pkg.package_id(), metadata)
     {
         for path in &output.rerun_if_changed {
+            let mut file_print = CurrentFileprint::new(FileTime::zero());
             deps.insert(Fileprint {
-                path: path.into(),
-                size: 0,
-                hash: FileHash {
-                    kind: fingerprint::FileHashAlgorithm::Md5,
-                    hash: String::new(),
-                },
-            }); //TODO
+                path: path.to_path_buf(),
+                size: *file_print.size(path).unwrap(),
+                hash: file_print
+                    .hash(path, fingerprint::FileHashAlgorithm::Md5)
+                    .unwrap()
+                    .clone(),
+            });
         }
     }
 }
@@ -167,18 +169,29 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()>
         }

         // Otherwise write it all out
-        debug!("HASH: detected change in dependencies file!!!");
+        debug!(
+            "HASH: detected change in dependencies file - rewriting: {:?}",
+            output_path
+        );
         let mut outfile = BufWriter::new(paths::create(output_path)?);
         write!(outfile, "{}:", target_fn)?;
+        for Fileprint { path: dep, .. } in &deps {
+            write!(outfile, " {}", dep.to_string_lossy())?;
+        }
+        writeln!(outfile)?;
+
+        // Emit a fake target for each input file to the compilation. This
+        // prevents `make` from spitting out an error if a file is later
+        // deleted. For more info see #28735
         for Fileprint {
             path: dep,
-            size: _,
-            hash: _,
+            size,
+            hash,
         } in &deps
         {
-            write!(outfile, " {}", dep.to_string_lossy())?; //TO DO - should we be writing out hash/filesize here?
+            writeln!(outfile, "{}:", dep.to_string_lossy())?;
+            writeln!(outfile, "# size:{} {}:{}", size, hash.kind, hash.hash)?;
         }
-        writeln!(outfile)?;

         // dep-info generation failed, so delete output file. This will
         // usually cause the build system to always rerun the build

From 4e917b7dea75d92456ed6aa4da92a44ecb4bbb4a Mon Sep 17 00:00:00 2001
From: Giles Cope
Date: Sun, 8 Nov 2020 21:40:05 +0000
Subject: [PATCH 18/39] Tentative output hashing

---
 src/cargo/core/compiler/fingerprint.rs | 269 +++++++++++++++----------
 1 file changed, 165 insertions(+), 104 deletions(-)

diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs
index 13a4fc93946..16d42d5455d 100644
--- a/src/cargo/core/compiler/fingerprint.rs
+++ b/src/cargo/core/compiler/fingerprint.rs
@@ -569,7 +569,7 @@ pub struct Fingerprint {
     /// fingerprint is out of date if this is missing, or if previous
     /// fingerprints output files are regenerated and look newer than this one.
     #[serde(skip)]
-    outputs: Vec<PathBuf>,
+    outputs: Vec<(PathBuf, Option<FileSize>, Option<FileHash>)>,
 }

 /// Indication of the status on the filesystem for a particular unit.
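For round-tripping, the `# size:` annotations written by `output_depinfo` above are what `parse_rustc_dep_info` picks apart again on the next build. A minimal sketch of that reverse parse, assuming exactly the `# size:<len> <kind>:<hex>` shape emitted above (the helper itself is hypothetical, not part of the patch):

    // Splits "# size:0 md5:d41d8cd98f00b204e9800998ecf8427e" into (0, "md5", "d41d...").
    fn split_size_hash(line: &str) -> Option<(u64, &str, &str)> {
        let rest = line.strip_prefix("# size:")?;
        let mut parts = rest.splitn(2, ' ');
        let size = parts.next()?.parse().ok()?;
        let mut kind_hash = parts.next()?.splitn(2, ':');
        Some((size, kind_hash.next()?, kind_hash.next()?))
    }

Note that d41d8cd98f00b204e9800998ecf8427e is the md5 of empty input, matching a `size:0` entry.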
@@ -721,6 +721,7 @@ impl CurrentFileprint { self.size.as_ref() } + //TODO these need to not take self pub(crate) fn hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { if self.hash.is_none() { if let Ok(file) = fs::File::open(path) { @@ -1083,7 +1084,8 @@ impl Fingerprint { // afterwards based on the `mtime_on_use` flag. Afterwards we want the // minimum mtime as it's the one we'll be comparing to inputs and // dependencies. - for output in self.outputs.iter() { + for (output, file_size, hash) in self.outputs.iter() { + println!("HASH is this too many files? {:?}", &output); let mtime = match paths::mtime(output) { Ok(mtime) => mtime, @@ -1172,128 +1174,164 @@ impl Fingerprint { .to_path_buf(); println!("HASH dep info file {:?}", &dep_info); - let dep_info_file = if dep_info - .to_str() - .unwrap() - .contains("dep-run-build-script-build-script-build") + if dep_path.to_str().unwrap().ends_with("output") + && dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build") { - // // let dir_hash = dep_path - // // .to_str() - // // .unwrap() - // // .split('-') - // // .last() - // // .unwrap_or_default(); - // let mut d = dep_path.parent().unwrap().to_path_buf(); - // d = d.join("dep-run-build-script-build-script-build"); - // // d.set_extension("d"); - // d - // //join(format!("build_script_build-{}.d", dir_hash)) - let mut ddep_info = PathBuf::new(); - let x = dep.fingerprint.local.lock().unwrap(); - for local_dep in (*x).iter() { - match local_dep { - LocalFingerprint::CheckDepInfo { dep_info } => { - ddep_info = dep_info.to_path_buf() + println!("HASH output file detected "); + let mut stale = true; + for (path, size, hash) in &dep.fingerprint.outputs { + if path == dep_in { + println!("HASH oh found it {:?} {:?}, {:?}", path, size, hash); + + let mut f = CurrentFileprint::new(FileTime::zero()); + if size.is_some() + && hash.is_some() + && f.size(dep_in) == size.as_ref() + && f.hash(dep_in, FileHashAlgorithm::Md5) == hash.as_ref() + { + println!("HASH oh hit {:?} {:?} {:?}", path, size, hash); + stale = false; + break; } - _ => {} } - // println!("{:#?}", local_dep.pa); } - target_root.join(&ddep_info).to_path_buf() + if stale { + return Ok(()); + } } else { - target_root.join(&dep_info) - }; - - //let dep_info_file = target_root.join(dep_info); - println!("dep info file: {:?}", &dep_info_file); - - let rustc_dep_info = dep_info_cache.get(&dep_info_file); - if rustc_dep_info.is_none() { - let dep_result = parse_dep_info(pkg_root, target_root, &dep_info_file); - match dep_result { - Ok(dep) => { - if let Some(dep) = dep { - println!("HASH dep info file parsed"); - dep_info_cache.insert(dep_info_file.clone(), dep); - } else { - println!("HASH dep info file could not be parsed"); + let dep_info_file = if dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build") + { + // // let dir_hash = dep_path + // // .to_str() + // // .unwrap() + // // .split('-') + // // .last() + // // .unwrap_or_default(); + // let mut d = dep_path.parent().unwrap().to_path_buf(); + // d = d.join("dep-run-build-script-build-script-build"); + // // d.set_extension("d"); + // d + // //join(format!("build_script_build-{}.d", dir_hash)) + let mut ddep_info = PathBuf::new(); + let x = dep.fingerprint.local.lock().unwrap(); + for local_dep in (*x).iter() { + match local_dep { + LocalFingerprint::CheckDepInfo { dep_info } => { + ddep_info = dep_info.to_path_buf() + } + _ => {} } + // println!("{:#?}", local_dep.pa); } - Err(err) => 
println!("HASH error loading dep info file {}", err), - } - } else { - println!("HASH CACHE hit on dep info file"); - } - - let mut stale = None; - if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { - for reference in &rustc_dep_info.files { - //println!("HASH dep info ref {:?}", &reference); - if *dep_in == reference.path { - let mut file_facts = mtime_cache.get_mut(dep_in); - if file_facts.is_none() { - mtime_cache.insert( - dep_in.clone(), - CurrentFileprint::new(*dep_mtime), - ); - file_facts = mtime_cache.get_mut(dep_in); + target_root.join(&ddep_info).to_path_buf() + } else { + target_root.join(&dep_info) + }; + + //let dep_info_file = target_root.join(dep_info); + println!("dep info file: {:?}", &dep_info_file); + + let rustc_dep_info = dep_info_cache.get(&dep_info_file); + if rustc_dep_info.is_none() { + let dep_result = + parse_dep_info(pkg_root, target_root, &dep_info_file); + match dep_result { + Ok(dep) => { + if let Some(dep) = dep { + println!("HASH dep info file parsed"); + dep_info_cache.insert(dep_info_file.clone(), dep); + } else { + println!("HASH dep info file could not be parsed"); + } } - let file_facts = file_facts.unwrap(); + Err(err) => { + println!("HASH error loading dep info file {}", err) + } + } + } else { + println!("HAS CACHE hit on dep info file"); + } - if let Some(current_size) = file_facts.size(dep_in) { - if *current_size != reference.size { + let mut stale = None; + if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { + for reference in &rustc_dep_info.files { + //println!("HASH dep info ref {:?}", &reference); + if *dep_in == reference.path { + let mut file_facts = mtime_cache.get_mut(dep_in); + if file_facts.is_none() { + mtime_cache.insert( + dep_in.clone(), + CurrentFileprint::new(*dep_mtime), + ); + file_facts = mtime_cache.get_mut(dep_in); + } + let file_facts = file_facts.unwrap(); + + if let Some(current_size) = file_facts.size(dep_in) { + if *current_size != reference.size { + stale = Some(format!( + "File sizes don't match {:?} expected: {}", + current_size, reference.size + )); + break; + } + } else { stale = Some(format!( - "File sizes don't match {:?} expected: {}", - current_size, reference.size + "File sizes was not obtainable expected: {}", + reference.size )); break; } - } else { - stale = Some(format!( - "File sizes was not obtainable expected: {}", - reference.size - )); - break; - } - let current_hash = file_facts.hash(dep_in, reference.hash.kind); - - //println!("HASH got hash file!!!! {:?}", hash); - if let Some(file_facts_hash) = current_hash { - if reference.hash == *file_facts_hash { - println!("HASH hit - same hash! {:?}", file_facts.hash); + let current_hash = + file_facts.hash(dep_in, reference.hash.kind); + + //println!("HASH got hash file!!!! {:?}", hash); + if let Some(file_facts_hash) = current_hash { + if reference.hash == *file_facts_hash { + println!( + "HAS hit - same hash! 
{:?}", + file_facts.hash + ); + } else { + // println!("HASH s {:?}", file_facts.hash); + stale = Some(format!( + "Hash {:?} doesn't match expected: {:?}", + &file_facts_hash, &reference.hash + )); + break; + } } else { - // println!("HASH s {:?}", file_facts.hash); stale = Some(format!( - "Hash {:?} doesn't match expected: {:?}", - &file_facts_hash, &reference.hash + "No hash found in the dep info file to compare to {:?}", + &reference.hash )); break; } - } else { - stale = Some(format!( - "No hash found in the dep info file to compare to {:?}", - &reference.hash - )); - break; } } + } else { + stale = Some("HASH dep info file could not be found".into()); } - } else { - stale = Some("HASH dep info file could not be found".into()); - } - if stale.is_some() { - let x = dep.fingerprint.local.lock().unwrap(); - for local_dep in (*x).iter() { - println!("{:#?}", local_dep); + if stale.is_some() { + let x = dep.fingerprint.local.lock().unwrap(); + for local_dep in (*x).iter() { + println!("{:#?}", local_dep); + } + info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path,); + info!( + "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", + dep.name, dep_mtime, max_mtime, pkg_root, dep_path + ); + info!("HASHMISS also {:?}", stale); + return Ok(()); } - info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path,); - info!( - "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", - dep.name, dep_mtime, max_mtime, pkg_root, dep_path - ); - info!("HASHMISS also {:?}", stale); - return Ok(()); } } else { // debug!("HASH dep skipped as up to date"); @@ -1544,7 +1582,16 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult, unit: &Unit) -> CargoRes .collect::>>()? }; + let mut output_fileprint = CurrentFileprint::new(FileTime::zero()); + Ok(Fingerprint { local: Mutex::new(local), rustc: util::hash_u64(&cx.bcx.rustc().verbose_version), deps, - outputs: if overridden { Vec::new() } else { vec![output] }, + outputs: if overridden { + Vec::new() + } else { + let size = output_fileprint.size(&output).map(|c| *c); + let hash = output_fileprint + .hash(&output, FileHashAlgorithm::Md5) + .map(|c| c.clone()); + vec![(output, size, hash)] + }, // Most of the other info is blank here as we don't really include it // in the execution of the build script, but... this may be a latent @@ -1698,6 +1755,7 @@ fn build_script_local_fingerprints( // First up, if this build script is entirely overridden, then we just // return the hash of what we overrode it with. This is the easy case! if let Some(fingerprint) = build_script_override_fingerprint(cx, unit) { + debug!("HACK override local fingerprints deps {}", unit.pkg); debug!("override local fingerprints deps {}", unit.pkg); return ( Box::new( @@ -1733,6 +1791,8 @@ fn build_script_local_fingerprints( // (like for a path dependency). Those list of files would // be stored here rather than the the mtime of them. Some(f) => { + println!("HASH HASH OLD MODE DETECTED OLD MODE THIS IS SLOW?"); + let s = f()?; debug!( "old local fingerprints deps {:?} precalculated={:?}", @@ -1762,6 +1822,7 @@ fn build_script_override_fingerprint( ) -> Option { // Build script output is only populated at this stage when it is // overridden. + println!("HASH build script overriden!!!!!"); let build_script_outputs = cx.build_script_outputs.lock().unwrap(); let metadata = cx.get_run_build_script_metadata(unit); // Returns None if it is not overridden. 
@@ -2024,7 +2085,7 @@ fn find_stale_file( // but not sure it's worth the additional complexity. if *reference_hash == *current_hash { debug!( - "HASH: Hash hit: mtime mismatch but contents match for {:?}", + "HAS: Hash hit: mtime mismatch but contents match for {:?}", &path ); continue; From a9cd845d767ddb4294a040748f12b11bc0e0803e Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 07:15:46 +0000 Subject: [PATCH 19/39] Less clones --- src/cargo/core/compiler/fingerprint.rs | 133 +++++++++++----------- src/cargo/core/compiler/output_depinfo.rs | 10 +- 2 files changed, 70 insertions(+), 73 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 16d42d5455d..3600b5fba68 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -716,57 +716,66 @@ impl CurrentFileprint { pub(crate) fn size(&mut self, file: &Path) -> Option<&FileSize> { if self.size.is_none() { - self.size = std::fs::metadata(file).map(|metadata| metadata.len()).ok(); + self.size = Self::calc_size(file); } self.size.as_ref() } - //TODO these need to not take self + pub(crate) fn calc_size(file: &Path) -> Option { + std::fs::metadata(file).map(|metadata| metadata.len()).ok() + } + pub(crate) fn hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { if self.hash.is_none() { - if let Ok(file) = fs::File::open(path) { - let mut reader: io::BufReader = io::BufReader::new(file); - - let hash = match algo { - FileHashAlgorithm::Md5 => { - let mut hasher = Md5::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); + self.hash = Self::calc_hash(path, algo); + } + self.hash.as_ref() + } + + // TODO: for direct calls to these can we cache them in the mtimes cache? 
+ pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option { + if let Ok(file) = fs::File::open(path) { + let mut reader: io::BufReader = io::BufReader::new(file); + + let hash = match algo { + FileHashAlgorithm::Md5 => { + let mut hasher = Md5::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; } - Some(to_hex(&hasher.result())) + hasher.input(&buffer[..count]); } - FileHashAlgorithm::Sha1 => { - let mut hasher = Sha1::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Sha1 => { + let mut hasher = Sha1::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; } - Some(to_hex(&hasher.result())) + hasher.input(&buffer[..count]); } - FileHashAlgorithm::Svh => { - if path.extension() == Some(std::ffi::OsStr::new("rlib")) { - get_svh_from_ar(reader) - } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { - get_svh_from_rmeta_file(reader) - } else { - get_svh_from_object_file(reader) - } + Some(to_hex(&hasher.result())) + } + FileHashAlgorithm::Svh => { + if path.extension() == Some(std::ffi::OsStr::new("rlib")) { + get_svh_from_ar(reader) + } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { + get_svh_from_rmeta_file(reader) + } else { + get_svh_from_object_file(reader) } - }; + } + }; - self.hash = hash.map(|hash| FileHash { kind: algo, hash }) - } + return hash.map(|hash| FileHash { kind: algo, hash }); } - self.hash.as_ref() + None } } @@ -1084,7 +1093,7 @@ impl Fingerprint { // afterwards based on the `mtime_on_use` flag. Afterwards we want the // minimum mtime as it's the one we'll be comparing to inputs and // dependencies. - for (output, file_size, hash) in self.outputs.iter() { + for (output, _file_size, _hash) in self.outputs.iter() { println!("HASH is this too many files? {:?}", &output); let mtime = match paths::mtime(output) { Ok(mtime) => mtime, @@ -1186,11 +1195,13 @@ impl Fingerprint { if path == dep_in { println!("HASH oh found it {:?} {:?}, {:?}", path, size, hash); - let mut f = CurrentFileprint::new(FileTime::zero()); if size.is_some() && hash.is_some() - && f.size(dep_in) == size.as_ref() - && f.hash(dep_in, FileHashAlgorithm::Md5) == hash.as_ref() + && CurrentFileprint::calc_size(dep_in) == *size + && CurrentFileprint::calc_hash( + dep_in, + FileHashAlgorithm::Md5, + ) == *hash { println!("HASH oh hit {:?} {:?} {:?}", path, size, hash); stale = false; @@ -1207,17 +1218,6 @@ impl Fingerprint { .unwrap() .contains("dep-run-build-script-build-script-build") { - // // let dir_hash = dep_path - // // .to_str() - // // .unwrap() - // // .split('-') - // // .last() - // // .unwrap_or_default(); - // let mut d = dep_path.parent().unwrap().to_path_buf(); - // d = d.join("dep-run-build-script-build-script-build"); - // // d.set_extension("d"); - // d - // //join(format!("build_script_build-{}.d", dir_hash)) let mut ddep_info = PathBuf::new(); let x = dep.fingerprint.local.lock().unwrap(); for local_dep in (*x).iter() { @@ -1231,6 +1231,12 @@ impl Fingerprint { } target_root.join(&ddep_info).to_path_buf() } else { + //TODO: depinfo is sometimes package root relative apparently + //let path = match ty { + // DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path), + // // N.B. 
path might be absolute here in which case the join will have no effect + // DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path), + // }; target_root.join(&dep_info) }; @@ -1681,8 +1687,6 @@ fn calculate_run_custom_build(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoRes .collect::>>()? }; - let mut output_fileprint = CurrentFileprint::new(FileTime::zero()); - Ok(Fingerprint { local: Mutex::new(local), rustc: util::hash_u64(&cx.bcx.rustc().verbose_version), @@ -1690,10 +1694,8 @@ fn calculate_run_custom_build(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoRes outputs: if overridden { Vec::new() } else { - let size = output_fileprint.size(&output).map(|c| *c); - let hash = output_fileprint - .hash(&output, FileHashAlgorithm::Md5) - .map(|c| c.clone()); + let size = CurrentFileprint::calc_size(&output); + let hash = CurrentFileprint::calc_hash(&output, FileHashAlgorithm::Md5); vec![(output, size, hash)] }, @@ -1858,13 +1860,12 @@ fn local_fingerprints_deps( .rerun_if_changed .iter() .map(|p| { - let mut f = CurrentFileprint::new(FileTime::zero()); - let hash = (*f.hash(p, FileHashAlgorithm::Md5).unwrap_or(&FileHash { - kind: FileHashAlgorithm::Md5, - hash: "".into(), - })) - .clone(); - let size = *f.size(p).unwrap_or(&0); + let hash = + CurrentFileprint::calc_hash(p, FileHashAlgorithm::Md5).unwrap_or(FileHash { + kind: FileHashAlgorithm::Md5, + hash: "".into(), + }); + let size = CurrentFileprint::calc_size(p).unwrap_or(0); ( p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf(), size, diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index 43e0a8304b6..b221811d4d5 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -32,7 +32,6 @@ use super::{fingerprint, Context, FileFlavor, Unit}; use crate::core::compiler::fingerprint::{CurrentFileprint, Fileprint}; use crate::util::paths; use crate::util::{internal, CargoResult}; -use filetime::FileTime; fn render_filename>(path: P, basedir: Option<&str>) -> CargoResult { let path = path.as_ref(); @@ -90,14 +89,11 @@ fn add_deps_for_unit( .get(unit.pkg.package_id(), metadata) { for path in &output.rerun_if_changed { - let mut file_print = CurrentFileprint::new(FileTime::zero()); deps.insert(Fileprint { path: path.to_path_buf(), - size: *file_print.size(path).unwrap(), - hash: file_print - .hash(path, fingerprint::FileHashAlgorithm::Md5) - .unwrap() - .clone(), + size: CurrentFileprint::calc_size(path).unwrap(), + hash: CurrentFileprint::calc_hash(path, fingerprint::FileHashAlgorithm::Md5) + .unwrap(), }); } } From 15a8f2445e648fe6333e9aef7a922e255ebd6a3c Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 07:25:45 +0000 Subject: [PATCH 20/39] Simpler eq --- src/cargo/core/compiler/custom_build.rs | 1 - src/cargo/core/compiler/output_depinfo.rs | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/cargo/core/compiler/custom_build.rs b/src/cargo/core/compiler/custom_build.rs index 16c9f28aa77..7e02008c8a1 100644 --- a/src/cargo/core/compiler/custom_build.rs +++ b/src/cargo/core/compiler/custom_build.rs @@ -344,7 +344,6 @@ fn build_work(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult { // And now finally, run the build command itself! 
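
Note that `local_fingerprints_deps` above stores each `rerun-if-changed` path relative to the package root when it can, so fingerprints stay valid if the workspace is moved. A sketch of that `strip_prefix` normalization (the paths in `main` are hypothetical):

    use std::path::{Path, PathBuf};

    /// Stores `path` relative to `root` when it lives inside the package,
    /// and leaves it untouched (possibly absolute) otherwise.
    fn relativize(path: &Path, root: &Path) -> PathBuf {
        path.strip_prefix(root).unwrap_or(path).to_path_buf()
    }

    fn main() {
        let root = Path::new("/work/pkg");
        assert_eq!(
            relativize(Path::new("/work/pkg/build.rs"), root),
            PathBuf::from("build.rs")
        );
        // Paths outside the package stay as they are.
        assert_eq!(
            relativize(Path::new("/etc/hosts"), root),
            PathBuf::from("/etc/hosts")
        );
    }
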
state.running(&cmd); let timestamp = paths::set_invocation_time(&script_run_dir)?; - println!("HASH EXEC {:?}", &script_run_dir); let prefix = format!("[{} {}] ", id.name(), id.version()); let mut warnings_in_case_of_panic = Vec::new(); let output = cmd diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index b221811d4d5..7d5d159b210 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -153,13 +153,7 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> // If nothing changed don't recreate the file which could alter // its mtime if let Ok(previous) = fingerprint::parse_rustc_dep_info(&output_path) { - if previous - .files - .iter() - // .map(|(path, _size, _hash)| path) - .eq(deps.iter()) - //.map(|f| (Path::new(p), size, hash))) - { + if previous.files == deps { continue; } } From 46bb454c287359d4415ce9c9a865686c4d85c3af Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 08:22:08 +0000 Subject: [PATCH 21/39] no need for to_string_lossy --- src/cargo/core/compiler/output_depinfo.rs | 34 ++++++++++------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index 7d5d159b210..9b0b4a9b1ec 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -24,7 +24,7 @@ use std::collections::{BTreeSet, HashSet}; use std::io::{BufWriter, Write}; -use std::path::{Path, PathBuf}; +use std::path::Path; use log::debug; @@ -132,12 +132,7 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> }; let deps = deps .iter() - .map(|f| { - render_filename(&f.path, basedir).map(|path| Fileprint { - path: PathBuf::from(path), - ..(*f).clone() - }) - }) + .map(|f| render_filename(&f.path, basedir).map(|rendered| (rendered, f))) .collect::>>()?; for output in cx @@ -153,33 +148,32 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> // If nothing changed don't recreate the file which could alter // its mtime if let Ok(previous) = fingerprint::parse_rustc_dep_info(&output_path) { - if previous.files == deps { + if previous.files.iter().eq(deps.iter().map(|(_, dep)| *dep)) { continue; } } // Otherwise write it all out - debug!( - "HASH: detected change in dependencies file - rewriting: {:?}", - output_path - ); let mut outfile = BufWriter::new(paths::create(output_path)?); write!(outfile, "{}:", target_fn)?; - for Fileprint { path: dep, .. } in &deps { - write!(outfile, " {}", dep.to_string_lossy())?; + for (rendered_dep, _) in &deps { + write!(outfile, " {}", rendered_dep)?; } writeln!(outfile)?; // Emit a fake target for each input file to the compilation. This // prevents `make` from spitting out an error if a file is later // deleted. 
For more info see #28735 - for Fileprint { - path: dep, - size, - hash, - } in &deps + for ( + rendered_dep, + Fileprint { + path: _dep, + size, + hash, + }, + ) in &deps { - writeln!(outfile, "{}:", dep.to_string_lossy())?; + writeln!(outfile, "{}:", rendered_dep)?; writeln!(outfile, "# size:{} {}:{}", size, hash.kind, hash.hash)?; } From 73352cd5e95f3144f496532d3738bdddc2e60a96 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 08:43:37 +0000 Subject: [PATCH 22/39] No point hashing a hash --- src/cargo/core/compiler/fingerprint.rs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 3600b5fba68..a80efbadccd 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -350,13 +350,23 @@ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. pub type FileSize = u64; -//TODO: implement hash yourself -#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize, Hash)] +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize)] pub struct FileHash { pub kind: FileHashAlgorithm, pub hash: String, } +impl hash::Hash for FileHash { + fn hash(&self, hasher: &mut H) + where + H: hash::Hasher, + { + for ch in self.hash.as_bytes() { + hasher.write_u8(*ch); + } + } +} + /// Determines if a `unit` is up-to-date, and if not prepares necessary work to /// update the persisted fingerprint. /// @@ -725,7 +735,7 @@ impl CurrentFileprint { std::fs::metadata(file).map(|metadata| metadata.len()).ok() } - pub(crate) fn hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { + pub(crate) fn file_hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { if self.hash.is_none() { self.hash = Self::calc_hash(path, algo); } @@ -1296,7 +1306,7 @@ impl Fingerprint { } let current_hash = - file_facts.hash(dep_in, reference.hash.kind); + file_facts.file_hash(dep_in, reference.hash.kind); //println!("HASH got hash file!!!! {:?}", hash); if let Some(file_facts_hash) = current_hash { @@ -2081,7 +2091,7 @@ fn find_stale_file( if *current_size == *reference_size { // Same size but mtime is different. Probably there's no change... // compute hash and compare to prevent change cascade... - if let Some(current_hash) = current.hash(path, reference_hash.kind) { + if let Some(current_hash) = current.file_hash(path, reference_hash.kind) { // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. // but not sure it's worth the additional complexity. 
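
The size check above is the fast path; when sizes agree, everything falls back to hashing file contents, which `calc_hash` does through a small buffer so large artifacts are never read into memory wholesale. A dependency-free sketch of that streaming loop (std's DefaultHasher again stands in for the real MD5/SHA digests):

    use std::collections::hash_map::DefaultHasher;
    use std::fs::File;
    use std::hash::Hasher;
    use std::io::{self, BufReader, Read};
    use std::path::Path;

    /// Hashes a file in 1 KiB chunks, like `calc_hash` above.
    fn hash_file(path: &Path) -> io::Result<u64> {
        let mut reader = BufReader::new(File::open(path)?);
        let mut hasher = DefaultHasher::new();
        let mut buffer = [0u8; 1024];
        loop {
            let count = reader.read(&mut buffer)?;
            if count == 0 {
                break; // end of file
            }
            hasher.write(&buffer[..count]);
        }
        Ok(hasher.finish())
    }
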
if *reference_hash == *current_hash { From b4000e93ef7117ed25d9a965b4cd2e9028461547 Mon Sep 17 00:00:00 2001 From: Squirrel Date: Mon, 9 Nov 2020 21:36:09 +0000 Subject: [PATCH 23/39] use to_le_bytes Co-authored-by: bjorn3 --- src/cargo/core/compiler/fingerprint.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index a80efbadccd..816079b411f 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -2493,14 +2493,7 @@ impl EncodedDepInfo { } fn write_u64(dst: &mut Vec, val: u64) { - dst.push(val as u8); - dst.push((val >> 8) as u8); - dst.push((val >> 16) as u8); - dst.push((val >> 24) as u8); - dst.push((val >> 32) as u8); - dst.push((val >> 40) as u8); - dst.push((val >> 48) as u8); - dst.push((val >> 56) as u8); + dst.extend_from_slice(&u64::to_le_bytes(val)); } } } From b6425c19ddc28b732c3c51cd04f67ba0e0ebce79 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 21:46:12 +0000 Subject: [PATCH 24/39] Just use derived hash impl --- src/cargo/core/compiler/fingerprint.rs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 816079b411f..af787cf17b6 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -350,23 +350,12 @@ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. pub type FileSize = u64; -#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct FileHash { pub kind: FileHashAlgorithm, pub hash: String, } -impl hash::Hash for FileHash { - fn hash(&self, hasher: &mut H) - where - H: hash::Hasher, - { - for ch in self.hash.as_bytes() { - hasher.write_u8(*ch); - } - } -} - /// Determines if a `unit` is up-to-date, and if not prepares necessary work to /// update the persisted fingerprint. /// From 05b9edf4bce4579fe08a84d6ac610e1be02fa3c1 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 22:53:42 +0000 Subject: [PATCH 25/39] Everything using dep_info_cache now. --- src/cargo/core/compiler/output_depinfo.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index 9b0b4a9b1ec..8a47ac677ff 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -63,12 +63,21 @@ fn add_deps_for_unit( if !unit.mode.is_run_custom_build() { // Add dependencies from rustc dep-info output (stored in fingerprint directory) let dep_info_loc = fingerprint::dep_info_loc(cx, unit); - //TODO: can we use the dep info cache here? - if let Some(paths) = - fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)? - { - for path in paths.files { - deps.insert(path); + + let mut dep_info = cx.dep_info_cache.get(&dep_info_loc); + if dep_info.is_none() { + if let Some(parsed_dep_info) = + fingerprint::parse_dep_info(unit.pkg.root(), cx.files().host_root(), &dep_info_loc)? 
+ { + cx.dep_info_cache + .insert(dep_info_loc.clone(), parsed_dep_info); + dep_info = cx.dep_info_cache.get(&dep_info_loc); + } + } + + if let Some(paths) = dep_info { + for path in &paths.files { + deps.insert(path.clone()); } } else { debug!( From 46c69530e88c6f7fbca48220fe44101a2b93d8c6 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 9 Nov 2020 23:08:56 +0000 Subject: [PATCH 26/39] People only care if there's a miss --- src/cargo/core/compiler/fingerprint.rs | 33 +++----------------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index af787cf17b6..8a2f6dfd6cd 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1093,7 +1093,6 @@ impl Fingerprint { // minimum mtime as it's the one we'll be comparing to inputs and // dependencies. for (output, _file_size, _hash) in self.outputs.iter() { - println!("HASH is this too many files? {:?}", &output); let mtime = match paths::mtime(output) { Ok(mtime) => mtime, @@ -1142,9 +1141,6 @@ impl Fingerprint { }) .expect("failed to find rmeta") } else { - // for (dep, _dep2) in dep_mtimes { - // debug!("HASH had to look at all the files {:?}", &dep); - // } match dep_mtimes.iter().max_by_key(|kv| kv.1) { Some(dep_mtime) => dep_mtime, // If our dependencies is up to date and has no filesystem @@ -1167,7 +1163,6 @@ impl Fingerprint { //todo: need to do raw .rmeta files if dep_mtime > max_mtime { - // } else { @todo here for (dep_in, dep_mtime) in dep_mtimes { if dep.only_requires_rmeta && dep_in.extension().and_then(|s| s.to_str()) != Some("rmeta") @@ -1188,12 +1183,9 @@ impl Fingerprint { .unwrap() .contains("dep-run-build-script-build-script-build") { - println!("HASH output file detected "); let mut stale = true; for (path, size, hash) in &dep.fingerprint.outputs { if path == dep_in { - println!("HASH oh found it {:?} {:?}, {:?}", path, size, hash); - if size.is_some() && hash.is_some() && CurrentFileprint::calc_size(dep_in) == *size @@ -1202,12 +1194,12 @@ impl Fingerprint { FileHashAlgorithm::Md5, ) == *hash { - println!("HASH oh hit {:?} {:?} {:?}", path, size, hash); stale = false; break; } } } + debug!("HASH miss {:?}", dep_in); if stale { return Ok(()); } @@ -1226,7 +1218,6 @@ impl Fingerprint { } _ => {} } - // println!("{:#?}", local_dep.pa); } target_root.join(&ddep_info).to_path_buf() } else { @@ -1239,13 +1230,13 @@ impl Fingerprint { target_root.join(&dep_info) }; - //let dep_info_file = target_root.join(dep_info); println!("dep info file: {:?}", &dep_info_file); let rustc_dep_info = dep_info_cache.get(&dep_info_file); if rustc_dep_info.is_none() { let dep_result = parse_dep_info(pkg_root, target_root, &dep_info_file); + match dep_result { Ok(dep) => { if let Some(dep) = dep { @@ -1259,8 +1250,6 @@ impl Fingerprint { println!("HASH error loading dep info file {}", err) } } - } else { - println!("HAS CACHE hit on dep info file"); } let mut stale = None; @@ -1297,15 +1286,8 @@ impl Fingerprint { let current_hash = file_facts.file_hash(dep_in, reference.hash.kind); - //println!("HASH got hash file!!!! {:?}", hash); if let Some(file_facts_hash) = current_hash { - if reference.hash == *file_facts_hash { - println!( - "HAS hit - same hash! 
{:?}", - file_facts.hash - ); - } else { - // println!("HASH s {:?}", file_facts.hash); + if reference.hash != *file_facts_hash { stale = Some(format!( "Hash {:?} doesn't match expected: {:?}", &file_facts_hash, &reference.hash @@ -1325,11 +1307,6 @@ impl Fingerprint { stale = Some("HASH dep info file could not be found".into()); } if stale.is_some() { - let x = dep.fingerprint.local.lock().unwrap(); - for local_dep in (*x).iter() { - println!("{:#?}", local_dep); - } - info!("HASH dep fingerprint {:#?}", &dep.fingerprint.path,); info!( "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", dep.name, dep_mtime, max_mtime, pkg_root, dep_path @@ -1338,12 +1315,8 @@ impl Fingerprint { return Ok(()); } } - } else { - // debug!("HASH dep skipped as up to date"); } - // debug!("HASH had to look at all the files {:?}", &dep); } - // what's our own output's dependency hash - what are we expecting it to be? } } From f289f82cb08f1ea4e057d948e5cbc70ade164eb4 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 10 Nov 2020 08:46:26 +0000 Subject: [PATCH 27/39] loop to find --- src/cargo/core/compiler/fingerprint.rs | 41 +++++++++++--------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index cd188359ca9..844d5947dd9 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -695,7 +695,6 @@ enum LocalFingerprint { } /// Cache of file properties that we know to be true. -/// @todo currentfileprint pub struct CurrentFileprint { pub(crate) mtime: FileTime, /// This will be None if not yet looked up. @@ -1153,6 +1152,7 @@ impl Fingerprint { pkg_root, dep_path, dep_mtime ); + let rmeta_ext = std::ffi::OsStr::new("rmeta"); // If the dependency is newer than our own output then it was // recompiled previously. We transitively become stale ourselves in // that case, so bail out. @@ -1160,13 +1160,9 @@ impl Fingerprint { // Note that this comparison should probably be `>=`, not `>`, but // for a discussion of why it's `>` see the discussion about #5918 // below in `find_stale`. 
- - //todo: need to do raw .rmeta files if dep_mtime > max_mtime { for (dep_in, dep_mtime) in dep_mtimes { - if dep.only_requires_rmeta - && dep_in.extension().and_then(|s| s.to_str()) != Some("rmeta") - { + if dep.only_requires_rmeta && dep_in.extension() != Some(&rmeta_ext) { continue; } @@ -1175,7 +1171,6 @@ impl Fingerprint { .strip_prefix(&target_root) .unwrap() .to_path_buf(); - println!("HASH dep info file {:?}", &dep_info); if dep_path.to_str().unwrap().ends_with("output") && dep_info @@ -1183,23 +1178,21 @@ impl Fingerprint { .unwrap() .contains("dep-run-build-script-build-script-build") { - let mut stale = true; - for (path, size, hash) in &dep.fingerprint.outputs { - if path == dep_in { - if size.is_some() - && hash.is_some() - && CurrentFileprint::calc_size(dep_in) == *size - && CurrentFileprint::calc_hash( - dep_in, - FileHashAlgorithm::Md5, - ) == *hash - { - stale = false; - break; - } - } - } - debug!("HASH miss {:?}", dep_in); + let stale = if let Some((_, Some(size), Some(hash))) = &dep + .fingerprint + .outputs + .iter() + .find(|(path, _, _)| path == dep_in) + { + CurrentFileprint::calc_size(dep_in) != Some(*size) + || CurrentFileprint::calc_hash(dep_in, FileHashAlgorithm::Md5) + .as_ref() + != Some(hash) + } else { + true + }; + + debug!("build.rs output file hash doesn't match {:?}", dep_in); if stale { return Ok(()); } From eb5c0616ea8c9b9240afa2424c436da5e67e9fce Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 10 Nov 2020 09:02:48 +0000 Subject: [PATCH 28/39] match to if let --- src/cargo/core/compiler/fingerprint.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 844d5947dd9..e1835bc3303 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -312,7 +312,7 @@ //! See the `A-rebuild-detection` flag on the issue tracker for more: //! 
-use std::collections::hash_map::{Entry, HashMap}; +use std::collections::hash_map::HashMap; use std::convert::TryInto; use std::env; use std::fs; @@ -1192,7 +1192,7 @@ impl Fingerprint { true }; - debug!("build.rs output file hash doesn't match {:?}", dep_in); + debug!("build.rs output doesn't match previous hash {:?}", dep_in); if stale { return Ok(()); } @@ -1203,13 +1203,9 @@ impl Fingerprint { .contains("dep-run-build-script-build-script-build") { let mut ddep_info = PathBuf::new(); - let x = dep.fingerprint.local.lock().unwrap(); - for local_dep in (*x).iter() { - match local_dep { - LocalFingerprint::CheckDepInfo { dep_info } => { - ddep_info = dep_info.to_path_buf() - } - _ => {} + for local_dep in (*dep.fingerprint.local.lock().unwrap()).iter() { + if let LocalFingerprint::CheckDepInfo { dep_info } = local_dep { + ddep_info = dep_info.to_path_buf(); } } target_root.join(&ddep_info).to_path_buf() From 5d63d442985f43799df134372be01cab67089687 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 10 Nov 2020 21:14:36 +0000 Subject: [PATCH 29/39] Less prints --- src/cargo/core/compiler/fingerprint.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index e1835bc3303..43b97e85629 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -325,7 +325,7 @@ use std::time::SystemTime; use anyhow::{bail, format_err}; use filetime::FileTime; -use log::{debug, info}; +use log::{debug, info, warn}; use md5::{Digest, Md5}; use object::Object; use serde::de; @@ -1219,7 +1219,7 @@ impl Fingerprint { target_root.join(&dep_info) }; - println!("dep info file: {:?}", &dep_info_file); + debug!("reading dep info file: {:?}", &dep_info_file); let rustc_dep_info = dep_info_cache.get(&dep_info_file); if rustc_dep_info.is_none() { @@ -1229,23 +1229,22 @@ impl Fingerprint { match dep_result { Ok(dep) => { if let Some(dep) = dep { - println!("HASH dep info file parsed"); dep_info_cache.insert(dep_info_file.clone(), dep); } else { - println!("HASH dep info file could not be parsed"); - } + warn!("Dep info file could not be parsed"); } - Err(err) => { - println!("HASH error loading dep info file {}", err) } + Err(err) => warn!("Error parsing dep info file {}", err), } } let mut stale = None; if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { - for reference in &rustc_dep_info.files { - //println!("HASH dep info ref {:?}", &reference); - if *dep_in == reference.path { + let ref_file = &rustc_dep_info + .files + .iter() + .find(|reference| *dep_in == reference.path); + if let Some(reference) = ref_file { let mut file_facts = mtime_cache.get_mut(dep_in); if file_facts.is_none() { mtime_cache.insert( @@ -1262,16 +1261,15 @@ impl Fingerprint { "File sizes don't match {:?} expected: {}", current_size, reference.size )); - break; } } else { stale = Some(format!( "File sizes was not obtainable expected: {}", reference.size )); - break; } + if stale.is_none() { let current_hash = file_facts.file_hash(dep_in, reference.hash.kind); @@ -1281,14 +1279,12 @@ impl Fingerprint { "Hash {:?} doesn't match expected: {:?}", &file_facts_hash, &reference.hash )); - break; } } else { stale = Some(format!( "No hash found in the dep info file to compare to {:?}", &reference.hash )); - break; } } } @@ -1318,7 +1314,6 @@ impl Fingerprint { local.find_stale_item(config, mtime_cache, dep_info_cache, pkg_root, target_root)? 
{ item.log(); - println!("HASHMISS we are failing here"); return Ok(()); } } From 03dc307e29165de35b6d9faa272ee498dd079d48 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 10 Nov 2020 21:39:41 +0000 Subject: [PATCH 30/39] Better log messages --- src/cargo/core/compiler/fingerprint.rs | 132 ++++++++++++++++--------- 1 file changed, 85 insertions(+), 47 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 43b97e85629..9a164a30748 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -779,12 +779,24 @@ impl CurrentFileprint { enum StaleItem { MissingFile(PathBuf), - ChangedFile { + ChangedFileTime { reference: PathBuf, reference_mtime: FileTime, stale: PathBuf, stale_mtime: FileTime, }, + ChangedFileSize { + reference: PathBuf, + reference_size: FileSize, + stale: PathBuf, + stale_size: Option, + }, + ChangedFileHash { + reference: PathBuf, + reference_hash: FileHash, + stale: PathBuf, + stale_hash: Option, + }, ChangedEnv { var: String, previous: Option, @@ -1232,7 +1244,7 @@ impl Fingerprint { dep_info_cache.insert(dep_info_file.clone(), dep); } else { warn!("Dep info file could not be parsed"); - } + } } Err(err) => warn!("Error parsing dep info file {}", err), } @@ -1245,29 +1257,29 @@ impl Fingerprint { .iter() .find(|reference| *dep_in == reference.path); if let Some(reference) = ref_file { - let mut file_facts = mtime_cache.get_mut(dep_in); - if file_facts.is_none() { - mtime_cache.insert( - dep_in.clone(), - CurrentFileprint::new(*dep_mtime), - ); - file_facts = mtime_cache.get_mut(dep_in); - } - let file_facts = file_facts.unwrap(); + let mut file_facts = mtime_cache.get_mut(dep_in); + if file_facts.is_none() { + mtime_cache.insert( + dep_in.clone(), + CurrentFileprint::new(*dep_mtime), + ); + file_facts = mtime_cache.get_mut(dep_in); + } + let file_facts = file_facts.unwrap(); - if let Some(current_size) = file_facts.size(dep_in) { - if *current_size != reference.size { - stale = Some(format!( - "File sizes don't match {:?} expected: {}", - current_size, reference.size - )); - } - } else { + if let Some(current_size) = file_facts.size(dep_in) { + if *current_size != reference.size { stale = Some(format!( - "File sizes was not obtainable expected: {}", - reference.size + "File sizes don't match {:?} expected: {}", + current_size, reference.size )); } + } else { + stale = Some(format!( + "File sizes was not obtainable expected: {}", + reference.size + )); + } if stale.is_none() { let current_hash = @@ -1282,9 +1294,9 @@ impl Fingerprint { } } else { stale = Some(format!( - "No hash found in the dep info file to compare to {:?}", - &reference.hash - )); + "No hash found in the dep info file to compare to {:?}", + &reference.hash + )); } } } @@ -1436,16 +1448,36 @@ impl StaleItem { StaleItem::MissingFile(path) => { info!("stale: missing {:?}", path); } - StaleItem::ChangedFile { + StaleItem::ChangedFileTime { reference, reference_mtime, stale, stale_mtime, } => { - info!("stale: changed {:?}", stale); + info!("stale: time changed {:?}", stale); info!(" (vs) {:?}", reference); info!(" {:?} != {:?}", reference_mtime, stale_mtime); } + StaleItem::ChangedFileSize { + reference, + reference_size, + stale, + stale_size, + } => { + info!("stale: size changed {:?}", stale); + info!(" (vs) {:?}", reference); + info!(" {:?} != {:?}", reference_size, stale_size); + } + StaleItem::ChangedFileHash { + reference, + reference_hash, + stale, + stale_hash, + } => { + info!("stale: hash changed 
{:?}", stale); + info!(" (vs) {:?}", reference); + info!(" {:?} != {:?}", reference_hash, stale_hash); + } StaleItem::ChangedEnv { var, previous, @@ -2033,30 +2065,36 @@ fn find_stale_file( } if config.cli_unstable().hash_tracking { - if let Some(current_size) = current.size(path) { - if *current_size == *reference_size { - // Same size but mtime is different. Probably there's no change... - // compute hash and compare to prevent change cascade... - if let Some(current_hash) = current.file_hash(path, reference_hash.kind) { - // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. - // but not sure it's worth the additional complexity. - if *reference_hash == *current_hash { - debug!( - "HAS: Hash hit: mtime mismatch but contents match for {:?}", - &path - ); - continue; - } - debug!( - "HASH: Hash miss for {:?}: {} (ref) != {}", - &path, reference_hash.hash, current_hash.hash - ); - } - } + let current_size = current.size(path); + if current_size != Some(reference_size) { + //if *current_size != *reference_size { + return Some(StaleItem::ChangedFileSize { + reference: reference.to_path_buf(), + reference_size: *reference_size, + stale: path.to_path_buf(), + stale_size: current_size.map(|s| *s), + }); } + + // Same size but mtime is different. Probably there's no change... + // compute hash and compare to prevent change cascade... + let current_hash = current.file_hash(path, reference_hash.kind); + if current_hash != Some(reference_hash) { + // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. + // but not sure it's worth the additional complexity. + return Some(StaleItem::ChangedFileHash { + reference: reference.to_path_buf(), + reference_hash: reference_hash.clone(), + stale: path.to_path_buf(), + stale_hash: current_hash.map(|h| h.clone()), + }); + } + + // File has expected content + continue; }; - return Some(StaleItem::ChangedFile { + return Some(StaleItem::ChangedFileTime { reference: reference.to_path_buf(), reference_mtime, stale: path.to_path_buf(), From e73de2aa46cdcf38a28b00d90f8d481d9b47d510 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 10 Nov 2020 22:01:25 +0000 Subject: [PATCH 31/39] less print stmts --- src/cargo/core/compiler/fingerprint.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 9a164a30748..25f2c429da3 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1781,8 +1781,6 @@ fn build_script_local_fingerprints( // (like for a path dependency). Those list of files would // be stored here rather than the the mtime of them. Some(f) => { - println!("HASH HASH OLD MODE DETECTED OLD MODE THIS IS SLOW?"); - let s = f()?; debug!( "old local fingerprints deps {:?} precalculated={:?}", @@ -1812,7 +1810,6 @@ fn build_script_override_fingerprint( ) -> Option { // Build script output is only populated at this stage when it is // overridden. - println!("HASH build script overriden!!!!!"); let build_script_outputs = cx.build_script_outputs.lock().unwrap(); let metadata = cx.get_run_build_script_metadata(unit); // Returns None if it is not overridden. 
@@ -1980,14 +1977,14 @@ pub fn parse_dep_info( let data = match paths::read_bytes(dep_info) { Ok(data) => data, Err(err) => { - println!("HASH Couldn't read bytes from dep info file: {}", err); + warn!("could not read bytes from dep info file: {}", err); return Ok(None); } }; let info = match EncodedDepInfo::parse(&data) { Some(info) => info, None => { - println!("HASH failed to parse cargo's dep-info at {:?}", dep_info); + warn!("failed to parse dep-info file at {:?}", dep_info); return Ok(None); } }; @@ -2164,14 +2161,13 @@ fn get_svh_from_rmeta_file(mut reader: R) -> Option { fn parse_svh(data: &[u8]) -> Option { let rust_version_len_pos = 12; - let data = &mut &data[rust_version_len_pos..]; + let data = &data[rust_version_len_pos..]; let rust_version_len = data[0] as usize; - let data = &mut &data[1..]; - //println!("rust version='{}'", String::from_utf8_lossy(&data[..rust_version_len])); + let data = &data[1..]; let data = &data[rust_version_len..]; let svh_len = data[0] as usize; - let data = &mut &data[1..]; + let data = &data[1..]; Some(String::from_utf8_lossy(&data[..svh_len]).to_string()) } From 4b63f8aa6ce7d20d94c8a43dadaa3f8362613ec4 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Thu, 12 Nov 2020 14:53:21 +0000 Subject: [PATCH 32/39] size and hash optional --- src/cargo/core/compiler/fingerprint.rs | 175 +++++++++++----------- src/cargo/core/compiler/output_depinfo.rs | 9 +- 2 files changed, 96 insertions(+), 88 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 25f2c429da3..fe36f7f9f32 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -684,7 +684,7 @@ enum LocalFingerprint { /// `output`, otherwise we need to recompile. RerunIfChanged { output: PathBuf, - paths: Vec<(PathBuf, FileSize, FileHash)>, + paths: Vec, }, /// This represents a single `rerun-if-env-changed` annotation printed by a @@ -862,10 +862,10 @@ impl LocalFingerprint { LocalFingerprint::RerunIfChanged { output, paths } => { let c: Vec<_> = paths .iter() - .map(|(p, size, hash)| Fileprint { - path: pkg_root.join(p), - size: *size, - hash: hash.clone(), + .map(|f| { + let mut f = f.clone(); + f.path = pkg_root.join(f.path); + f }) .collect(); Ok(find_stale_file( @@ -1268,35 +1268,39 @@ impl Fingerprint { let file_facts = file_facts.unwrap(); if let Some(current_size) = file_facts.size(dep_in) { - if *current_size != reference.size { + if Some(*current_size) != reference.size { stale = Some(format!( - "File sizes don't match {:?} expected: {}", + "File sizes don't match {:?} expected: {:?}", current_size, reference.size )); } } else { stale = Some(format!( - "File sizes was not obtainable expected: {}", + "File sizes was not obtainable expected: {:?}", reference.size )); } if stale.is_none() { - let current_hash = - file_facts.file_hash(dep_in, reference.hash.kind); - - if let Some(file_facts_hash) = current_hash { - if reference.hash != *file_facts_hash { + if let Some(reference_hash) = &reference.hash { + let current_hash = + file_facts.file_hash(dep_in, reference_hash.kind); + + if let Some(file_facts_hash) = current_hash { + if reference_hash != file_facts_hash { + stale = Some(format!( + "Hash {:?} doesn't match expected: {:?}", + &file_facts_hash, &reference_hash + )); + } + } else { stale = Some(format!( - "Hash {:?} doesn't match expected: {:?}", - &file_facts_hash, &reference.hash + "No hash found in the dep info file to compare to {:?}", + &reference.hash )); } } else { - stale = Some(format!( - 
"No hash found in the dep info file to compare to {:?}", - &reference.hash - )); + stale = Some("No reference hash to compare to".into()); } } } @@ -1844,18 +1848,10 @@ fn local_fingerprints_deps( let paths = deps .rerun_if_changed .iter() - .map(|p| { - let hash = - CurrentFileprint::calc_hash(p, FileHashAlgorithm::Md5).unwrap_or(FileHash { - kind: FileHashAlgorithm::Md5, - hash: "".into(), - }); - let size = CurrentFileprint::calc_size(p).unwrap_or(0); - ( - p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf(), - size, - hash, - ) + .map(|p| Fileprint { + path: p.strip_prefix(pkg_root).unwrap_or(p).to_path_buf(), + size: CurrentFileprint::calc_size(p), + hash: CurrentFileprint::calc_hash(p, FileHashAlgorithm::Md5), }) .collect(); local.push(LocalFingerprint::RerunIfChanged { output, paths }); @@ -2062,33 +2058,35 @@ fn find_stale_file( } if config.cli_unstable().hash_tracking { - let current_size = current.size(path); - if current_size != Some(reference_size) { - //if *current_size != *reference_size { - return Some(StaleItem::ChangedFileSize { - reference: reference.to_path_buf(), - reference_size: *reference_size, - stale: path.to_path_buf(), - stale_size: current_size.map(|s| *s), - }); - } + // File has expected content + if let (Some(reference_size), Some(reference_hash)) = (reference_size, reference_hash) { + let current_size = current.size(path); + if current_size != Some(reference_size) { + //if *current_size != *reference_size { + return Some(StaleItem::ChangedFileSize { + reference: reference.to_path_buf(), + reference_size: *reference_size, + stale: path.to_path_buf(), + stale_size: current_size.map(|s| *s), + }); + } - // Same size but mtime is different. Probably there's no change... - // compute hash and compare to prevent change cascade... - let current_hash = current.file_hash(path, reference_hash.kind); - if current_hash != Some(reference_hash) { - // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. - // but not sure it's worth the additional complexity. - return Some(StaleItem::ChangedFileHash { - reference: reference.to_path_buf(), - reference_hash: reference_hash.clone(), - stale: path.to_path_buf(), - stale_hash: current_hash.map(|h| h.clone()), - }); - } + // Same size but mtime is different. Probably there's no change... + // compute hash and compare to prevent change cascade... + let current_hash = current.file_hash(path, reference_hash.kind); + if current_hash != Some(reference_hash) { + // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. + // but not sure it's worth the additional complexity. + return Some(StaleItem::ChangedFileHash { + reference: reference.to_path_buf(), + reference_hash: reference_hash.clone(), + stale: path.to_path_buf(), + stale_hash: current_hash.map(|h| h.clone()), + }); + } - // File has expected content - continue; + continue; + } }; return Some(StaleItem::ChangedFileTime { @@ -2329,11 +2327,11 @@ pub struct RustcDepInfo { } /// A file location with identifying properties: size and hash. -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug)] +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug, Hash, Serialize, Deserialize)] pub struct Fileprint { pub path: PathBuf, //TODO is this field needed on here? 
- pub size: FileSize, - pub hash: FileHash, + pub size: Option, + pub hash: Option, } // Same as `RustcDepInfo` except avoids absolute paths as much as possible to @@ -2356,20 +2354,27 @@ impl EncodedDepInfo { //FIXME: backward compatibility!!! let eight_bytes: &[u8; 8] = (bytes[0..8]).try_into().ok()?; let size = u64::from_le_bytes(*eight_bytes) as FileSize; + let size = if size == 0 { None } else { Some(size) }; *bytes = &bytes[8..]; - //debug!("read size as {}", size); - let hash_buf = read_bytes(bytes)?; - - let hash = String::from_utf8(hash_buf.to_vec()).unwrap(); - //debug!("read hash as {}", hash); let kind = match read_u8(bytes)? { - 0 => FileHashAlgorithm::Md5, - 1 => FileHashAlgorithm::Sha1, - 2 => FileHashAlgorithm::Svh, + 0 => None, + 1 => Some(FileHashAlgorithm::Md5), + 2 => Some(FileHashAlgorithm::Sha1), + 3 => Some(FileHashAlgorithm::Svh), _ => return None, }; + + //debug!("read size as {}", size); + let hash = if let Some(kind) = kind { + let hash_buf = read_bytes(bytes)?; + let hash = String::from_utf8(hash_buf.to_vec()).unwrap(); + Some(FileHash { kind, hash }) + } else { + None + }; + let ty = match read_u8(bytes)? { 0 => DepInfoPathType::PackageRootRelative, 1 => DepInfoPathType::TargetRootRelative, @@ -2380,7 +2385,7 @@ impl EncodedDepInfo { Fileprint { path: util::bytes2path(bytes).ok()?, size, - hash: FileHash { kind, hash }, + hash, }, ty, )); @@ -2425,19 +2430,24 @@ impl EncodedDepInfo { write_usize(dst, self.files.len()); for (Fileprint { path, size, hash }, ty) in self.files.iter() { //debug!("writing depinfo size as {} ", *size as usize); - write_u64(dst, *size); - //debug!("writing depinfo hash as {} ", hash.hash.len()); - write_bytes(dst, hash.hash.as_bytes()); + write_u64(dst, size.unwrap_or_default()); //write(dst, hash.hash); - match hash.kind { - FileHashAlgorithm::Md5 => dst.push(0), - FileHashAlgorithm::Sha1 => dst.push(1), - FileHashAlgorithm::Svh => dst.push(2), + if let Some(hash) = hash { + match hash.kind { + FileHashAlgorithm::Md5 => dst.push(1), + FileHashAlgorithm::Sha1 => dst.push(2), + FileHashAlgorithm::Svh => dst.push(3), + } + //debug!("writing depinfo hash as {} ", hash.hash.len()); + write_bytes(dst, hash.hash.as_bytes()); + } else { + dst.push(0); //None } match ty { DepInfoPathType::PackageRootRelative => dst.push(0), DepInfoPathType::TargetRootRelative => dst.push(1), } + write_bytes(dst, util::path2bytes(path)?); } @@ -2498,15 +2508,15 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult let file = &prev[0..prev.len() - 1]; for i in 0..ret.files.len() { if ret.files[i].path.to_string_lossy() == file { - let size_and_hash: Vec<_> = line["# size:".len()..].split(' ').collect(); - ret.files[i].size = size_and_hash[0].parse()?; + let size_and_hash: Vec<_> = line["# size:".len()..].split(' ').collect(); //TODO: find/rfind + ret.files[i].size = size_and_hash[0].parse().ok(); let kind_hash: Vec<_> = size_and_hash[1].split(":").collect(); let hash = kind_hash[1]; - ret.files[i].hash = FileHash { + ret.files[i].hash = Some(FileHash { kind: FileHashAlgorithm::from_str(kind_hash[0]) .expect("unknown hashing algo"), hash: hash.to_string(), - }; + }); break; } } @@ -2530,11 +2540,8 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult } ret.files.push(Fileprint { path: file.into(), - size: 0, - hash: FileHash { - kind: FileHashAlgorithm::Md5, - hash: String::new(), //TO DO - }, + size: None, + hash: None, }); } } else { diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs 
index 8a47ac677ff..d87fce255ab 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -100,9 +100,8 @@ fn add_deps_for_unit( for path in &output.rerun_if_changed { deps.insert(Fileprint { path: path.to_path_buf(), - size: CurrentFileprint::calc_size(path).unwrap(), - hash: CurrentFileprint::calc_hash(path, fingerprint::FileHashAlgorithm::Md5) - .unwrap(), + size: CurrentFileprint::calc_size(path), + hash: CurrentFileprint::calc_hash(path, fingerprint::FileHashAlgorithm::Md5), }); } } @@ -183,7 +182,9 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> ) in &deps { writeln!(outfile, "{}:", rendered_dep)?; - writeln!(outfile, "# size:{} {}:{}", size, hash.kind, hash.hash)?; + if let (Some(size), Some(hash)) = (size, hash) { + writeln!(outfile, "# size:{} {}:{}", size, hash.kind, hash.hash)?; + } } // dep-info generation failed, so delete output file. This will From 1411aa5e24ed4d9830233943aaaa25633b663399 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Thu, 12 Nov 2020 15:38:24 +0000 Subject: [PATCH 33/39] size and hash optional --- src/cargo/core/compiler/fingerprint.rs | 46 +++++++++++++---------- src/cargo/core/compiler/output_depinfo.rs | 8 +--- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index fe36f7f9f32..dc115c1228d 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -568,7 +568,7 @@ pub struct Fingerprint { /// fingerprint is out of date if this is missing, or if previous /// fingerprints output files are regenerated and look newer than this one. #[serde(skip)] - outputs: Vec<(PathBuf, Option, Option)>, + outputs: Vec, } /// Indication of the status on the filesystem for a particular unit. @@ -730,7 +730,6 @@ impl CurrentFileprint { self.hash.as_ref() } - // TODO: for direct calls to these can we cache them in the mtimes cache? pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option { if let Ok(file) = fs::File::open(path) { let mut reader: io::BufReader = io::BufReader::new(file); @@ -1103,18 +1102,18 @@ impl Fingerprint { // afterwards based on the `mtime_on_use` flag. Afterwards we want the // minimum mtime as it's the one we'll be comparing to inputs and // dependencies. - for (output, _file_size, _hash) in self.outputs.iter() { - let mtime = match paths::mtime(output) { + for Fileprint { path, .. } in self.outputs.iter() { + let mtime = match paths::mtime(path) { Ok(mtime) => mtime, // This path failed to report its `mtime`. It probably doesn't // exists, so leave ourselves as stale and bail out. Err(e) => { - debug!("failed to get mtime of {:?}: {}", output, e); + debug!("failed to get mtime of {:?}: {}", path, e); return Ok(()); } }; - assert!(mtimes.insert(output.clone(), mtime).is_none()); + assert!(mtimes.insert(path.clone(), mtime).is_none()); } let opt_max = mtimes.iter().max_by_key(|kv| kv.1); @@ -1190,11 +1189,15 @@ impl Fingerprint { .unwrap() .contains("dep-run-build-script-build-script-build") { - let stale = if let Some((_, Some(size), Some(hash))) = &dep + let stale = if let Some(Fileprint { + size: Some(size), + hash: Some(hash), + .. + }) = &dep .fingerprint .outputs .iter() - .find(|(path, _, _)| path == dep_in) + .find(|Fileprint { path, .. 
}| path == dep_in) { CurrentFileprint::calc_size(dep_in) != Some(*size) || CurrentFileprint::calc_hash(dep_in, FileHashAlgorithm::Md5) @@ -1581,14 +1584,13 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult, unit: &Unit) -> CargoRes outputs: if overridden { Vec::new() } else { - let size = CurrentFileprint::calc_size(&output); - let hash = CurrentFileprint::calc_hash(&output, FileHashAlgorithm::Md5); - vec![(output, size, hash)] + vec![Fileprint::from_md5(output)] }, // Most of the other info is blank here as we don't really include it @@ -2334,6 +2334,14 @@ pub struct Fileprint { pub hash: Option, } +impl Fileprint { + pub(crate) fn from_md5(path: PathBuf) -> Self { + let size = CurrentFileprint::calc_size(&path); + let hash = CurrentFileprint::calc_hash(&path, FileHashAlgorithm::Md5); + Self { path, size, hash } + } +} + // Same as `RustcDepInfo` except avoids absolute paths as much as possible to // allow moving around the target directory. // diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index d87fce255ab..7da694bfabe 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -29,7 +29,7 @@ use std::path::Path; use log::debug; use super::{fingerprint, Context, FileFlavor, Unit}; -use crate::core::compiler::fingerprint::{CurrentFileprint, Fileprint}; +use crate::core::compiler::fingerprint::Fileprint; use crate::util::paths; use crate::util::{internal, CargoResult}; @@ -98,11 +98,7 @@ fn add_deps_for_unit( .get(unit.pkg.package_id(), metadata) { for path in &output.rerun_if_changed { - deps.insert(Fileprint { - path: path.to_path_buf(), - size: CurrentFileprint::calc_size(path), - hash: CurrentFileprint::calc_hash(path, fingerprint::FileHashAlgorithm::Md5), - }); + deps.insert(Fileprint::from_md5(path.to_path_buf())); } } } From 87600cca75a7ace0b4a211fab782a30d179575a8 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Thu, 12 Nov 2020 15:55:23 +0000 Subject: [PATCH 34/39] Only activate when switched on --- src/cargo/core/compiler/fingerprint.rs | 256 +++++++++++++------------ 1 file changed, 136 insertions(+), 120 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index dc115c1228d..8e8658481e4 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1096,6 +1096,7 @@ impl Fingerprint { dep_info_loc: PathBuf, ) -> CargoResult<()> { assert!(!self.fs_status.up_to_date()); + let mut mtimes = HashMap::new(); // Get the `mtime` of all outputs. Optionally update their mtime @@ -1133,6 +1134,7 @@ impl Fingerprint { pkg_root, max_path, max_mtime ); + let rmeta_ext = std::ffi::OsStr::new("rmeta"); for dep in self.deps.iter() { let dep_mtimes = match &dep.fingerprint.fs_status { FsStatus::UpToDate { mtimes } => mtimes, @@ -1163,7 +1165,6 @@ impl Fingerprint { pkg_root, dep_path, dep_mtime ); - let rmeta_ext = std::ffi::OsStr::new("rmeta"); // If the dependency is newer than our own output then it was // recompiled previously. We transitively become stale ourselves in // that case, so bail out. @@ -1172,154 +1173,169 @@ impl Fingerprint { // for a discussion of why it's `>` see the discussion about #5918 // below in `find_stale`. 
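
The gating introduced in the hunk below keeps mtimes as the cheap first filter and only pays for the size/hash comparison when the unstable hash-tracking flag is enabled. The decision procedure it implements, in a few lines (the function and its parameters are illustrative, not cargo APIs):

    /// A dependency with an older mtime is never stale; a newer mtime is
    /// stale unless hash tracking is on and the recorded contents still match.
    fn dep_is_stale(
        hash_tracking: bool,
        dep_mtime_newer: bool,
        contents_match: impl FnOnce() -> bool,
    ) -> bool {
        if !dep_mtime_newer {
            return false;
        }
        if hash_tracking {
            // Only now pay for the size/hash comparison.
            !contents_match()
        } else {
            true
        }
    }
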
if dep_mtime > max_mtime { - for (dep_in, dep_mtime) in dep_mtimes { - if dep.only_requires_rmeta && dep_in.extension() != Some(&rmeta_ext) { - continue; - } - - if dep_mtime > max_mtime { - let dep_info = dep_info_loc - .strip_prefix(&target_root) - .unwrap() - .to_path_buf(); + if config.cli_unstable().hash_tracking { + for (dep_in, dep_mtime) in dep_mtimes { + if dep.only_requires_rmeta && dep_in.extension() != Some(&rmeta_ext) { + continue; + } - if dep_path.to_str().unwrap().ends_with("output") - && dep_info - .to_str() + if dep_mtime > max_mtime { + let dep_info = dep_info_loc + .strip_prefix(&target_root) .unwrap() - .contains("dep-run-build-script-build-script-build") - { - let stale = if let Some(Fileprint { - size: Some(size), - hash: Some(hash), - .. - }) = &dep - .fingerprint - .outputs - .iter() - .find(|Fileprint { path, .. }| path == dep_in) - { - CurrentFileprint::calc_size(dep_in) != Some(*size) - || CurrentFileprint::calc_hash(dep_in, FileHashAlgorithm::Md5) - .as_ref() - != Some(hash) - } else { - true - }; + .to_path_buf(); - debug!("build.rs output doesn't match previous hash {:?}", dep_in); - if stale { - return Ok(()); - } - } else { - let dep_info_file = if dep_info - .to_str() - .unwrap() - .contains("dep-run-build-script-build-script-build") + if dep_path.to_str().unwrap().ends_with("output") + && dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build") { - let mut ddep_info = PathBuf::new(); - for local_dep in (*dep.fingerprint.local.lock().unwrap()).iter() { - if let LocalFingerprint::CheckDepInfo { dep_info } = local_dep { - ddep_info = dep_info.to_path_buf(); - } + let stale = if let Some(Fileprint { + size: Some(size), + hash: Some(hash), + .. + }) = &dep + .fingerprint + .outputs + .iter() + .find(|Fileprint { path, .. }| path == dep_in) + { + CurrentFileprint::calc_size(dep_in) != Some(*size) + || CurrentFileprint::calc_hash( + dep_in, + FileHashAlgorithm::Md5, + ) + .as_ref() + != Some(hash) + } else { + true + }; + + debug!("build.rs output doesn't match previous hash {:?}", dep_in); + if stale { + return Ok(()); } - target_root.join(&ddep_info).to_path_buf() } else { - //TODO: depinfo is sometimes package root relative apparently - //let path = match ty { - // DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path), - // // N.B. path might be absolute here in which case the join will have no effect - // DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path), - // }; - target_root.join(&dep_info) - }; - - debug!("reading dep info file: {:?}", &dep_info_file); - - let rustc_dep_info = dep_info_cache.get(&dep_info_file); - if rustc_dep_info.is_none() { - let dep_result = - parse_dep_info(pkg_root, target_root, &dep_info_file); - - match dep_result { - Ok(dep) => { - if let Some(dep) = dep { - dep_info_cache.insert(dep_info_file.clone(), dep); - } else { - warn!("Dep info file could not be parsed"); + let dep_info_file = if dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build") + { + let mut ddep_info = PathBuf::new(); + for local_dep in (*dep.fingerprint.local.lock().unwrap()).iter() + { + if let LocalFingerprint::CheckDepInfo { dep_info } = + local_dep + { + ddep_info = dep_info.to_path_buf(); + } + } + target_root.join(&ddep_info).to_path_buf() + } else { + //TODO: depinfo is sometimes package root relative apparently + //let path = match ty { + // DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path), + // // N.B. 
path might be absolute here in which case the join will have no effect + // DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path), + // }; + target_root.join(&dep_info) + }; + + debug!("reading dep info file: {:?}", &dep_info_file); + + let rustc_dep_info = dep_info_cache.get(&dep_info_file); + if rustc_dep_info.is_none() { + let dep_result = + parse_dep_info(pkg_root, target_root, &dep_info_file); + + match dep_result { + Ok(dep) => { + if let Some(dep) = dep { + dep_info_cache.insert(dep_info_file.clone(), dep); + } else { + warn!("Dep info file could not be parsed"); + } } + Err(err) => warn!("Error parsing dep info file {}", err), } - Err(err) => warn!("Error parsing dep info file {}", err), } - } - let mut stale = None; - if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { - let ref_file = &rustc_dep_info - .files - .iter() - .find(|reference| *dep_in == reference.path); - if let Some(reference) = ref_file { - let mut file_facts = mtime_cache.get_mut(dep_in); - if file_facts.is_none() { - mtime_cache.insert( - dep_in.clone(), - CurrentFileprint::new(*dep_mtime), - ); - file_facts = mtime_cache.get_mut(dep_in); - } - let file_facts = file_facts.unwrap(); + let mut stale = None; + if let Some(rustc_dep_info) = dep_info_cache.get(&dep_info_file) { + let ref_file = &rustc_dep_info + .files + .iter() + .find(|reference| *dep_in == reference.path); + if let Some(reference) = ref_file { + let mut file_facts = mtime_cache.get_mut(dep_in); + if file_facts.is_none() { + mtime_cache.insert( + dep_in.clone(), + CurrentFileprint::new(*dep_mtime), + ); + file_facts = mtime_cache.get_mut(dep_in); + } + let file_facts = file_facts.unwrap(); - if let Some(current_size) = file_facts.size(dep_in) { - if Some(*current_size) != reference.size { + if let Some(current_size) = file_facts.size(dep_in) { + if Some(*current_size) != reference.size { + stale = Some(format!( + "File sizes don't match {:?} expected: {:?}", + current_size, reference.size + )); + } + } else { stale = Some(format!( - "File sizes don't match {:?} expected: {:?}", - current_size, reference.size + "File sizes was not obtainable expected: {:?}", + reference.size )); } - } else { - stale = Some(format!( - "File sizes was not obtainable expected: {:?}", - reference.size - )); - } - if stale.is_none() { - if let Some(reference_hash) = &reference.hash { - let current_hash = - file_facts.file_hash(dep_in, reference_hash.kind); + if stale.is_none() { + if let Some(reference_hash) = &reference.hash { + let current_hash = file_facts + .file_hash(dep_in, reference_hash.kind); - if let Some(file_facts_hash) = current_hash { - if reference_hash != file_facts_hash { - stale = Some(format!( + if let Some(file_facts_hash) = current_hash { + if reference_hash != file_facts_hash { + stale = Some(format!( "Hash {:?} doesn't match expected: {:?}", &file_facts_hash, &reference_hash )); - } - } else { - stale = Some(format!( + } + } else { + stale = Some(format!( "No hash found in the dep info file to compare to {:?}", &reference.hash )); + } + } else { + stale = + Some("No reference hash to compare to".into()); } - } else { - stale = Some("No reference hash to compare to".into()); } } + } else { + stale = Some("HASH dep info file could not be found".into()); + } + if stale.is_some() { + info!( + "dependency on `{}` is newer than we are {} > {} {:?} {:?}", + dep.name, dep_mtime, max_mtime, pkg_root, dep_path + ); + info!("HASHMISS also {:?}", stale); + return Ok(()); } - } else { - stale = Some("HASH dep info file could not 
be found".into()); - } - if stale.is_some() { - info!( - "HASHMISS dependency on `{}` is newer than we are {} > {} {:?} {:?}", - dep.name, dep_mtime, max_mtime, pkg_root, dep_path - ); - info!("HASHMISS also {:?}", stale); - return Ok(()); } } } + } else { + info!( + "dependency on `{}` is newer than we are {} > {} {:?}", + dep.name, dep_mtime, max_mtime, pkg_root + ); + return Ok(()); } } } From 13792637d202758ca0aec56db45c83256d5b9e01 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Thu, 12 Nov 2020 17:09:14 +0000 Subject: [PATCH 35/39] Updates to serialiseation format in tests. --- tests/testsuite/dep_info.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/testsuite/dep_info.rs b/tests/testsuite/dep_info.rs index 53f1e3054f0..11b07ba42bf 100644 --- a/tests/testsuite/dep_info.rs +++ b/tests/testsuite/dep_info.rs @@ -32,8 +32,11 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[( let _size = u64::from_le_bytes(*eight_bytes); *dep_info = &dep_info[8..]; - str::from_utf8(read_bytes(dep_info)).unwrap(); //hash - read_u8(dep_info); //hashkind + let hash_kind = read_u8(dep_info); //hashkind + + if hash_kind != 0 { + str::from_utf8(read_bytes(dep_info)).unwrap(); //hash + } ( read_u8(dep_info), str::from_utf8(read_bytes(dep_info)).unwrap(), From 58478ec2879b6f68a5cc3f4948320f10effa212e Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Fri, 13 Nov 2020 12:55:12 +0000 Subject: [PATCH 36/39] reduced duplication --- src/cargo/core/compiler/fingerprint.rs | 28 +++++++++----------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 8e8658481e4..2c6c16f2d59 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -1135,6 +1135,8 @@ impl Fingerprint { ); let rmeta_ext = std::ffi::OsStr::new("rmeta"); + let output_dir = std::ffi::OsStr::new("output"); + for dep in self.deps.iter() { let dep_mtimes = match &dep.fingerprint.fs_status { FsStatus::UpToDate { mtimes } => mtimes, @@ -1185,12 +1187,12 @@ impl Fingerprint { .unwrap() .to_path_buf(); - if dep_path.to_str().unwrap().ends_with("output") - && dep_info - .to_str() - .unwrap() - .contains("dep-run-build-script-build-script-build") - { + let is_custom_build = dep_info + .to_str() + .unwrap() + .contains("dep-run-build-script-build-script-build"); + + if dep_path.file_name() == Some(&output_dir) && is_custom_build { let stale = if let Some(Fileprint { size: Some(size), hash: Some(hash), @@ -1217,11 +1219,7 @@ impl Fingerprint { return Ok(()); } } else { - let dep_info_file = if dep_info - .to_str() - .unwrap() - .contains("dep-run-build-script-build-script-build") - { + let dep_info_file = if is_custom_build { let mut ddep_info = PathBuf::new(); for local_dep in (*dep.fingerprint.local.lock().unwrap()).iter() { @@ -1233,13 +1231,7 @@ impl Fingerprint { } target_root.join(&ddep_info).to_path_buf() } else { - //TODO: depinfo is sometimes package root relative apparently - //let path = match ty { - // DepInfoPathType::PackageRootRelative => pkg_root.join(fileprint.path), - // // N.B. 
path might be absolute here in which case the join will have no effect - // DepInfoPathType::TargetRootRelative => target_root.join(fileprint.path), - // }; - target_root.join(&dep_info) + dep_info_loc.clone() }; debug!("reading dep info file: {:?}", &dep_info_file); From 615dd81319b65116a145bfa7690ae0a1b784e64f Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 16 Nov 2020 07:50:07 +0000 Subject: [PATCH 37/39] Working following format change of hashes --- Cargo.toml | 5 +- src/cargo/core/compiler/fingerprint.rs | 259 ++++++++++++++-------- src/cargo/core/compiler/output_depinfo.rs | 5 +- 3 files changed, 172 insertions(+), 97 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bcaa455eaf0..1f1d0277ef9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ lazycell = "1.2.0" libc = "0.2" log = "0.4.6" libgit2-sys = "0.12.14" -md-5 = "0.8" +md-5 = "0.9" memchr = "2.1.3" num_cpus = "1.0" opener = "0.4" @@ -56,7 +56,8 @@ semver = { version = "0.10", features = ["serde"] } serde = { version = "1.0.82", features = ["derive"] } serde_ignored = "0.1.0" serde_json = { version = "1.0.30", features = ["raw_value"] } -sha-1 = "0.8" +sha-1 = "0.9" +sha2 = "0.9" shell-escape = "0.1.4" strip-ansi-escapes = "0.1.0" tar = { version = "0.4.26", default-features = false } diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 2c6c16f2d59..912c5f8e9f0 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -315,6 +315,7 @@ use std::collections::hash_map::HashMap; use std::convert::TryInto; use std::env; +use std::fmt; use std::fs; use std::hash::{self, Hasher}; use std::io::{self, Read}; @@ -322,16 +323,19 @@ use std::path::{Path, PathBuf}; use std::str::{self, FromStr}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; +use std::num::NonZeroU64; use anyhow::{bail, format_err}; use filetime::FileTime; use log::{debug, info, warn}; use md5::{Digest, Md5}; use object::Object; +use serde; use serde::de; use serde::ser; use serde::{Deserialize, Serialize}; use sha1::Sha1; +use sha2::Sha256; use crate::core::compiler::unit_graph::UnitDep; use crate::core::Package; @@ -348,12 +352,72 @@ use super::job::{ use super::{BuildContext, Context, FileFlavor, Unit}; // While source files can't currently be > 4Gb, bin files could be. -pub type FileSize = u64; +pub type FileSize = NonZeroU64; -#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Serialize, Deserialize, Hash)] +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct FileHash { - pub kind: FileHashAlgorithm, - pub hash: String, + kind: FileHashAlgorithm, + // arrays > 32 are currently hard work so broken in twain. + hash_front: [u8; 32], + hash_back: [u8; 32], +} + +impl FileHash { + pub fn from_hex_rev(kind: FileHashAlgorithm, hash: &str) -> Option { + let mut decoded = hex::decode(hash).ok()?; + decoded.reverse(); // The slice is stored as little endien. 
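+        // Illustrative note (not part of the original change): hex::decode("0102")
+        // yields [0x01, 0x02], which the reverse above stores as [0x02, 0x01],
+        // matching the little-endian order used where the hash is embedded.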
+ Some(Self::from_slice(kind, &decoded[..])) + } + + // pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option { + // let decoded = hex::decode(hash).ok()?; + // Some(Self::from_slice(kind, &decoded[..])) + // } + + pub fn from_slice_rev(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash { + let mut v = hash.to_vec(); + v.reverse(); + Self::from_slice(kind, &v) + } + + pub fn from_slice(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash { + let mut result = FileHash { + kind, + hash_front: [0u8; 32], + hash_back: [0u8; 32], + }; + let len = hash.len(); + let front_len = std::cmp::min(len, 32); + (&mut result.hash_front[..front_len]).copy_from_slice(&hash[..front_len]); + if len > 32 { + let back_len = std::cmp::min(len, 64); + (&mut result.hash_back[..back_len - 32]).copy_from_slice(&hash[32..back_len]); + } + result + } + + pub fn write_to_vec(&self, vec: &mut Vec) { + vec.push(match self.kind { + FileHashAlgorithm::Md5 => 1, + FileHashAlgorithm::Sha1 => 2, + FileHashAlgorithm::Sha256 => 3, + FileHashAlgorithm::Svh => 4, + }); + vec.extend_from_slice(&self.hash_front[..]); + vec.extend_from_slice(&self.hash_back[..]); + } +} + +impl fmt::Display for FileHash { + fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> Result<(), fmt::Error> { + write!( + formatter, + "{}:{}{}", + self.kind, + hex::encode(self.hash_front), + hex::encode(self.hash_back) + ) + } } /// Determines if a `unit` is up-to-date, and if not prepares necessary work to @@ -720,7 +784,7 @@ impl CurrentFileprint { } pub(crate) fn calc_size(file: &Path) -> Option { - std::fs::metadata(file).map(|metadata| metadata.len()).ok() + std::fs::metadata(file).map(|metadata| NonZeroU64::new(metadata.len())).ok().flatten() } pub(crate) fn file_hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { @@ -730,35 +794,31 @@ impl CurrentFileprint { self.hash.as_ref() } + fn invoke_digest(reader: &mut R, kind: FileHashAlgorithm) -> Option + where + D: Digest, + R: Read, + { + let mut hasher = D::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer).ok()?; + if count == 0 { + break; + } + hasher.update(&buffer[..count]); + } + Some(FileHash::from_slice_rev(kind, &hasher.finalize()[..])) + } + pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option { if let Ok(file) = fs::File::open(path) { let mut reader: io::BufReader = io::BufReader::new(file); - let hash = match algo { - FileHashAlgorithm::Md5 => { - let mut hasher = Md5::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } - FileHashAlgorithm::Sha1 => { - let mut hasher = Sha1::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.input(&buffer[..count]); - } - Some(to_hex(&hasher.result())) - } + match algo { + FileHashAlgorithm::Md5 => Self::invoke_digest::(&mut reader, algo), + FileHashAlgorithm::Sha1 => Self::invoke_digest::(&mut reader, algo), + FileHashAlgorithm::Sha256 => Self::invoke_digest::(&mut reader, algo), FileHashAlgorithm::Svh => { if path.extension() == Some(std::ffi::OsStr::new("rlib")) { get_svh_from_ar(reader) @@ -768,11 +828,11 @@ impl CurrentFileprint { get_svh_from_object_file(reader) } } - }; - - return hash.map(|hash| FileHash { kind: algo, hash }); + } + } else { + debug!("HASH failed to open path {:?}", path); + None } - None } } @@ -2112,16 +2172,7 @@ fn 
find_stale_file( None } -fn to_hex(bytes: &[u8]) -> String { - let mut result = String::with_capacity(bytes.len() * 2); - for byte in bytes { - result.push_str(&format!("{:x}", byte)); - } - result -} - -type Svh = String; -fn get_svh_from_ar(reader: R) -> Option { +fn get_svh_from_ar(reader: R) -> Option { let mut ar = ar::Archive::new(reader); while let Some(file) = ar.next_entry() { match file { @@ -2129,53 +2180,61 @@ fn get_svh_from_ar(reader: R) -> Option { let s = String::from_utf8_lossy(&file.header().identifier()); if s.ends_with(".rmeta") { if let Some(index) = s.rfind('-') { - return Some(s[index + 1..(s.len() - ".rmeta".len())].to_string()); + return FileHash::from_hex_rev( + FileHashAlgorithm::Svh, + &s[index + 1..(s.len() - ".rmeta".len())], + ); } } } Err(err) => debug!("Error reading ar: {}", err), } } + debug!("HASH svh not found in archive file."); None } -// While this looks expensive this is only invoked when dylibs are compiled against -// and the timestamp is too recent and the file is the expected size. -fn get_svh_from_object_file(mut reader: R) -> Option { +// While this looks expensive, this is only invoked for dylibs +// with an incorrect timestamp the file is the expected size. +fn get_svh_from_object_file(mut reader: R) -> Option { let mut data = vec![]; reader.read_to_end(&mut data).ok()?; let obj = object::read::File::parse(&data).ok()?; for (_idx, sym) in obj.symbols() { if let Some(name) = sym.name() { - if name.starts_with("_rust_svh_") { + if name.starts_with("_rust_svh") { if let Some(index) = name.rfind('_') { - return Some(name[index + 1..].to_string()); + return FileHash::from_hex_rev( + FileHashAlgorithm::Svh, + &name[index + 1..], + ); } } } } + debug!("HASH svh not found in object file"); None } -fn get_svh_from_rmeta_file(mut reader: R) -> Option { +fn get_svh_from_rmeta_file(mut reader: R) -> Option { let mut data = Vec::with_capacity(128); data.resize(128, 0); reader.read_exact(&mut data).ok()?; parse_svh(&data) } -fn parse_svh(data: &[u8]) -> Option { - let rust_version_len_pos = 12; - let data = &data[rust_version_len_pos..]; - let rust_version_len = data[0] as usize; - let data = &data[1..]; - - let data = &data[rust_version_len..]; - let svh_len = data[0] as usize; - let data = &data[1..]; +fn parse_svh(data: &[u8]) -> Option { + const METADATA_VERSION_LOC: usize = 7; - Some(String::from_utf8_lossy(&data[..svh_len]).to_string()) + if data[METADATA_VERSION_LOC] < 6 { + debug!("HASH svh not available as compiler not recent enough."); + return None; + } + let rust_svh_len_pos = 12; + assert_eq!(data[rust_svh_len_pos], 64_u8); + let data = &data[rust_svh_len_pos + 1..]; + Some(FileHash::from_slice(FileHashAlgorithm::Svh, &data[..64])) } #[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug, Serialize, Deserialize, Hash)] @@ -2184,17 +2243,19 @@ pub enum FileHashAlgorithm { Svh, Md5, Sha1, + Sha256, } impl FromStr for FileHashAlgorithm { - type Err = (); + type Err = anyhow::Error; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> Result { match s { "md5" => Ok(FileHashAlgorithm::Md5), "svh" => Ok(FileHashAlgorithm::Svh), "sha1" => Ok(FileHashAlgorithm::Sha1), - _ => Err(()), + "sha256" => Ok(FileHashAlgorithm::Sha256), + _ => Err(anyhow::Error::msg("Unknown hash type")), } } } @@ -2205,6 +2266,7 @@ impl std::fmt::Display for FileHashAlgorithm { Self::Md5 => fmt.write_fmt(format_args!("md5"))?, Self::Svh => fmt.write_fmt(format_args!("svh"))?, Self::Sha1 => fmt.write_fmt(format_args!("sha1"))?, + Self::Sha256 => 
fmt.write_fmt(format_args!("sha256"))?, }; Ok(()) } @@ -2321,8 +2383,8 @@ pub fn translate_dep_info( #[derive(Default)] pub struct RustcDepInfo { - /// The list of files that the main target in the dep-info file depends on. - /// and lower 32bits of size and hash (or 0 if not there). + /// The list of files that the main target in the dep-info file depends on + /// and size and hash of those files. pub files: Vec, //FIXME use Option instead? /// The list of environment variables we found that the rustc compilation /// depends on. @@ -2367,26 +2429,23 @@ impl EncodedDepInfo { let nfiles = read_usize(bytes).unwrap(); let mut files = Vec::with_capacity(nfiles as usize); for _ in 0..nfiles { - //FIXME: backward compatibility!!! let eight_bytes: &[u8; 8] = (bytes[0..8]).try_into().ok()?; - let size = u64::from_le_bytes(*eight_bytes) as FileSize; - let size = if size == 0 { None } else { Some(size) }; + let size = NonZeroU64::new(u64::from_le_bytes(*eight_bytes)); *bytes = &bytes[8..]; - //debug!("read hash as {}", hash); let kind = match read_u8(bytes)? { 0 => None, 1 => Some(FileHashAlgorithm::Md5), 2 => Some(FileHashAlgorithm::Sha1), - 3 => Some(FileHashAlgorithm::Svh), + 3 => Some(FileHashAlgorithm::Sha256), + 4 => Some(FileHashAlgorithm::Svh), _ => return None, }; - //debug!("read size as {}", size); let hash = if let Some(kind) = kind { - let hash_buf = read_bytes(bytes)?; - let hash = String::from_utf8(hash_buf.to_vec()).unwrap(); - Some(FileHash { kind, hash }) + let hash = FileHash::from_slice(kind, &bytes[..64]); + *bytes = &bytes[64..]; + Some(hash) } else { None }; @@ -2442,20 +2501,14 @@ impl EncodedDepInfo { fn serialize(&self) -> CargoResult> { let mut ret = Vec::new(); - let dst = &mut ret; + let mut dst = &mut ret; write_usize(dst, self.files.len()); for (Fileprint { path, size, hash }, ty) in self.files.iter() { //debug!("writing depinfo size as {} ", *size as usize); - write_u64(dst, size.unwrap_or_default()); + write_u64(dst, size.map(|s|u64::from(s)).unwrap_or(0) ); //write(dst, hash.hash); if let Some(hash) = hash { - match hash.kind { - FileHashAlgorithm::Md5 => dst.push(1), - FileHashAlgorithm::Sha1 => dst.push(2), - FileHashAlgorithm::Svh => dst.push(3), - } - //debug!("writing depinfo hash as {} ", hash.hash.len()); - write_bytes(dst, hash.hash.as_bytes()); + hash.write_to_vec(&mut dst); } else { dst.push(0); //None } @@ -2528,11 +2581,8 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult ret.files[i].size = size_and_hash[0].parse().ok(); let kind_hash: Vec<_> = size_and_hash[1].split(":").collect(); let hash = kind_hash[1]; - ret.files[i].hash = Some(FileHash { - kind: FileHashAlgorithm::from_str(kind_hash[0]) - .expect("unknown hashing algo"), - hash: hash.to_string(), - }); + let kind = FileHashAlgorithm::from_str(kind_hash[0])?; + ret.files[i].hash = FileHash::from_hex_rev(kind, hash); break; } } @@ -2591,9 +2641,10 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult #[cfg(test)] mod test { - use super::parse_svh; + use super::{parse_svh, FileHash, FileHashAlgorithm}; + #[test] - fn test() { + fn test_no_svh_below_metadata_version_6() { let vec: Vec = vec![ 114, 117, 115, 116, 0, 0, 0, 5, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46, 52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100, @@ -2603,7 +2654,31 @@ mod test { 54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108, 101, 114, 95, 98, 117, 105, 108, ]; - // r u s t / version | base | r u s t c ' ' 1 . 
4 9 . 0 - d e v |size| svh--> + // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh--> + assert!(parse_svh(&vec).is_none()); + } + #[test] + fn test_svh_in_metadata_version_6() { + let vec: Vec = vec![ + 114, 117, 115, 116, 0, 0, 0, 6, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46, + 52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100, + 52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1, + 0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99, + 111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97, + 54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108, + 101, 114, 95, 98, 117, 105, 108, + ]; + // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh--> assert!(parse_svh(&vec).is_some()); } + + #[test] + fn file_hash() { + let from_str = FileHash::from_str("svh", "0102030405060708"); + let from_slice = Some(FileHash::from_slice( + FileHashAlgorithm::Svh, + &[1, 2, 3, 4, 5, 6, 7, 8], + )); + assert_eq!(from_str, from_slice); + } } diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index 7da694bfabe..e43544c4529 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -30,8 +30,7 @@ use log::debug; use super::{fingerprint, Context, FileFlavor, Unit}; use crate::core::compiler::fingerprint::Fileprint; -use crate::util::paths; -use crate::util::{internal, CargoResult}; +use crate::util::{internal, paths, CargoResult}; fn render_filename>(path: P, basedir: Option<&str>) -> CargoResult { let path = path.as_ref(); @@ -179,7 +178,7 @@ pub fn output_depinfo(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<()> { writeln!(outfile, "{}:", rendered_dep)?; if let (Some(size), Some(hash)) = (size, hash) { - writeln!(outfile, "# size:{} {}:{}", size, hash.kind, hash.hash)?; + writeln!(outfile, "# size:{} {}", size, hash)?; } } From a750137d4684c440e6a8aac8943eabeb694a982b Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 16 Nov 2020 22:11:35 +0000 Subject: [PATCH 38/39] cargo fmt + fix tests --- src/cargo/core/compiler/fingerprint.rs | 49 +++++++++++++------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 912c5f8e9f0..6f02a2ff962 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -319,11 +319,11 @@ use std::fmt; use std::fs; use std::hash::{self, Hasher}; use std::io::{self, Read}; +use std::num::NonZeroU64; use std::path::{Path, PathBuf}; use std::str::{self, FromStr}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; -use std::num::NonZeroU64; use anyhow::{bail, format_err}; use filetime::FileTime; @@ -365,14 +365,14 @@ pub struct FileHash { impl FileHash { pub fn from_hex_rev(kind: FileHashAlgorithm, hash: &str) -> Option { let mut decoded = hex::decode(hash).ok()?; - decoded.reverse(); // The slice is stored as little endien. + decoded.reverse(); // The slice is stored as little endien. 
Some(Self::from_slice(kind, &decoded[..])) } - // pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option { - // let decoded = hex::decode(hash).ok()?; - // Some(Self::from_slice(kind, &decoded[..])) - // } + pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option { + let decoded = hex::decode(hash).ok()?; + Some(Self::from_slice(kind, &decoded[..])) + } pub fn from_slice_rev(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash { let mut v = hash.to_vec(); @@ -784,7 +784,10 @@ impl CurrentFileprint { } pub(crate) fn calc_size(file: &Path) -> Option { - std::fs::metadata(file).map(|metadata| NonZeroU64::new(metadata.len())).ok().flatten() + std::fs::metadata(file) + .map(|metadata| NonZeroU64::new(metadata.len())) + .ok() + .flatten() } pub(crate) fn file_hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { @@ -2205,10 +2208,7 @@ fn get_svh_from_object_file(mut reader: R) -> Option { if let Some(name) = sym.name() { if name.starts_with("_rust_svh") { if let Some(index) = name.rfind('_') { - return FileHash::from_hex_rev( - FileHashAlgorithm::Svh, - &name[index + 1..], - ); + return FileHash::from_hex_rev(FileHashAlgorithm::Svh, &name[index + 1..]); } } } @@ -2225,10 +2225,11 @@ fn get_svh_from_rmeta_file(mut reader: R) -> Option { } fn parse_svh(data: &[u8]) -> Option { + debug!("HASHXX {:?}", data); const METADATA_VERSION_LOC: usize = 7; if data[METADATA_VERSION_LOC] < 6 { - debug!("HASH svh not available as compiler not recent enough."); + debug!("svh not available as compiler not recent enough."); return None; } let rust_svh_len_pos = 12; @@ -2505,7 +2506,7 @@ impl EncodedDepInfo { write_usize(dst, self.files.len()); for (Fileprint { path, size, hash }, ty) in self.files.iter() { //debug!("writing depinfo size as {} ", *size as usize); - write_u64(dst, size.map(|s|u64::from(s)).unwrap_or(0) ); + write_u64(dst, size.map(|s| u64::from(s)).unwrap_or(0)); //write(dst, hash.hash); if let Some(hash) = hash { hash.write_to_vec(&mut dst); @@ -2657,24 +2658,24 @@ mod test { // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh--> assert!(parse_svh(&vec).is_none()); } - #[test] + + #[test] //TODO update the bits so svh is before rust version! fn test_svh_in_metadata_version_6() { let vec: Vec = vec![ - 114, 117, 115, 116, 0, 0, 0, 6, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46, - 52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100, - 52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1, - 0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99, - 111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97, - 54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108, - 101, 114, 95, 98, 117, 105, 108, + 114, 117, 115, 116, 0, 0, 0, 6, 0, 17, 73, 215, 64, 29, 94, 138, 62, 252, 69, 252, 224, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, + 114, 117, 115, 116, 99, 32, 49, 46, 53, 48, 46, 48, 45, 100, 101, 118, 3, 115, 116, + 100, 220, 173, 135, 163, 173, 242, 162, 182, 228, 1, 0, 2, 17, 45, 48, 55, 56, 97, 54, + 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, ]; - // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 
0 - d e v |size| svh--> - assert!(parse_svh(&vec).is_some()); + // r u s t / metadata version | base | size=64 | svh | sizee_of_version | r u s t c ' ' 1 . 4 9 . 0 - d e v | base_pointer_points_here + assert_eq!(parse_svh(&vec), FileHash::from_hex(FileHashAlgorithm::Svh, "1d5e8a3efc45fce0")); } #[test] fn file_hash() { - let from_str = FileHash::from_str("svh", "0102030405060708"); + let from_str = FileHash::from_hex(FileHashAlgorithm::Svh, "0102030405060708"); let from_slice = Some(FileHash::from_slice( FileHashAlgorithm::Svh, &[1, 2, 3, 4, 5, 6, 7, 8], From 533a597b5a8998b2d0be1e836f1100d8e8f98c95 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 17 Nov 2020 06:25:28 +0000 Subject: [PATCH 39/39] Break out to separate file as fingerprint.rs is big --- src/cargo/core/compiler/content_hash.rs | 327 ++++++++++++++++++++++ src/cargo/core/compiler/context/mod.rs | 3 +- src/cargo/core/compiler/fingerprint.rs | 322 +-------------------- src/cargo/core/compiler/mod.rs | 1 + src/cargo/core/compiler/output_depinfo.rs | 2 +- 5 files changed, 335 insertions(+), 320 deletions(-) create mode 100644 src/cargo/core/compiler/content_hash.rs diff --git a/src/cargo/core/compiler/content_hash.rs b/src/cargo/core/compiler/content_hash.rs new file mode 100644 index 00000000000..76b5f104f21 --- /dev/null +++ b/src/cargo/core/compiler/content_hash.rs @@ -0,0 +1,327 @@ +use std::fmt; +use std::fs; +use std::io::{self, Read}; +use std::num::NonZeroU64; +use std::path::Path; +use std::path::PathBuf; +use std::str::FromStr; + +use filetime::FileTime; +use log::debug; +use md5::{Digest, Md5}; +use object::Object; +use serde; +use serde::{Deserialize, Serialize}; +use sha1::Sha1; +use sha2::Sha256; + +/// A file location with identifying properties: size and hash. +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug, Hash, Serialize, Deserialize)] +pub struct Fileprint { + pub path: PathBuf, //TODO is this field needed on here? + pub size: Option, + pub hash: Option, +} + +impl Fileprint { + pub(crate) fn from_md5(path: PathBuf) -> Self { + let size = CurrentFileprint::calc_size(&path); + let hash = CurrentFileprint::calc_hash(&path, FileHashAlgorithm::Md5); + Self { path, size, hash } + } +} + +#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug, Serialize, Deserialize, Hash)] +pub enum FileHashAlgorithm { + /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. + Svh, + Md5, + Sha1, + Sha256, +} + +impl FromStr for FileHashAlgorithm { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "md5" => Ok(FileHashAlgorithm::Md5), + "svh" => Ok(FileHashAlgorithm::Svh), + "sha1" => Ok(FileHashAlgorithm::Sha1), + "sha256" => Ok(FileHashAlgorithm::Sha256), + _ => Err(anyhow::Error::msg("Unknown hash type")), + } + } +} + +impl std::fmt::Display for FileHashAlgorithm { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + match self { + Self::Md5 => fmt.write_fmt(format_args!("md5"))?, + Self::Svh => fmt.write_fmt(format_args!("svh"))?, + Self::Sha1 => fmt.write_fmt(format_args!("sha1"))?, + Self::Sha256 => fmt.write_fmt(format_args!("sha256"))?, + }; + Ok(()) + } +} + +// While source files can't currently be > 4Gb, bin files could be. +pub type FileSize = NonZeroU64; + +#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct FileHash { + kind: FileHashAlgorithm, + // arrays > 32 are currently hard work so broken in twain. 
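+    // A 64-byte digest therefore spans both halves; shorter digests (such as
+    // a 16-byte md5) fill the front and `from_slice` leaves the rest zeroed.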
+    hash_front: [u8; 32],
+    hash_back: [u8; 32],
+}
+
+impl FileHash {
+    pub fn from_hex_rev(kind: FileHashAlgorithm, hash: &str) -> Option<FileHash> {
+        let mut decoded = hex::decode(hash).ok()?;
+        decoded.reverse(); // The slice is stored as little endian.
+        Some(Self::from_slice(kind, &decoded[..]))
+    }
+
+    pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option<FileHash> {
+        let decoded = hex::decode(hash).ok()?;
+        Some(Self::from_slice(kind, &decoded[..]))
+    }
+
+    pub fn from_slice_rev(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash {
+        let mut v = hash.to_vec();
+        v.reverse();
+        Self::from_slice(kind, &v)
+    }
+
+    pub fn from_slice(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash {
+        let mut result = FileHash {
+            kind,
+            hash_front: [0u8; 32],
+            hash_back: [0u8; 32],
+        };
+        let len = hash.len();
+        let front_len = std::cmp::min(len, 32);
+        (&mut result.hash_front[..front_len]).copy_from_slice(&hash[..front_len]);
+        if len > 32 {
+            let back_len = std::cmp::min(len, 64);
+            (&mut result.hash_back[..back_len - 32]).copy_from_slice(&hash[32..back_len]);
+        }
+        result
+    }
+
+    pub fn write_to_vec(&self, vec: &mut Vec<u8>) {
+        vec.push(match self.kind {
+            FileHashAlgorithm::Md5 => 1,
+            FileHashAlgorithm::Sha1 => 2,
+            FileHashAlgorithm::Sha256 => 3,
+            FileHashAlgorithm::Svh => 4,
+        });
+        vec.extend_from_slice(&self.hash_front[..]);
+        vec.extend_from_slice(&self.hash_back[..]);
+    }
+}
+
+impl fmt::Display for FileHash {
+    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        write!(
+            formatter,
+            "{}:{}{}",
+            self.kind,
+            hex::encode(self.hash_front),
+            hex::encode(self.hash_back)
+        )
+    }
+}
+
+fn get_svh_from_ar<R: Read>(reader: R) -> Option<FileHash> {
+    let mut ar = ar::Archive::new(reader);
+    while let Some(file) = ar.next_entry() {
+        match file {
+            Ok(file) => {
+                let s = String::from_utf8_lossy(&file.header().identifier());
+                if s.ends_with(".rmeta") {
+                    if let Some(index) = s.rfind('-') {
+                        return FileHash::from_hex_rev(
+                            FileHashAlgorithm::Svh,
+                            &s[index + 1..(s.len() - ".rmeta".len())],
+                        );
+                    }
+                }
+            }
+            Err(err) => debug!("Error reading ar: {}", err),
+        }
+    }
+    debug!("HASH svh not found in archive file.");
+    None
+}
+
+// While this looks expensive, this is only invoked for dylibs
+// with an incorrect timestamp where the file is the expected size.
+fn get_svh_from_object_file<R: Read>(mut reader: R) -> Option<FileHash> {
+    let mut data = vec![];
+    reader.read_to_end(&mut data).ok()?;
+    let obj = object::read::File::parse(&data).ok()?;
+
+    for (_idx, sym) in obj.symbols() {
+        if let Some(name) = sym.name() {
+            if name.starts_with("_rust_svh") {
+                if let Some(index) = name.rfind('_') {
+                    return FileHash::from_hex_rev(FileHashAlgorithm::Svh, &name[index + 1..]);
+                }
+            }
+        }
+    }
+    debug!("HASH svh not found in object file");
+    None
+}
+
+fn get_svh_from_rmeta_file<R: Read>(mut reader: R) -> Option<FileHash> {
+    let mut data = Vec::with_capacity(128);
+    data.resize(128, 0);
+    reader.read_exact(&mut data).ok()?;
+    parse_svh(&data)
+}
+
+fn parse_svh(data: &[u8]) -> Option<FileHash> {
+    debug!("HASHXX {:?}", data);
+    const METADATA_VERSION_LOC: usize = 7;
+
+    if data[METADATA_VERSION_LOC] < 6 {
+        debug!("svh not available as compiler not recent enough.");
+        return None;
+    }
+    let rust_svh_len_pos = 12;
+    assert_eq!(data[rust_svh_len_pos], 64_u8);
+    let data = &data[rust_svh_len_pos + 1..];
+    Some(FileHash::from_slice(FileHashAlgorithm::Svh, &data[..64]))
+}
+
+/// Cache of file properties that we know to be true.
+pub struct CurrentFileprint {
+    pub(crate) mtime: FileTime,
+    /// This will be None if not yet looked up.
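+    /// Populated lazily: `size()` fills it via `calc_size` on first use.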
+    size: Option<FileSize>,
+    /// This will be None if not yet calculated for this file.
+    hash: Option<FileHash>,
+}
+
+impl CurrentFileprint {
+    pub(crate) fn new(mtime: FileTime) -> Self {
+        CurrentFileprint {
+            mtime,
+            size: None,
+            hash: None,
+        }
+    }
+
+    pub(crate) fn size(&mut self, file: &Path) -> Option<&FileSize> {
+        if self.size.is_none() {
+            self.size = Self::calc_size(file);
+        }
+        self.size.as_ref()
+    }
+
+    pub(crate) fn calc_size(file: &Path) -> Option<FileSize> {
+        std::fs::metadata(file)
+            .map(|metadata| NonZeroU64::new(metadata.len()))
+            .ok()
+            .flatten()
+    }
+
+    pub(crate) fn file_hash(&mut self, path: &Path, reference: &FileHash) -> Option<&FileHash> {
+        if self.hash.is_none() {
+            self.hash = Self::calc_hash(path, reference.kind);
+        }
+        self.hash.as_ref()
+    }
+
+    fn invoke_digest<D, R>(reader: &mut R, kind: FileHashAlgorithm) -> Option<FileHash>
+    where
+        D: Digest,
+        R: Read,
+    {
+        let mut hasher = D::new();
+        let mut buffer = [0; 1024];
+        loop {
+            let count = reader.read(&mut buffer).ok()?;
+            if count == 0 {
+                break;
+            }
+            hasher.update(&buffer[..count]);
+        }
+        Some(FileHash::from_slice_rev(kind, &hasher.finalize()[..]))
+    }
+
+    pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option<FileHash> {
+        if let Ok(file) = fs::File::open(path) {
+            let mut reader: io::BufReader<fs::File> = io::BufReader::new(file);
+
+            match algo {
+                FileHashAlgorithm::Md5 => Self::invoke_digest::<Md5, _>(&mut reader, algo),
+                FileHashAlgorithm::Sha1 => Self::invoke_digest::<Sha1, _>(&mut reader, algo),
+                FileHashAlgorithm::Sha256 => Self::invoke_digest::<Sha256, _>(&mut reader, algo),
+                FileHashAlgorithm::Svh => {
+                    if path.extension() == Some(std::ffi::OsStr::new("rlib")) {
+                        get_svh_from_ar(reader)
+                    } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) {
+                        get_svh_from_rmeta_file(reader)
+                    } else {
+                        get_svh_from_object_file(reader)
+                    }
+                }
+            }
+        } else {
+            debug!("HASH failed to open path {:?}", path);
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{parse_svh, FileHash, FileHashAlgorithm};
+
+    #[test]
+    fn test_no_svh_below_metadata_version_6() {
+        let vec: Vec<u8> = vec![
+            114, 117, 115, 116, 0, 0, 0, 5, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46,
+            52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100,
+            52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1,
+            0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99,
+            111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97,
+            54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108,
+            101, 114, 95, 98, 117, 105, 108,
+        ];
+        // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh-->
+        assert!(parse_svh(&vec).is_none());
+    }
+
+    #[test] //TODO update the bits so svh is before rust version!
+    fn test_svh_in_metadata_version_6() {
+        let vec: Vec<u8> = vec![
+            114, 117, 115, 116, 0, 0, 0, 6, 0, 17, 73, 215, 64, 29, 94, 138, 62, 252, 69, 252, 224,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16,
+            114, 117, 115, 116, 99, 32, 49, 46, 53, 48, 46, 48, 45, 100, 101, 118, 3, 115, 116,
+            100, 220, 173, 135, 163, 173, 242, 162, 182, 228, 1, 0, 2, 17, 45, 48, 55, 56, 97, 54,
+            56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50,
+        ];
+        // r u s t / metadata version | base | size=64 | svh | size_of_version | r u s t c ' ' 1 . 4 9 . 
0 - d e v | base_pointer_points_here + assert_eq!( + parse_svh(&vec), + FileHash::from_hex(FileHashAlgorithm::Svh, "1d5e8a3efc45fce0") + ); + } + + #[test] + fn file_hash() { + let from_str = FileHash::from_hex(FileHashAlgorithm::Svh, "0102030405060708"); + let from_slice = Some(FileHash::from_slice( + FileHashAlgorithm::Svh, + &[1, 2, 3, 4, 5, 6, 7, 8], + )); + assert_eq!(from_str, from_slice); + } +} diff --git a/src/cargo/core/compiler/context/mod.rs b/src/cargo/core/compiler/context/mod.rs index 0a9c3899d33..03442b4aa2f 100644 --- a/src/cargo/core/compiler/context/mod.rs +++ b/src/cargo/core/compiler/context/mod.rs @@ -4,7 +4,8 @@ use std::sync::{Arc, Mutex}; use jobserver::Client; -use crate::core::compiler::fingerprint::{CurrentFileprint, RustcDepInfo}; +use crate::core::compiler::content_hash::CurrentFileprint; +use crate::core::compiler::fingerprint::RustcDepInfo; use crate::core::compiler::{self, compilation, Unit}; use crate::core::PackageId; use crate::util::errors::{CargoResult, CargoResultExt}; diff --git a/src/cargo/core/compiler/fingerprint.rs b/src/cargo/core/compiler/fingerprint.rs index 6f02a2ff962..aa437ae6ce2 100644 --- a/src/cargo/core/compiler/fingerprint.rs +++ b/src/cargo/core/compiler/fingerprint.rs @@ -315,10 +315,7 @@ use std::collections::hash_map::HashMap; use std::convert::TryInto; use std::env; -use std::fmt; -use std::fs; use std::hash::{self, Hasher}; -use std::io::{self, Read}; use std::num::NonZeroU64; use std::path::{Path, PathBuf}; use std::str::{self, FromStr}; @@ -328,15 +325,12 @@ use std::time::SystemTime; use anyhow::{bail, format_err}; use filetime::FileTime; use log::{debug, info, warn}; -use md5::{Digest, Md5}; -use object::Object; use serde; use serde::de; use serde::ser; use serde::{Deserialize, Serialize}; -use sha1::Sha1; -use sha2::Sha256; +use crate::core::compiler::content_hash::*; use crate::core::compiler::unit_graph::UnitDep; use crate::core::Package; use crate::util; @@ -351,75 +345,6 @@ use super::job::{ }; use super::{BuildContext, Context, FileFlavor, Unit}; -// While source files can't currently be > 4Gb, bin files could be. -pub type FileSize = NonZeroU64; - -#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Serialize, Deserialize)] -pub struct FileHash { - kind: FileHashAlgorithm, - // arrays > 32 are currently hard work so broken in twain. - hash_front: [u8; 32], - hash_back: [u8; 32], -} - -impl FileHash { - pub fn from_hex_rev(kind: FileHashAlgorithm, hash: &str) -> Option { - let mut decoded = hex::decode(hash).ok()?; - decoded.reverse(); // The slice is stored as little endien. 
- Some(Self::from_slice(kind, &decoded[..])) - } - - pub fn from_hex(kind: FileHashAlgorithm, hash: &str) -> Option { - let decoded = hex::decode(hash).ok()?; - Some(Self::from_slice(kind, &decoded[..])) - } - - pub fn from_slice_rev(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash { - let mut v = hash.to_vec(); - v.reverse(); - Self::from_slice(kind, &v) - } - - pub fn from_slice(kind: FileHashAlgorithm, hash: &[u8]) -> FileHash { - let mut result = FileHash { - kind, - hash_front: [0u8; 32], - hash_back: [0u8; 32], - }; - let len = hash.len(); - let front_len = std::cmp::min(len, 32); - (&mut result.hash_front[..front_len]).copy_from_slice(&hash[..front_len]); - if len > 32 { - let back_len = std::cmp::min(len, 64); - (&mut result.hash_back[..back_len - 32]).copy_from_slice(&hash[32..back_len]); - } - result - } - - pub fn write_to_vec(&self, vec: &mut Vec) { - vec.push(match self.kind { - FileHashAlgorithm::Md5 => 1, - FileHashAlgorithm::Sha1 => 2, - FileHashAlgorithm::Sha256 => 3, - FileHashAlgorithm::Svh => 4, - }); - vec.extend_from_slice(&self.hash_front[..]); - vec.extend_from_slice(&self.hash_back[..]); - } -} - -impl fmt::Display for FileHash { - fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> Result<(), fmt::Error> { - write!( - formatter, - "{}:{}{}", - self.kind, - hex::encode(self.hash_front), - hex::encode(self.hash_back) - ) - } -} - /// Determines if a `unit` is up-to-date, and if not prepares necessary work to /// update the persisted fingerprint. /// @@ -758,87 +683,6 @@ enum LocalFingerprint { RerunIfEnvChanged { var: String, val: Option }, } -/// Cache of file properties that we know to be true. -pub struct CurrentFileprint { - pub(crate) mtime: FileTime, - /// This will be None if not yet looked up. - size: Option, - /// This will be None if not yet calculated for this file. 
- hash: Option, -} - -impl CurrentFileprint { - pub(crate) fn new(mtime: FileTime) -> Self { - CurrentFileprint { - mtime, - size: None, - hash: None, - } - } - - pub(crate) fn size(&mut self, file: &Path) -> Option<&FileSize> { - if self.size.is_none() { - self.size = Self::calc_size(file); - } - self.size.as_ref() - } - - pub(crate) fn calc_size(file: &Path) -> Option { - std::fs::metadata(file) - .map(|metadata| NonZeroU64::new(metadata.len())) - .ok() - .flatten() - } - - pub(crate) fn file_hash(&mut self, path: &Path, algo: FileHashAlgorithm) -> Option<&FileHash> { - if self.hash.is_none() { - self.hash = Self::calc_hash(path, algo); - } - self.hash.as_ref() - } - - fn invoke_digest(reader: &mut R, kind: FileHashAlgorithm) -> Option - where - D: Digest, - R: Read, - { - let mut hasher = D::new(); - let mut buffer = [0; 1024]; - loop { - let count = reader.read(&mut buffer).ok()?; - if count == 0 { - break; - } - hasher.update(&buffer[..count]); - } - Some(FileHash::from_slice_rev(kind, &hasher.finalize()[..])) - } - - pub(crate) fn calc_hash(path: &Path, algo: FileHashAlgorithm) -> Option { - if let Ok(file) = fs::File::open(path) { - let mut reader: io::BufReader = io::BufReader::new(file); - - match algo { - FileHashAlgorithm::Md5 => Self::invoke_digest::(&mut reader, algo), - FileHashAlgorithm::Sha1 => Self::invoke_digest::(&mut reader, algo), - FileHashAlgorithm::Sha256 => Self::invoke_digest::(&mut reader, algo), - FileHashAlgorithm::Svh => { - if path.extension() == Some(std::ffi::OsStr::new("rlib")) { - get_svh_from_ar(reader) - } else if path.extension() == Some(std::ffi::OsStr::new("rmeta")) { - get_svh_from_rmeta_file(reader) - } else { - get_svh_from_object_file(reader) - } - } - } - } else { - debug!("HASH failed to open path {:?}", path); - None - } - } -} - enum StaleItem { MissingFile(PathBuf), ChangedFileTime { @@ -1349,8 +1193,8 @@ impl Fingerprint { if stale.is_none() { if let Some(reference_hash) = &reference.hash { - let current_hash = file_facts - .file_hash(dep_in, reference_hash.kind); + let current_hash = + file_facts.file_hash(dep_in, reference_hash); if let Some(file_facts_hash) = current_hash { if reference_hash != file_facts_hash { @@ -2144,7 +1988,7 @@ fn find_stale_file( // Same size but mtime is different. Probably there's no change... // compute hash and compare to prevent change cascade... - let current_hash = current.file_hash(path, reference_hash.kind); + let current_hash = current.file_hash(path, reference_hash); if current_hash != Some(reference_hash) { // FIXME? We could fail a little faster by seeing if any size discrepencies on _any_ file before checking hashes. // but not sure it's worth the additional complexity. @@ -2175,104 +2019,6 @@ fn find_stale_file( None } -fn get_svh_from_ar(reader: R) -> Option { - let mut ar = ar::Archive::new(reader); - while let Some(file) = ar.next_entry() { - match file { - Ok(file) => { - let s = String::from_utf8_lossy(&file.header().identifier()); - if s.ends_with(".rmeta") { - if let Some(index) = s.rfind('-') { - return FileHash::from_hex_rev( - FileHashAlgorithm::Svh, - &s[index + 1..(s.len() - ".rmeta".len())], - ); - } - } - } - Err(err) => debug!("Error reading ar: {}", err), - } - } - debug!("HASH svh not found in archive file."); - None -} - -// While this looks expensive, this is only invoked for dylibs -// with an incorrect timestamp the file is the expected size. 
-fn get_svh_from_object_file(mut reader: R) -> Option { - let mut data = vec![]; - reader.read_to_end(&mut data).ok()?; - let obj = object::read::File::parse(&data).ok()?; - - for (_idx, sym) in obj.symbols() { - if let Some(name) = sym.name() { - if name.starts_with("_rust_svh") { - if let Some(index) = name.rfind('_') { - return FileHash::from_hex_rev(FileHashAlgorithm::Svh, &name[index + 1..]); - } - } - } - } - debug!("HASH svh not found in object file"); - None -} - -fn get_svh_from_rmeta_file(mut reader: R) -> Option { - let mut data = Vec::with_capacity(128); - data.resize(128, 0); - reader.read_exact(&mut data).ok()?; - parse_svh(&data) -} - -fn parse_svh(data: &[u8]) -> Option { - debug!("HASHXX {:?}", data); - const METADATA_VERSION_LOC: usize = 7; - - if data[METADATA_VERSION_LOC] < 6 { - debug!("svh not available as compiler not recent enough."); - return None; - } - let rust_svh_len_pos = 12; - assert_eq!(data[rust_svh_len_pos], 64_u8); - let data = &data[rust_svh_len_pos + 1..]; - Some(FileHash::from_slice(FileHashAlgorithm::Svh, &data[..64])) -} - -#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Debug, Serialize, Deserialize, Hash)] -pub enum FileHashAlgorithm { - /// Svh is embedded as a symbol or for rmeta is in the .rmeta filename inside a .rlib. - Svh, - Md5, - Sha1, - Sha256, -} - -impl FromStr for FileHashAlgorithm { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - match s { - "md5" => Ok(FileHashAlgorithm::Md5), - "svh" => Ok(FileHashAlgorithm::Svh), - "sha1" => Ok(FileHashAlgorithm::Sha1), - "sha256" => Ok(FileHashAlgorithm::Sha256), - _ => Err(anyhow::Error::msg("Unknown hash type")), - } - } -} - -impl std::fmt::Display for FileHashAlgorithm { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { - match self { - Self::Md5 => fmt.write_fmt(format_args!("md5"))?, - Self::Svh => fmt.write_fmt(format_args!("svh"))?, - Self::Sha1 => fmt.write_fmt(format_args!("sha1"))?, - Self::Sha256 => fmt.write_fmt(format_args!("sha256"))?, - }; - Ok(()) - } -} - enum DepInfoPathType { // src/, e.g. src/lib.rs PackageRootRelative, @@ -2397,22 +2143,6 @@ pub struct RustcDepInfo { pub env: Vec<(String, Option)>, } -/// A file location with identifying properties: size and hash. -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Debug, Hash, Serialize, Deserialize)] -pub struct Fileprint { - pub path: PathBuf, //TODO is this field needed on here? - pub size: Option, - pub hash: Option, -} - -impl Fileprint { - pub(crate) fn from_md5(path: PathBuf) -> Self { - let size = CurrentFileprint::calc_size(&path); - let hash = CurrentFileprint::calc_hash(&path, FileHashAlgorithm::Md5); - Self { path, size, hash } - } -} - // Same as `RustcDepInfo` except avoids absolute paths as much as possible to // allow moving around the target directory. 
// @@ -2639,47 +2369,3 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult Ok(ret) } } - -#[cfg(test)] -mod test { - use super::{parse_svh, FileHash, FileHashAlgorithm}; - - #[test] - fn test_no_svh_below_metadata_version_6() { - let vec: Vec = vec![ - 114, 117, 115, 116, 0, 0, 0, 5, 0, 13, 201, 29, 16, 114, 117, 115, 116, 99, 32, 49, 46, - 52, 57, 46, 48, 45, 100, 101, 118, 16, 49, 100, 54, 102, 97, 101, 54, 56, 102, 54, 100, - 52, 99, 99, 98, 102, 3, 115, 116, 100, 241, 202, 128, 159, 207, 146, 173, 243, 204, 1, - 0, 2, 17, 45, 48, 55, 56, 97, 54, 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, 4, 99, - 111, 114, 101, 190, 159, 241, 243, 142, 194, 224, 233, 82, 0, 2, 17, 45, 51, 101, 97, - 54, 98, 97, 57, 97, 57, 56, 99, 50, 57, 51, 54, 100, 17, 99, 111, 109, 112, 105, 108, - 101, 114, 95, 98, 117, 105, 108, - ]; - // r u s t / metadata version | base | r u s t c ' ' 1 . 4 9 . 0 - d e v |size| svh--> - assert!(parse_svh(&vec).is_none()); - } - - #[test] //TODO update the bits so svh is before rust version! - fn test_svh_in_metadata_version_6() { - let vec: Vec = vec![ - 114, 117, 115, 116, 0, 0, 0, 6, 0, 17, 73, 215, 64, 29, 94, 138, 62, 252, 69, 252, 224, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, - 114, 117, 115, 116, 99, 32, 49, 46, 53, 48, 46, 48, 45, 100, 101, 118, 3, 115, 116, - 100, 220, 173, 135, 163, 173, 242, 162, 182, 228, 1, 0, 2, 17, 45, 48, 55, 56, 97, 54, - 56, 51, 101, 99, 57, 57, 55, 50, 48, 53, 50, - ]; - // r u s t / metadata version | base | size=64 | svh | sizee_of_version | r u s t c ' ' 1 . 4 9 . 0 - d e v | base_pointer_points_here - assert_eq!(parse_svh(&vec), FileHash::from_hex(FileHashAlgorithm::Svh, "1d5e8a3efc45fce0")); - } - - #[test] - fn file_hash() { - let from_str = FileHash::from_hex(FileHashAlgorithm::Svh, "0102030405060708"); - let from_slice = Some(FileHash::from_slice( - FileHashAlgorithm::Svh, - &[1, 2, 3, 4, 5, 6, 7, 8], - )); - assert_eq!(from_str, from_slice); - } -} diff --git a/src/cargo/core/compiler/mod.rs b/src/cargo/core/compiler/mod.rs index 53849e300e1..00b48e01fc2 100644 --- a/src/cargo/core/compiler/mod.rs +++ b/src/cargo/core/compiler/mod.rs @@ -3,6 +3,7 @@ mod build_context; mod build_plan; mod compilation; mod compile_kind; +mod content_hash; mod context; mod crate_type; mod custom_build; diff --git a/src/cargo/core/compiler/output_depinfo.rs b/src/cargo/core/compiler/output_depinfo.rs index e43544c4529..9971ad08fa4 100644 --- a/src/cargo/core/compiler/output_depinfo.rs +++ b/src/cargo/core/compiler/output_depinfo.rs @@ -29,7 +29,7 @@ use std::path::Path; use log::debug; use super::{fingerprint, Context, FileFlavor, Unit}; -use crate::core::compiler::fingerprint::Fileprint; +use crate::core::compiler::content_hash::Fileprint; use crate::util::{internal, paths, CargoResult}; fn render_filename>(path: P, basedir: Option<&str>) -> CargoResult {