From 4c03fdbc831349ea7f45b68331f554ade859abf5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 22:09:17 +0200 Subject: [PATCH 01/23] feat!: add `hash::bytes_with_header()`, also make it 32bit compatible. That way it's possible to hash entire files as object. Previously it wasn't possible to read more than u32::MAX bytes even on 32 bit system even though we are streaming the data. --- gix-features/src/hash.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/gix-features/src/hash.rs b/gix-features/src/hash.rs index 435e018e906..33054a7836c 100644 --- a/gix-features/src/hash.rs +++ b/gix-features/src/hash.rs @@ -96,7 +96,7 @@ pub fn hasher(kind: gix_hash::Kind) -> Sha1 { #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes_of_file( path: &std::path::Path, - num_bytes_from_start: usize, + num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, @@ -110,28 +110,42 @@ pub fn bytes_of_file( ) } -/// Similar to [`bytes_of_file`], but operates on an already open file. +/// Similar to [`bytes_of_file`], but operates on a stream of bytes. #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes( read: &mut dyn std::io::Read, - num_bytes_from_start: usize, + num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { - let mut hasher = hasher(kind); + bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) +} + +/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind. +#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] +pub fn bytes_with_hasher( + read: &mut dyn std::io::Read, + num_bytes_from_start: u64, + mut hasher: Sha1, + progress: &mut dyn crate::progress::Progress, + should_interrupt: &std::sync::atomic::AtomicBool, +) -> std::io::Result { let start = std::time::Instant::now(); // init progress before the possibility for failure, as convenience in case people want to recover - progress.init(Some(num_bytes_from_start), crate::progress::bytes()); + progress.init( + Some(num_bytes_from_start as prodash::progress::Step), + crate::progress::bytes(), + ); const BUF_SIZE: usize = u16::MAX as usize; let mut buf = [0u8; BUF_SIZE]; let mut bytes_left = num_bytes_from_start; while bytes_left > 0 { - let out = &mut buf[..BUF_SIZE.min(bytes_left)]; + let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)]; read.read_exact(out)?; - bytes_left -= out.len(); + bytes_left -= out.len() as u64; progress.inc_by(out.len()); hasher.update(out); if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) { From 9e7c3e12528833023bf37ada4e2e90989d848e99 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 22:20:56 +0200 Subject: [PATCH 02/23] adapt to `gix-features` --- gix-index/src/file/init.rs | 2 +- gix-index/src/file/verify.rs | 2 +- gix-pack/src/data/input/entries_to_bytes.rs | 2 +- gix-pack/src/verify.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gix-index/src/file/init.rs b/gix-index/src/file/init.rs index 99f4be258cf..ed13bed6e9b 100644 --- a/gix-index/src/file/init.rs +++ b/gix-index/src/file/init.rs @@ -78,7 +78,7 @@ impl File { let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64; let actual_hash = gix_features::hash::bytes( &mut file, - num_bytes_to_hash as usize, + num_bytes_to_hash, object_hash, &mut gix_features::progress::Discard, &Default::default(), diff --git a/gix-index/src/file/verify.rs b/gix-index/src/file/verify.rs index 3890acd9524..1c0dc7539b6 100644 --- a/gix-index/src/file/verify.rs +++ b/gix-index/src/file/verify.rs @@ -27,7 +27,7 @@ impl File { let should_interrupt = AtomicBool::new(false); let actual = gix_features::hash::bytes_of_file( &self.path, - num_bytes_to_hash as usize, + num_bytes_to_hash, checksum.kind(), &mut gix_features::progress::Discard, &should_interrupt, diff --git a/gix-pack/src/data/input/entries_to_bytes.rs b/gix-pack/src/data/input/entries_to_bytes.rs index 27cd046482f..dc2ac58848a 100644 --- a/gix-pack/src/data/input/entries_to_bytes.rs +++ b/gix-pack/src/data/input/entries_to_bytes.rs @@ -97,7 +97,7 @@ where let interrupt_never = std::sync::atomic::AtomicBool::new(false); let digest = hash::bytes( &mut self.output, - num_bytes_written as usize, + num_bytes_written, self.object_hash, &mut gix_features::progress::Discard, &interrupt_never, diff --git a/gix-pack/src/verify.rs b/gix-pack/src/verify.rs index d502ada389a..fff99c75d12 100644 --- a/gix-pack/src/verify.rs +++ b/gix-pack/src/verify.rs @@ -39,7 +39,7 @@ pub fn checksum_on_disk_or_mmap( let data_len_without_trailer = data.len() - object_hash.len_in_bytes(); let actual = match gix_features::hash::bytes_of_file( data_path, - data_len_without_trailer, + data_len_without_trailer as u64, object_hash, progress, should_interrupt, From 15689482351691079652d7b9b80b5aaecb7da863 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 08:08:57 +0200 Subject: [PATCH 03/23] add more traces to potentially longer-running operations --- gix-index/src/file/write.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gix-index/src/file/write.rs b/gix-index/src/file/write.rs index 47a4cde9661..0623f6c59c3 100644 --- a/gix-index/src/file/write.rs +++ b/gix-index/src/file/write.rs @@ -22,6 +22,7 @@ impl File { mut out: impl std::io::Write, options: write::Options, ) -> std::io::Result<(Version, gix_hash::ObjectId)> { + let _span = gix_features::trace::detail!("gix_index::File::write_to()", skip_hash = options.skip_hash); let (version, hash) = if options.skip_hash { let out: &mut dyn std::io::Write = &mut out; let version = self.state.write_to(out, options)?; @@ -40,6 +41,7 @@ impl File { /// /// Note that the hash produced will be stored which is why we need to be mutable. pub fn write(&mut self, options: write::Options) -> Result<(), Error> { + let _span = gix_features::trace::detail!("gix_index::File::write()", path = ?self.path); let mut lock = std::io::BufWriter::with_capacity( 64 * 1024, gix_lock::File::acquire_to_update_resource(&self.path, gix_lock::acquire::Fail::Immediately, None)?, From a1794b537c036e5469f7c2808d460bffcc6eeb5a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 10:26:43 +0200 Subject: [PATCH 04/23] fix!: use `PathStorageRef` in place of `&PathStorage` --- gix-index/src/access/mod.rs | 8 ++++---- gix-index/src/entry/mod.rs | 8 ++++++-- gix-index/src/entry/stat.rs | 31 ++++++++++++++++--------------- gix-index/src/lib.rs | 7 ++++--- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/gix-index/src/access/mod.rs b/gix-index/src/access/mod.rs index 08cb2302011..2a3e85f1120 100644 --- a/gix-index/src/access/mod.rs +++ b/gix-index/src/access/mod.rs @@ -4,7 +4,7 @@ use std::ops::Range; use bstr::{BStr, ByteSlice, ByteVec}; use filetime::FileTime; -use crate::{entry, extension, Entry, PathStorage, State, Version}; +use crate::{entry, extension, Entry, PathStorage, PathStorageRef, State, Version}; // TODO: integrate this somehow, somewhere, depending on later usage. #[allow(dead_code)] @@ -41,7 +41,7 @@ impl State { &self.entries } /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them. - pub fn path_backing(&self) -> &PathStorage { + pub fn path_backing(&self) -> &PathStorageRef { &self.path_backing } @@ -58,7 +58,7 @@ impl State { /// Return mutable entries along with their path, as obtained from `backing`. pub fn entries_mut_with_paths_in<'state, 'backing>( &'state mut self, - backing: &'backing PathStorage, + backing: &'backing PathStorageRef, ) -> impl Iterator { self.entries.iter_mut().map(move |e| { let path = backing[e.path.clone()].as_bstr(); @@ -279,7 +279,7 @@ impl State { } /// Return a writable slice to entries and read-access to their path storage at the same time. - pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorage) { + pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) { (&mut self.entries, &self.path_backing) } diff --git a/gix-index/src/entry/mod.rs b/gix-index/src/entry/mod.rs index e680e08b06d..2f257ef633d 100644 --- a/gix-index/src/entry/mod.rs +++ b/gix-index/src/entry/mod.rs @@ -1,4 +1,8 @@ -/// The stage of an entry, one of 0 = base, 1 = ours, 2 = theirs +/// The stage of an entry, one of… +/// * 0 = no conflict, +/// * 1 = base, +/// * 2 = ours, +/// * 3 = theirs pub type Stage = u32; /// @@ -71,7 +75,7 @@ mod access { backing[self.path.clone()].as_bstr() } - /// Return an entry's stage. + /// Return an entry's stage. See [entry::Stage] for possible values. pub fn stage(&self) -> entry::Stage { self.flags.stage() } diff --git a/gix-index/src/entry/stat.rs b/gix-index/src/entry/stat.rs index 7bde717633f..5e60f8540be 100644 --- a/gix-index/src/entry/stat.rs +++ b/gix-index/src/entry/stat.rs @@ -92,21 +92,22 @@ impl Stat { size: fstat.len() as u32, }; #[cfg(unix)] - use std::os::unix::fs::MetadataExt; - #[cfg(unix)] - let res = Stat { - mtime: mtime.try_into().unwrap_or_default(), - ctime: ctime.try_into().unwrap_or_default(), - // truncating to 32 bits is fine here because - // that's what the linux syscalls returns - // just rust upcasts to 64 bits for some reason? - // numbers this large are impractical anyway (that's a lot of hard-drives). - dev: fstat.dev() as u32, - ino: fstat.ino() as u32, - uid: fstat.uid(), - gid: fstat.gid(), - // truncation to 32 bits is on purpose (git does the same). - size: fstat.len() as u32, + let res = { + use std::os::unix::fs::MetadataExt; + Stat { + mtime: mtime.try_into().unwrap_or_default(), + ctime: ctime.try_into().unwrap_or_default(), + // truncating to 32 bits is fine here because + // that's what the linux syscalls returns + // just rust upcasts to 64 bits for some reason? + // numbers this large are impractical anyway (that's a lot of hard-drives). + dev: fstat.dev() as u32, + ino: fstat.ino() as u32, + uid: fstat.uid(), + gid: fstat.gid(), + // truncation to 32 bits is on purpose (git does the same). + size: fstat.len() as u32, + } }; Ok(res) diff --git a/gix-index/src/lib.rs b/gix-index/src/lib.rs index 1fb1f68009b..55b332a8280 100644 --- a/gix-index/src/lib.rs +++ b/gix-index/src/lib.rs @@ -121,9 +121,10 @@ mod impls { f, "{} {}{:?} {} {}", match entry.flags.stage() { - 0 => "BASE ", - 1 => "OURS ", - 2 => "THEIRS ", + 0 => " ", + 1 => "BASE ", + 2 => "OURS ", + 3 => "THEIRS ", _ => "UNKNOWN", }, if entry.flags.is_empty() { From c044919383b14f9355cf279add64297c2acedeed Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 23 Sep 2023 11:04:49 +0200 Subject: [PATCH 05/23] add symlink checking for `gix status` The motivation for this is https://github.com/git/git/commit/f62ce3de9dd4803f50f65e17f5fc03c7bdb49c40 . --- gix-status/src/index_as_worktree/function.rs | 16 +-- gix-status/src/lib.rs | 10 ++ gix-status/src/stack.rs | 59 ++++++++++ .../fixtures/generated-archives/.gitignore | 1 + gix-status/tests/fixtures/symlink_stack.sh | 20 ++++ gix-status/tests/stack/mod.rs | 107 ++++++++++++++++++ gix-status/tests/worktree.rs | 4 + 7 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 gix-status/src/stack.rs create mode 100755 gix-status/tests/fixtures/symlink_stack.sh create mode 100644 gix-status/tests/stack/mod.rs diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index e576cc35c44..a22071e8862 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -72,9 +72,9 @@ where State { buf: Vec::new(), odb_buf: Vec::new(), + path_stack: crate::SymlinkCheck::new(worktree.to_owned()), timestamp, path_backing, - worktree, options, }, compare, @@ -104,9 +104,8 @@ struct State<'a, 'b> { buf: Vec, odb_buf: Vec, timestamp: FileTime, - // path_cache: fs::Cache TODO path cache + path_stack: crate::SymlinkCheck, path_backing: &'b [u8], - worktree: &'a Path, options: &'a Options, } @@ -195,12 +194,13 @@ impl<'index> State<'_, 'index> { E: std::error::Error + Send + Sync + 'static, Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, { - // TODO fs cache let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; - let worktree_path = self.worktree.join(worktree_path); + let worktree_path = match self.path_stack.verified_path(worktree_path.as_ref()) { + Ok(path) => path, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) => return Err(Error::Io(err)), + }; let metadata = match worktree_path.symlink_metadata() { - // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that Ok(metadata) if metadata.is_dir() => { // index entries are normally only for files/symlinks // if a file turned into a directory it was removed @@ -256,7 +256,7 @@ impl<'index> State<'_, 'index> { let read_file = WorktreeBlob { buf: &mut self.buf, - path: &worktree_path, + path: worktree_path, entry, options: self.options, }; diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 225668ced78..95b32e5dec2 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -31,3 +31,13 @@ pub trait Pathspec { /// `is_dir` is `true` if `relative_path` is a directory. fn is_included(&mut self, relative_path: &BStr, is_dir: Option) -> bool; } + +/// A stack that validates we are not going through a symlink in a way that is read-only. +/// +/// It can efficiently validate paths when these are queried in sort-order, which leads to each component +/// to only be checked once. +pub struct SymlinkCheck { + inner: gix_fs::Stack, +} + +mod stack; diff --git a/gix-status/src/stack.rs b/gix-status/src/stack.rs new file mode 100644 index 00000000000..3f3cd8f9797 --- /dev/null +++ b/gix-status/src/stack.rs @@ -0,0 +1,59 @@ +use crate::SymlinkCheck; +use gix_fs::Stack; +use std::path::{Path, PathBuf}; + +impl SymlinkCheck { + /// Create a new stack that starts operating at `root`. + pub fn new(root: PathBuf) -> Self { + Self { + inner: gix_fs::Stack::new(root), + } + } + + /// Return a valid filesystem path located in our root by appending `relative_path`, which is guaranteed to + /// not pass through a symbolic link. That way the caller can be sure to not be misled by an attacker that + /// tries to make us reach outside of the repository. + /// + /// Note that the file pointed to by `relative_path` may still be a symbolic link, or not exist at all, + /// and that an error may also be produced if directories on the path leading to the leaf + /// component of `relative_path` are missing. + /// + /// ### Note + /// + /// On windows, no verification is performed, instead only the combined path is provided as usual. + pub fn verified_path(&mut self, relative_path: &Path) -> std::io::Result<&Path> { + self.inner.make_relative_path_current(relative_path, &mut Delegate)?; + Ok(self.inner.current()) + } +} + +struct Delegate; + +impl gix_fs::stack::Delegate for Delegate { + fn push_directory(&mut self, _stack: &Stack) -> std::io::Result<()> { + Ok(()) + } + + fn push(&mut self, is_last_component: bool, stack: &Stack) -> std::io::Result<()> { + #[cfg(windows)] + { + Ok(()) + } + #[cfg(not(windows))] + { + if is_last_component { + return Ok(()); + } + + if stack.current().symlink_metadata()?.is_symlink() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Cannot step through symlink to perform an lstat", + )); + } + Ok(()) + } + } + + fn pop_directory(&mut self) {} +} diff --git a/gix-status/tests/fixtures/generated-archives/.gitignore b/gix-status/tests/fixtures/generated-archives/.gitignore index ee165f3a1c5..c0de66a1edd 100644 --- a/gix-status/tests/fixtures/generated-archives/.gitignore +++ b/gix-status/tests/fixtures/generated-archives/.gitignore @@ -1,2 +1,3 @@ status_unchanged.tar.xz status_changed.tar.xz +symlink_stack.tar.xz diff --git a/gix-status/tests/fixtures/symlink_stack.sh b/gix-status/tests/fixtures/symlink_stack.sh new file mode 100755 index 00000000000..479460c00a9 --- /dev/null +++ b/gix-status/tests/fixtures/symlink_stack.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +mkdir base; +(cd base + touch file + mkdir dir + touch dir/file-in-dir + (cd dir + ln -s file-in-dir filelink + mkdir subdir + ln -s subdir dirlink + ) + + ln -s file root-filelink + ln -s dir root-dirlink +) + +ln -s base symlink-base + diff --git a/gix-status/tests/stack/mod.rs b/gix-status/tests/stack/mod.rs new file mode 100644 index 00000000000..edc4d52028a --- /dev/null +++ b/gix-status/tests/stack/mod.rs @@ -0,0 +1,107 @@ +fn stack() -> gix_status::SymlinkCheck { + stack_in("base") +} + +fn stack_in(dir: &str) -> gix_status::SymlinkCheck { + gix_status::SymlinkCheck::new( + gix_testtools::scripted_fixture_read_only_standalone("symlink_stack.sh") + .expect("valid script") + .join(dir), + ) +} + +#[test] +fn paths_not_going_through_symlink_directories_are_ok_and_point_to_correct_item() -> crate::Result { + for root in ["base", "symlink-base"] { + let mut stack = stack_in(root); + for (rela_path, expectation) in [ + ("root-filelink", is_symlink as fn(&std::fs::Metadata) -> bool), + ("root-dirlink", is_symlinked_dir), + ("file", is_file), + ("dir/file-in-dir", is_file), + ("dir", is_dir), + ("dir/subdir", is_dir), + ("dir/filelink", is_symlink), + ("dir/dirlink", is_symlinked_dir), + ] { + assert!( + expectation(&stack.verified_path(rela_path.as_ref())?.symlink_metadata()?), + "{rela_path:?} expectation failed" + ); + } + } + Ok(()) +} + +#[test] +fn leaf_file_does_not_have_to_exist() -> crate::Result { + assert!(!stack().verified_path("dir/does-not-exist".as_ref())?.exists()); + Ok(()) +} + +#[test] +#[cfg(not(windows))] +fn intermediate_directories_have_to_exist_or_not_found_error() -> crate::Result { + assert_eq!( + stack() + .verified_path("nonexisting-dir/file".as_ref()) + .unwrap_err() + .kind(), + std::io::ErrorKind::NotFound + ); + Ok(()) +} + +#[test] +#[cfg(windows)] +fn intermediate_directories_do_not_have_exist_for_success() -> crate::Result { + assert!(stack().verified_path("nonexisting-dir/file".as_ref()).is_ok()); + Ok(()) +} + +#[test] +#[cfg_attr( + windows, + ignore = "on windows, symlinks appear to be files or dirs, is_symlink() doesn't work" +)] +fn paths_leading_through_symlinks_are_rejected() { + let mut stack = stack(); + assert_eq!( + stack + .verified_path("root-dirlink/file-in-dir".as_ref()) + .unwrap_err() + .kind(), + std::io::ErrorKind::Other, + "root-dirlink is a symlink to a directory" + ); + + assert_eq!( + stack.verified_path("dir/dirlink/nothing".as_ref()).unwrap_err().kind(), + std::io::ErrorKind::Other, + "root-dirlink is a symlink to a directory" + ); +} + +fn is_symlink(m: &std::fs::Metadata) -> bool { + if cfg!(windows) { + // On windows, symlinks can't be seen, at least not through std + m.is_file() + } else { + m.is_symlink() + } +} + +fn is_symlinked_dir(m: &std::fs::Metadata) -> bool { + if cfg!(windows) { + // On windows, symlinks can't be seen, at least not through std + m.is_dir() + } else { + m.is_symlink() + } +} +fn is_file(m: &std::fs::Metadata) -> bool { + m.is_file() +} +fn is_dir(m: &std::fs::Metadata) -> bool { + m.is_dir() +} diff --git a/gix-status/tests/worktree.rs b/gix-status/tests/worktree.rs index 05bcf94ca2b..d5fbdb9b732 100644 --- a/gix-status/tests/worktree.rs +++ b/gix-status/tests/worktree.rs @@ -1,2 +1,6 @@ +pub use gix_testtools::Result; + mod status; use status::*; + +mod stack; From 53de126ea571ef9ed911e66c26a4c36cfdc7e0dd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 08:40:06 +0200 Subject: [PATCH 06/23] feat!: add support for submodule status Previously, submodules where ignored. Now they are treated correctly as 'directory' which is compared to what's in the worktree. We also simplify blob handling. --- gitoxide-core/src/repository/status.rs | 2 +- gix-status/src/index_as_worktree/function.rs | 76 +++++++---- gix-status/src/index_as_worktree/mod.rs | 6 +- gix-status/src/index_as_worktree/recorder.rs | 35 ++++- .../{content.rs => traits.rs} | 47 ++++--- gix-status/src/index_as_worktree/types.rs | 26 +++- .../status_submodule.tar.xz | 3 + gix-status/tests/fixtures/status_submodule.sh | 34 +++++ gix-status/tests/status/index_as_worktree.rs | 129 ++++++++++++++---- 9 files changed, 272 insertions(+), 86 deletions(-) rename gix-status/src/index_as_worktree/{content.rs => traits.rs} (65%) create mode 100644 gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz create mode 100644 gix-status/tests/fixtures/status_submodule.sh diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index 2b892d53190..05849d8185f 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -4,7 +4,7 @@ use gix::bstr::{BStr, BString}; use gix::index::Entry; use gix::prelude::FindExt; use gix::Progress; -use gix_status::index_as_worktree::content::FastEq; +use gix_status::index_as_worktree::traits::FastEq; use gix_status::index_as_worktree::Change; pub enum Submodules { diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index a22071e8862..31a532678a0 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -7,8 +7,8 @@ use gix_features::parallel::{in_parallel_if, Reduce}; use crate::{ index_as_worktree::{ - content, - content::CompareBlobs, + traits, + traits::{CompareBlobs, SubmoduleStatus}, types::{Error, Options}, Change, VisitEntry, }, @@ -26,11 +26,12 @@ use crate::{ /// like whether a file has conflicts, and files that were added with `git add` are shown as a special /// changes despite not technically requiring a change to the index since `git add` already added the file to the index. #[allow(clippy::too_many_arguments)] -pub fn index_as_worktree<'index, T, Find, E>( +pub fn index_as_worktree<'index, T, U, Find, E1, E2>( index: &'index mut gix_index::State, worktree: &Path, - collector: &mut impl VisitEntry<'index, ContentChange = T>, + collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>, compare: impl CompareBlobs + Send + Clone, + submodule: impl SubmoduleStatus + Send + Clone, find: Find, progress: &mut dyn gix_features::progress::Progress, pathspec: impl Pathspec + Send + Clone, @@ -38,8 +39,10 @@ pub fn index_as_worktree<'index, T, Find, E>( ) -> Result<(), Error> where T: Send, - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, + U: Send, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1> + Send + Clone, { // the order is absolutely critical here we use the old timestamp to detect racy index entries // (modified at or after the last index update) during the index update we then set those @@ -78,16 +81,17 @@ where options, }, compare, + submodule, find, pathspec, ) } }, - |entries, (state, diff, find, pathspec)| { + |entries, (state, blobdiff, submdule, find, pathspec)| { entries .iter_mut() .filter_map(|entry| { - let res = state.process(entry, diff, find, pathspec); + let res = state.process(entry, blobdiff, submdule, find, pathspec); count.fetch_add(1, Ordering::Relaxed); res }) @@ -109,19 +113,21 @@ struct State<'a, 'b> { options: &'a Options, } -type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; +type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; impl<'index> State<'_, 'index> { - fn process( + fn process( &mut self, entry: &'index mut gix_index::Entry, diff: &mut impl CompareBlobs, + submodule: &mut impl SubmoduleStatus, find: &mut Find, pathspec: &mut impl Pathspec, - ) -> Option> + ) -> Option> where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { let conflict = match entry.stage() { 0 => false, @@ -140,7 +146,7 @@ impl<'index> State<'_, 'index> { if !pathspec.is_included(path, Some(false)) { return None; } - let status = self.compute_status(&mut *entry, path, diff, find); + let status = self.compute_status(&mut *entry, path, diff, submodule, find); Some(status.map(move |status| (&*entry, path, status, conflict))) } @@ -183,18 +189,20 @@ impl<'index> State<'_, 'index> { /// which is a constant. /// /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). - fn compute_status( + fn compute_status( &mut self, entry: &mut gix_index::Entry, - git_path: &BStr, + rela_path: &BStr, diff: &mut impl CompareBlobs, + submodule: &mut impl SubmoduleStatus, find: &mut Find, - ) -> Result>, Error> + ) -> Result>, Error> where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { - let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; + let worktree_path = gix_path::try_from_bstr(rela_path).map_err(|_| Error::IllformedUtf8)?; let worktree_path = match self.path_stack.verified_path(worktree_path.as_ref()) { Ok(path) => path, Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), @@ -206,11 +214,17 @@ impl<'index> State<'_, 'index> { // if a file turned into a directory it was removed // the only exception here are submodules which are // part of the index despite being directories - // - // TODO: submodules: - // if entry.mode.contains(Mode::COMMIT) && - // resolve_gitlink_ref(ce->name, "HEAD", &sub)) - return Ok(Some(Change::Removed)); + if entry.mode.is_submodule() { + let status = submodule + .status(entry, rela_path) + .map_err(|err| Error::SubmoduleStatus { + rela_path: rela_path.into(), + source: Box::new(err), + })?; + return Ok(status.map(|status| Change::SubmoduleModification(status))); + } else { + return Ok(Some(Change::Removed)); + } } Ok(metadata) => metadata, Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), @@ -265,7 +279,7 @@ impl<'index> State<'_, 'index> { id: &entry.id, find, }; - let content_change = diff.compare_blobs::(entry, metadata.len() as usize, read_file, read_blob)?; + let content_change = diff.compare_blobs(entry, metadata.len() as usize, read_file, read_blob)?; // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. if content_change.is_some() && racy_clean { entry.stat.size = 0; @@ -288,8 +302,10 @@ struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { phantom: PhantomData, } -impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { - type Input = Vec>; +impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce + for ReduceChange<'_, 'index, C> +{ + type Input = Vec>; type FeedProduce = (); @@ -327,7 +343,7 @@ where find: Find, } -impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { +impl<'a> traits::ReadDataOnce<'a> for WorktreeBlob<'a> { fn read_data(self) -> Result<&'a [u8], Error> { let res = read::data_to_buf_with_meta( self.path, @@ -339,7 +355,7 @@ impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { } } -impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> +impl<'a, Find, E> traits::ReadDataOnce<'a> for OdbBlob<'a, Find, E> where E: std::error::Error + Send + Sync + 'static, Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, diff --git a/gix-status/src/index_as_worktree/mod.rs b/gix-status/src/index_as_worktree/mod.rs index 8294a54e8ac..bd2369cb5f1 100644 --- a/gix-status/src/index_as_worktree/mod.rs +++ b/gix-status/src/index_as_worktree/mod.rs @@ -4,8 +4,8 @@ mod types; pub use types::{Change, Error, Options, VisitEntry}; mod recorder; -pub use recorder::Recorder; +pub use recorder::{Record, Recorder}; -/// -pub mod content; pub(crate) mod function; +/// +pub mod traits; diff --git a/gix-status/src/index_as_worktree/recorder.rs b/gix-status/src/index_as_worktree/recorder.rs index 48beb25a313..7f098e86214 100644 --- a/gix-status/src/index_as_worktree/recorder.rs +++ b/gix-status/src/index_as_worktree/recorder.rs @@ -3,25 +3,46 @@ use gix_index as index; use crate::index_as_worktree::{Change, VisitEntry}; +/// A record of a change. +/// +/// It's created either if there is a conflict or a change, or both. +#[derive(Debug)] +pub struct Record<'index, T, U> { + /// The index entry that is changed. + pub entry: &'index index::Entry, + /// The path to the entry. + pub relative_path: &'index BStr, + /// The change itself, or `None` if there is only a conflict. + pub change: Option>, + /// information about the conflict itself + pub conflict: bool, +} + /// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. #[derive(Debug, Default)] -pub struct Recorder<'index, T = ()> { +pub struct Recorder<'index, T = (), U = ()> { /// collected changes, index entries without conflicts or changes are excluded. - pub records: Vec<(&'index BStr, Option>, bool)>, + pub records: Vec>, } -impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { +impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { type ContentChange = T; + type SubmoduleStatus = U; fn visit_entry( &mut self, - _entry: &'index index::Entry, + entry: &'index index::Entry, rela_path: &'index BStr, - status: Option>, + change: Option>, conflict: bool, ) { - if conflict || status.is_some() { - self.records.push((rela_path, status, conflict)) + if conflict || change.is_some() { + self.records.push(Record { + entry, + relative_path: rela_path, + change, + conflict, + }) } } } diff --git a/gix-status/src/index_as_worktree/content.rs b/gix-status/src/index_as_worktree/traits.rs similarity index 65% rename from gix-status/src/index_as_worktree/content.rs rename to gix-status/src/index_as_worktree/traits.rs index aa775821a7a..fa0a39370fa 100644 --- a/gix-status/src/index_as_worktree/content.rs +++ b/gix-status/src/index_as_worktree/traits.rs @@ -1,3 +1,5 @@ +use crate::index_as_worktree::Error; +use bstr::BStr; use gix_hash::ObjectId; use gix_index as index; use index::Entry; @@ -11,22 +13,33 @@ pub trait CompareBlobs { /// and allow reading its bytes using `worktree_blob`. /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. - fn compare_blobs<'a, E>( + fn compare_blobs<'a, 'b>( &mut self, - entry: &'a gix_index::Entry, + entry: &gix_index::Entry, worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E>; + worktree_blob: impl ReadDataOnce<'a>, + entry_blob: impl ReadDataOnce<'b>, + ) -> Result, Error>; +} + +/// Determine the status of a submodule, which always indicates that it changed if present. +pub trait SubmoduleStatus { + /// The status result, describing in which way the submodule changed. + type Output; + /// A custom error that may occur while computing the submodule status. + type Error: std::error::Error + Send + Sync + 'static; + + /// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected. + fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result, Self::Error>; } /// Lazy borrowed access to blob data. -pub trait ReadDataOnce<'a, E> { +pub trait ReadDataOnce<'a> { /// Returns the contents of this blob. /// /// This potentially performs IO and other expensive operations /// and should only be called when necessary. - fn read_data(self) -> Result<&'a [u8], E>; + fn read_data(self) -> Result<&'a [u8], Error>; } /// Compares to blobs by comparing their size and oid, and only looks at the file if @@ -37,13 +50,13 @@ pub struct FastEq; impl CompareBlobs for FastEq { type Output = (); - fn compare_blobs<'a, E>( + fn compare_blobs<'a, 'b>( &mut self, - entry: &'a Entry, + entry: &Entry, worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { + worktree_blob: impl ReadDataOnce<'a>, + _entry_blob: impl ReadDataOnce<'b>, + ) -> Result, Error> { // make sure to account for racily smudged entries here so that they don't always keep // showing up as modified even after their contents have changed again, to a potentially // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. @@ -66,13 +79,13 @@ pub struct HashEq; impl CompareBlobs for HashEq { type Output = ObjectId; - fn compare_blobs<'a, E>( + fn compare_blobs<'a, 'b>( &mut self, - entry: &'a Entry, + entry: &Entry, _worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { + worktree_blob: impl ReadDataOnce<'a>, + _entry_blob: impl ReadDataOnce<'b>, + ) -> Result, Error> { let blob = worktree_blob.read_data()?; let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); Ok((entry.id != file_hash).then_some(file_hash)) diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 35cf45401b1..b3324379b97 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -1,4 +1,4 @@ -use bstr::BStr; +use bstr::{BStr, BString}; /// The error returned by [`status()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] @@ -12,6 +12,11 @@ pub enum Error { Io(#[from] std::io::Error), #[error("Failed to obtain blob from object database")] Find(#[source] Box), + #[error("Could not determine status for submodule at '{rela_path}'")] + SubmoduleStatus { + rela_path: BString, + source: Box, + }, } #[derive(Clone, Debug, Default)] @@ -29,7 +34,7 @@ pub struct Options { /// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Change { +pub enum Change { /// This corresponding file does not exist in the worktree anymore. Removed, /// The type of file changed compared to the worktree, i.e. a symlink s now a file. @@ -40,11 +45,20 @@ pub enum Change { /// Indicates that one of the stat changes was an executable bit change /// which is a significant change itself. executable_bit_changed: bool, - /// The output of the [`CompareBlobs`](crate::index_as_worktree::content::CompareBlobs) run on this entry. + /// The output of the [`CompareBlobs`](crate::index_as_worktree::traits::CompareBlobs) run on this entry. /// If there is no content change and only the executable bit - /// changed than this is `None`. + /// changed then this is `None`. content_change: Option, }, + /// A submodule is initialized and checked out, and there was modification to either: + /// + /// * the `HEAD` as compared to the superproject's desired commit for `HEAD` + /// * the worktree has at least one modified file + /// * there is at least one untracked file + /// + /// The exact nature of the modification is handled by the caller which may retain information per submodule or + /// re-compute details as needed when seeing this variant. + SubmoduleModification(U), /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. /// /// This means it's not available in the object database yet or the index was created from, @@ -56,6 +70,8 @@ pub enum Change { pub trait VisitEntry<'index> { /// Data generated by comparing an entry with a file. type ContentChange; + /// Data obtained when checking the submodule status. + type SubmoduleStatus; /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether /// or not it has a `conflict`. /// If `change` is `None`, there is no change. @@ -63,7 +79,7 @@ pub trait VisitEntry<'index> { &mut self, entry: &'index gix_index::Entry, rela_path: &'index BStr, - change: Option>, + change: Option>, conflict: bool, ); } diff --git a/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz new file mode 100644 index 00000000000..1c7546c8c07 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d7b75f62872a98ad670d1239e5ff6867b004a84ffacb050c1f4bc06b5a1d17 +size 14876 diff --git a/gix-status/tests/fixtures/status_submodule.sh b/gix-status/tests/fixtures/status_submodule.sh new file mode 100644 index 00000000000..280612d6a97 --- /dev/null +++ b/gix-status/tests/fixtures/status_submodule.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q module1 +(cd module1 + touch this + git add . + git commit -q -m c1 + echo hello >> this + git commit -q -am c2 +) + +git init no-change +(cd no-change + git submodule add ../module1 m1 + git commit -m "add module 1" +) + +cp -R no-change deleted-dir +(cd deleted-dir + rm -Rf m1 +) + +cp -R no-change type-change +(cd type-change + rm -Rf m1 + touch m1 +) + +cp -R no-change empty-dir-no-change +(cd empty-dir-no-change + rm -Rf m1 + mkdir m1 +) diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 49ede98167d..3fedec2574e 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -7,11 +7,13 @@ use bstr::BStr; use filetime::{set_file_mtime, FileTime}; use gix_index as index; use gix_index::Entry; +use gix_status::index_as_worktree::traits::SubmoduleStatus; +use gix_status::index_as_worktree::Record; use gix_status::{ index_as_worktree, index_as_worktree::{ - content::{CompareBlobs, FastEq, ReadDataOnce}, - Change, Options, Recorder, + traits::{CompareBlobs, FastEq, ReadDataOnce}, + Change as WorktreeChange, Options, Recorder, }, }; @@ -28,12 +30,32 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { use_stdev: false, }; +type Change = WorktreeChange<(), ()>; + fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { fixture_filtered(name, &[], expected_status) } +fn submodule_fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { + fixture_filtered_detailed("status_submodule", name, &[], expected_status, false) +} + +fn submodule_fixture_status(name: &str, expected_status: &[(&BStr, Option, bool)], submodule_dirty: bool) { + fixture_filtered_detailed("status_submodule", name, &[], expected_status, submodule_dirty) +} + fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) { - let worktree = fixture_path(name); + fixture_filtered_detailed(name, "", pathspecs, expected_status, false) +} + +fn fixture_filtered_detailed( + name: &str, + subdir: &str, + pathspecs: &[&str], + expected_status: &[(&BStr, Option, bool)], + submodule_dirty: bool, +) { + let worktree = fixture_path(name).join(subdir); let git_dir = worktree.join(".git"); let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); @@ -45,6 +67,7 @@ fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, O &worktree, &mut recorder, FastEq, + SubmoduleStatusMock { dirty: submodule_dirty }, |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), &mut gix_features::progress::Discard, Pathspec(search), @@ -55,8 +78,31 @@ fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, O }, ) .unwrap(); - recorder.records.sort_unstable_by_key(|(name, _, _)| *name); - assert_eq!(recorder.records, expected_status) + recorder.records.sort_unstable_by_key(|r| r.relative_path); + assert_eq!(records_to_tuple(recorder.records), expected_status) +} + +fn records_to_tuple<'index>( + records: impl IntoIterator>, +) -> Vec<(&'index BStr, Option, bool)> { + records + .into_iter() + .map(|r| (r.relative_path, r.change, r.conflict)) + .collect() +} + +#[derive(Clone)] +struct SubmoduleStatusMock { + dirty: bool, +} + +impl SubmoduleStatus for SubmoduleStatusMock { + type Output = (); + type Error = std::convert::Infallible; + + fn status(&mut self, _entry: &Entry, _rela_path: &BStr) -> Result, Self::Error> { + Ok(self.dirty.then_some(())) + } } fn to_pathspecs(input: &[&str]) -> Vec { @@ -71,10 +117,10 @@ fn removed() { fixture( "status_removed", &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), - (BStr::new(b"empty"), Some(Change::Removed), false), - (BStr::new(b"executable"), Some(Change::Removed), false), + (BStr::new(b"dir/content"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"empty"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"executable"), Some(Change::Removed), NO_CONFLICT), ], ); @@ -82,17 +128,46 @@ fn removed() { "status_removed", &["dir"], &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), + (BStr::new(b"dir/content"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), NO_CONFLICT), ], ); } +#[test] +fn subomdule_nochange() { + submodule_fixture("no-change", &[]); +} + +#[test] +fn subomdule_deleted_dir() { + submodule_fixture("deleted-dir", &[(BStr::new(b"m1"), Some(Change::Removed), NO_CONFLICT)]); +} + +#[test] +fn subomdule_typechange() { + submodule_fixture("type-change", &[(BStr::new(b"m1"), Some(Change::Type), NO_CONFLICT)]); +} + +#[test] +fn subomdule_empty_dir_no_change() { + submodule_fixture("empty-dir-no-change", &[]); +} + +#[test] +fn subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { + submodule_fixture_status( + "empty-dir-no-change", + &[(BStr::new(b"m1"), Some(Change::SubmoduleModification(())), NO_CONFLICT)], + true, + ); +} + #[test] fn intent_to_add() { fixture( "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), false)], + &[(BStr::new(b"content"), Some(Change::IntentToAdd), NO_CONFLICT)], ); } @@ -131,7 +206,7 @@ fn modified() { executable_bit_changed: true, content_change: None, }), - false, + NO_CONFLICT, ), ( BStr::new(b"dir/content2"), @@ -139,21 +214,23 @@ fn modified() { executable_bit_changed: false, content_change: Some(()), }), - false, + NO_CONFLICT, ), - (BStr::new(b"empty"), Some(Change::Type), false), + (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), ( BStr::new(b"executable"), Some(Change::Modification { executable_bit_changed: true, content_change: Some(()), }), - false, + NO_CONFLICT, ), ], ); } +const NO_CONFLICT: bool = false; + #[test] fn racy_git() { let timestamp = 940040400; @@ -172,13 +249,13 @@ fn racy_git() { impl CompareBlobs for CountCalls { type Output = (); - fn compare_blobs<'a, E>( + fn compare_blobs<'a, 'b>( &mut self, - entry: &'a Entry, + entry: &Entry, worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { + worktree_blob: impl ReadDataOnce<'a>, + entry_blob: impl ReadDataOnce<'b>, + ) -> Result, gix_status::index_as_worktree::Error> { self.0.fetch_add(1, Ordering::Relaxed); self.1 .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob) @@ -203,6 +280,7 @@ fn racy_git() { worktree, &mut recorder, counter.clone(), + SubmoduleStatusMock { dirty: false }, |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), @@ -214,7 +292,11 @@ fn racy_git() { ) .unwrap(); assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed"); - assert_eq!(recorder.records, &[], "the testcase triggers racy git"); + assert_eq!( + records_to_tuple(recorder.records), + &[], + "the testcase triggers racy git" + ); // Now we also backdate the index timestamp to match the artificially created // mtime above this is now a realistic realworld race-condition which should trigger racy git @@ -226,6 +308,7 @@ fn racy_git() { worktree, &mut recorder, counter, + SubmoduleStatusMock { dirty: false }, |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), @@ -242,7 +325,7 @@ fn racy_git() { "no we needed to access the blob content" ); assert_eq!( - recorder.records, + records_to_tuple(recorder.records), &[( BStr::new(b"content"), Some(Change::Modification { From 0d01eb28ebb2305873726ba1892204fd151f4c4f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 11:32:26 +0200 Subject: [PATCH 07/23] feat!: provide statistics at the end of a index status operation --- gix-status/src/index_as_worktree/function.rs | 90 ++++++- gix-status/src/index_as_worktree/mod.rs | 2 +- gix-status/src/index_as_worktree/types.rs | 43 +++- gix-status/src/read.rs | 4 - gix-status/tests/status/index_as_worktree.rs | 242 ++++++++++++++----- 5 files changed, 302 insertions(+), 79 deletions(-) diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index 31a532678a0..5cb4cb8ef0c 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,4 +1,4 @@ -use std::sync::atomic::Ordering; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::{io, marker::PhantomData, path::Path}; use bstr::BStr; @@ -10,21 +10,29 @@ use crate::{ traits, traits::{CompareBlobs, SubmoduleStatus}, types::{Error, Options}, - Change, VisitEntry, + Change, Outcome, VisitEntry, }, read, Pathspec, }; /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them -/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. +/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and +/// `submodule` which can take a look at submodules in detail to produce status information. /// `options` are used to configure the operation. /// /// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to -/// the current time for which it will be considered fresh. +/// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`. /// -/// Note that this isn't technically quite what this function does as this also provides some additional information, -/// like whether a file has conflicts, and files that were added with `git add` are shown as a special -/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. +/// ### Note +/// +/// Technically, this function does more as this also provides additional information, like whether a file has conflicts, +/// and files that were added with `git add` are shown as a special as well. It also updates index entry stats like `git status` would +/// if it had to determine the hash. If that happened, the index should be written back, see [Outcome::skipped] +/// The latter is a 'change' that is not technically requiring a change to the index since `git add` already added the +/// file to the index, but didn't hash it. +/// +/// Thus some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed +/// by the `collector`. #[allow(clippy::too_many_arguments)] pub fn index_as_worktree<'index, T, U, Find, E1, E2>( index: &'index mut gix_index::State, @@ -36,7 +44,7 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>( progress: &mut dyn gix_features::progress::Progress, pathspec: impl Pathspec + Send + Clone, options: Options, -) -> Result<(), Error> +) -> Result where T: Send, U: Send, @@ -60,7 +68,18 @@ where .prefixed_entries_range(pathspec.common_prefix()) .unwrap_or(0..index.entries().len()); let (entries, path_backing) = index.entries_mut_and_pathbacking(); + let num_entries = entries.len(); let entries = &mut entries[range]; + + let _span = gix_features::trace::detail!("gix_status::index_as_worktree", + num_entries = entries.len(), + chunk_size = chunk_size, + thread_limit = ?thread_limit); + + let entries_skipped_by_common_prefix = num_entries - entries.len(); + let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_updated) = Default::default(); + let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default(); + progress.init(entries.len().into(), gix_features::progress::count("files")); let count = progress.counter(); @@ -70,6 +89,10 @@ where thread_limit, { let options = &options; + let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); + let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated); + let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); + let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); move |_| { ( State { @@ -79,6 +102,16 @@ where timestamp, path_backing, options, + + skipped_by_pathspec, + skipped_by_entry_flags, + symlink_metadata_calls, + entries_updated, + racy_clean, + worktree_reads, + worktree_bytes, + odb_reads, + odb_bytes, }, compare, submodule, @@ -101,7 +134,20 @@ where collector, phantom: PhantomData, }, - ) + )?; + + Ok(Outcome { + entries_skipped_by_common_prefix, + entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed), + entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed), + entries_updated: entries_updated.load(Ordering::Relaxed), + symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed), + racy_clean: racy_clean.load(Ordering::Relaxed), + worktree_files_read: worktree_reads.load(Ordering::Relaxed), + worktree_bytes: worktree_bytes.load(Ordering::Relaxed), + odb_objects_read: odb_reads.load(Ordering::Relaxed), + odb_bytes: odb_bytes.load(Ordering::Relaxed), + }) } struct State<'a, 'b> { @@ -111,6 +157,16 @@ struct State<'a, 'b> { path_stack: crate::SymlinkCheck, path_backing: &'b [u8], options: &'a Options, + + skipped_by_pathspec: &'a AtomicUsize, + skipped_by_entry_flags: &'a AtomicUsize, + symlink_metadata_calls: &'a AtomicUsize, + entries_updated: &'a AtomicUsize, + racy_clean: &'a AtomicUsize, + worktree_bytes: &'a AtomicU64, + worktree_reads: &'a AtomicUsize, + odb_bytes: &'a AtomicU64, + odb_reads: &'a AtomicUsize, } type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; @@ -140,10 +196,12 @@ impl<'index> State<'_, 'index> { | gix_index::entry::Flags::ASSUME_VALID | gix_index::entry::Flags::FSMONITOR_VALID, ) { + self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed); return None; } let path = entry.path_in(self.path_backing); if !pathspec.is_included(path, Some(false)) { + self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed); return None; } let status = self.compute_status(&mut *entry, path, diff, submodule, find); @@ -208,6 +266,7 @@ impl<'index> State<'_, 'index> { Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), Err(err) => return Err(Error::Io(err)), }; + self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); let metadata = match worktree_path.symlink_metadata() { Ok(metadata) if metadata.is_dir() => { // index entries are normally only for files/symlinks @@ -265,21 +324,33 @@ impl<'index> State<'_, 'index> { racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); if !racy_clean { return Ok(None); + } else { + self.racy_clean.fetch_add(1, Ordering::Relaxed); } } + self.buf.clear(); let read_file = WorktreeBlob { buf: &mut self.buf, path: worktree_path, entry, options: self.options, }; + self.odb_buf.clear(); let read_blob = OdbBlob { buf: &mut self.odb_buf, id: &entry.id, find, }; let content_change = diff.compare_blobs(entry, metadata.len() as usize, read_file, read_blob)?; + if !self.buf.is_empty() { + self.worktree_reads.fetch_add(1, Ordering::Relaxed); + self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed); + } + if !self.odb_buf.is_empty() { + self.odb_reads.fetch_add(1, Ordering::Relaxed); + self.odb_bytes.fetch_add(self.odb_buf.len() as u64, Ordering::Relaxed); + } // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. if content_change.is_some() && racy_clean { entry.stat.size = 0; @@ -292,6 +363,7 @@ impl<'index> State<'_, 'index> { } else { // don't diff against this file next time since we know the file is unchanged. entry.stat = new_stat; + self.entries_updated.fetch_add(1, Ordering::Relaxed); Ok(None) } } diff --git a/gix-status/src/index_as_worktree/mod.rs b/gix-status/src/index_as_worktree/mod.rs index bd2369cb5f1..9fe169c05de 100644 --- a/gix-status/src/index_as_worktree/mod.rs +++ b/gix-status/src/index_as_worktree/mod.rs @@ -1,7 +1,7 @@ //! Changes between an index and a worktree. /// mod types; -pub use types::{Change, Error, Options, VisitEntry}; +pub use types::{Change, Error, Options, Outcome, VisitEntry}; mod recorder; pub use recorder::{Record, Recorder}; diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index b3324379b97..e8f57e776dc 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -1,6 +1,6 @@ use bstr::{BStr, BString}; -/// The error returned by [`status()`](crate::index_as_worktree()). +/// The error returned by [index_as_worktree()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -19,8 +19,8 @@ pub enum Error { }, } -#[derive(Clone, Debug, Default)] /// Options that control how the index status with a worktree is computed. +#[derive(Clone, Debug, Default)] pub struct Options { /// Capabilities of the file system which affect the status computation. pub fs: gix_fs::Capabilities, @@ -32,6 +32,43 @@ pub struct Options { pub stat: gix_index::entry::stat::Options, } +/// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). +#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] +pub struct Outcome { + /// The amount of entries we didn't even traverse (and thus update with stat) due to a common prefix in pathspecs. + /// This is similar to the current working directory. + pub entries_skipped_by_common_prefix: usize, + /// The amount of entries that were skipped due to exclusion by *pathspecs*. + pub entries_skipped_by_pathspec: usize, + /// The amount of entries that were skipped as the entry flag indicated this. + pub entries_skipped_by_entry_flags: usize, + /// The amount of times we queried symlink-metadata for a file on disk. + pub symlink_metadata_calls: usize, + /// The amount of entries whose stats have been updated as its modification couldn't be determined without an expensive calculation. + /// + /// With these updates, this calculation will be avoided next time the status runs. + pub entries_updated: usize, + /// The amount of entries that were considered racy-clean - they will need thorough checking to see if they are truly clean, + /// i.e. didn't change. + pub racy_clean: usize, + + /// The amount of bytes read from the worktree in order to determine if an entry changed, across all files. + pub worktree_bytes: u64, + /// The amount of files read in full from the worktree (and into memory). + pub worktree_files_read: usize, + /// The amount of bytes read from the object database in order to determine if an entry changed, across all objects. + pub odb_bytes: u64, + /// The amount of objects read from the object database. + pub odb_objects_read: usize, +} + +impl Outcome { + /// The total amount of skipped entries, i.e. those that weren't processed at all. + pub fn skipped(&self) -> usize { + self.entries_skipped_by_common_prefix + self.entries_skipped_by_pathspec + self.entries_skipped_by_entry_flags + } +} + /// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum Change { @@ -61,7 +98,7 @@ pub enum Change { SubmoduleModification(U), /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. /// - /// This means it's not available in the object database yet or the index was created from, + /// This means it's not available in the object database yet /// even though now an entry exists that represents the worktree file. IntentToAdd, } diff --git a/gix-status/src/read.rs b/gix-status/src/read.rs index fd336817525..6d28ad33bf9 100644 --- a/gix-status/src/read.rs +++ b/gix-status/src/read.rs @@ -50,10 +50,6 @@ pub fn data_to_buf_with_meta<'a>( // on unix (by git) so no reason to use the try version here let symlink_path = gix_path::into_bstr(read_link(path)?); buf.extend_from_slice(&symlink_path); - // TODO: there is no reason this should be a clone - // std isn't great about allowing users to avoid allocations but we could - // simply write our own wrapper around libc::readlink which reuses the - // buffer. This would require unsafe code tough (obviously) } else { buf.clear(); File::open(path)?.read_to_end(buf)?; diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 3fedec2574e..70aab86f50d 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -8,7 +8,7 @@ use filetime::{set_file_mtime, FileTime}; use gix_index as index; use gix_index::Entry; use gix_status::index_as_worktree::traits::SubmoduleStatus; -use gix_status::index_as_worktree::Record; +use gix_status::index_as_worktree::{Outcome, Record}; use gix_status::{ index_as_worktree, index_as_worktree::{ @@ -32,19 +32,23 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { type Change = WorktreeChange<(), ()>; -fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { +fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Outcome { fixture_filtered(name, &[], expected_status) } -fn submodule_fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { +fn submodule_fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Outcome { fixture_filtered_detailed("status_submodule", name, &[], expected_status, false) } -fn submodule_fixture_status(name: &str, expected_status: &[(&BStr, Option, bool)], submodule_dirty: bool) { +fn submodule_fixture_status( + name: &str, + expected_status: &[(&BStr, Option, bool)], + submodule_dirty: bool, +) -> Outcome { fixture_filtered_detailed("status_submodule", name, &[], expected_status, submodule_dirty) } -fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) { +fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) -> Outcome { fixture_filtered_detailed(name, "", pathspecs, expected_status, false) } @@ -54,7 +58,7 @@ fn fixture_filtered_detailed( pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)], submodule_dirty: bool, -) { +) -> Outcome { let worktree = fixture_path(name).join(subdir); let git_dir = worktree.join(".git"); let mut index = @@ -62,7 +66,7 @@ fn fixture_filtered_detailed( let mut recorder = Recorder::default(); let search = gix_pathspec::Search::from_specs(to_pathspecs(pathspecs), None, std::path::Path::new("")) .expect("valid specs can be normalized"); - index_as_worktree( + let outcome = index_as_worktree( &mut index, &worktree, &mut recorder, @@ -79,7 +83,8 @@ fn fixture_filtered_detailed( ) .unwrap(); recorder.records.sort_unstable_by_key(|r| r.relative_path); - assert_eq!(records_to_tuple(recorder.records), expected_status) + assert_eq!(records_to_tuple(recorder.records), expected_status); + outcome } fn records_to_tuple<'index>( @@ -114,7 +119,7 @@ fn to_pathspecs(input: &[&str]) -> Vec { #[test] fn removed() { - fixture( + let out = fixture( "status_removed", &[ (BStr::new(b"dir/content"), Some(Change::Removed), NO_CONFLICT), @@ -123,8 +128,15 @@ fn removed() { (BStr::new(b"executable"), Some(Change::Removed), NO_CONFLICT), ], ); + assert_eq!( + out, + Outcome { + symlink_metadata_calls: 4, + ..Default::default() + } + ); - fixture_filtered( + let out = fixture_filtered( "status_removed", &["dir"], &[ @@ -132,57 +144,130 @@ fn removed() { (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), NO_CONFLICT), ], ); + assert_eq!( + out, + Outcome { + entries_skipped_by_common_prefix: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); } #[test] fn subomdule_nochange() { - submodule_fixture("no-change", &[]); + assert_eq!( + ignore_racyclean(submodule_fixture("no-change", &[])), + Outcome { + entries_updated: 1, + symlink_metadata_calls: 2, + worktree_bytes: 46, + worktree_files_read: 1, + ..Default::default() + } + ); } #[test] fn subomdule_deleted_dir() { - submodule_fixture("deleted-dir", &[(BStr::new(b"m1"), Some(Change::Removed), NO_CONFLICT)]); + assert_eq!( + ignore_racyclean(submodule_fixture( + "deleted-dir", + &[(BStr::new(b"m1"), Some(Change::Removed), NO_CONFLICT)] + )), + Outcome { + entries_updated: 1, + symlink_metadata_calls: 2, + worktree_files_read: 1, + worktree_bytes: 46, + ..Default::default() + } + ); } #[test] fn subomdule_typechange() { - submodule_fixture("type-change", &[(BStr::new(b"m1"), Some(Change::Type), NO_CONFLICT)]); + assert_eq!( + ignore_racyclean(submodule_fixture( + "type-change", + &[(BStr::new(b"m1"), Some(Change::Type), NO_CONFLICT)] + )), + Outcome { + entries_updated: 1, + symlink_metadata_calls: 2, + worktree_files_read: 1, + worktree_bytes: 46, + ..Default::default() + } + ) } #[test] fn subomdule_empty_dir_no_change() { - submodule_fixture("empty-dir-no-change", &[]); + assert_eq!( + ignore_racyclean(submodule_fixture("empty-dir-no-change", &[])), + Outcome { + entries_updated: 1, + symlink_metadata_calls: 2, + worktree_files_read: 1, + worktree_bytes: 46, + ..Default::default() + } + ); } #[test] fn subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { - submodule_fixture_status( - "empty-dir-no-change", - &[(BStr::new(b"m1"), Some(Change::SubmoduleModification(())), NO_CONFLICT)], - true, + assert_eq!( + ignore_racyclean(submodule_fixture_status( + "empty-dir-no-change", + &[(BStr::new(b"m1"), Some(Change::SubmoduleModification(())), NO_CONFLICT)], + true, + )), + Outcome { + entries_updated: 1, + symlink_metadata_calls: 2, + worktree_files_read: 1, + worktree_bytes: 46, + ..Default::default() + } ); } #[test] fn intent_to_add() { - fixture( - "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), NO_CONFLICT)], + assert_eq!( + fixture( + "status_intent_to_add", + &[(BStr::new(b"content"), Some(Change::IntentToAdd), NO_CONFLICT)], + ), + Outcome { + symlink_metadata_calls: 1, + ..Default::default() + } ); } #[test] fn conflict() { - fixture( - "status_conflict", - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - true, - )], + assert_eq!( + fixture( + "status_conflict", + &[( + BStr::new(b"content"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + true, + )], + ), + Outcome { + symlink_metadata_calls: 1, + worktree_files_read: 1, + worktree_bytes: 51, + ..Default::default() + } ); } @@ -197,35 +282,45 @@ fn unchanged() { ignore = "needs work, on windows plenty of additional files are considered modified for some reason" )] fn modified() { - fixture( - "status_changed", - &[ - ( - BStr::new(b"dir/content"), - Some(Change::Modification { - executable_bit_changed: true, - content_change: None, - }), - NO_CONFLICT, - ), - ( - BStr::new(b"dir/content2"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - NO_CONFLICT, - ), - (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), - ( - BStr::new(b"executable"), - Some(Change::Modification { - executable_bit_changed: true, - content_change: Some(()), - }), - NO_CONFLICT, - ), - ], + assert_eq!( + fixture( + "status_changed", + &[ + ( + BStr::new(b"dir/content"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: None, + }), + NO_CONFLICT, + ), + ( + BStr::new(b"dir/content2"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + NO_CONFLICT, + ), + (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), + ( + BStr::new(b"executable"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: Some(()), + }), + NO_CONFLICT, + ), + ], + ), + Outcome { + symlink_metadata_calls: 5, + entries_updated: 1, + worktree_files_read: 2, + worktree_bytes: 23, + racy_clean: 1, + ..Default::default() + } ); } @@ -275,7 +370,7 @@ fn racy_git() { let count = Arc::new(AtomicUsize::new(0)); let counter = CountCalls(count.clone(), FastEq); - index_as_worktree( + let out = index_as_worktree( &mut index, worktree, &mut recorder, @@ -291,6 +386,13 @@ fn racy_git() { }, ) .unwrap(); + assert_eq!( + out, + Outcome { + symlink_metadata_calls: 1, + ..Default::default() + } + ); assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed"); assert_eq!( records_to_tuple(recorder.records), @@ -303,7 +405,7 @@ fn racy_git() { // and cause proper output. index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0)); let mut recorder = Recorder::default(); - index_as_worktree( + let out = index_as_worktree( &mut index, worktree, &mut recorder, @@ -319,6 +421,16 @@ fn racy_git() { }, ) .unwrap(); + assert_eq!( + out, + Outcome { + symlink_metadata_calls: 1, + racy_clean: 1, + worktree_bytes: 3, + worktree_files_read: 1, + ..Default::default() + } + ); assert_eq!( count.load(Ordering::Relaxed), 1, @@ -362,3 +474,9 @@ impl gix_status::Pathspec for Pathspec { .map_or(false, |m| !m.is_excluded()) } } + +// This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. +fn ignore_racyclean(mut out: Outcome) -> Outcome { + out.racy_clean = 0; + out +} From 0e10b62557fbd5b33a4aebab24e442e23304ac0a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 19:41:16 +0200 Subject: [PATCH 08/23] feat: a way for `status` to stop early. That way, 'is_dirty()` scenarios can be done without wasting too much time. --- gix-status/src/index_as_worktree/function.rs | 12 ++++++--- gix-status/src/index_as_worktree/types.rs | 4 +++ gix-status/tests/status/index_as_worktree.rs | 28 ++++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index 5cb4cb8ef0c..f0bd80c2bdf 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,4 +1,4 @@ -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; use std::{io, marker::PhantomData, path::Path}; use bstr::BStr; @@ -23,6 +23,8 @@ use crate::{ /// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to /// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`. /// +/// `should_interrupt` can be used to stop all processing. +/// /// ### Note /// /// Technically, this function does more as this also provides additional information, like whether a file has conflicts, @@ -43,6 +45,7 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>( find: Find, progress: &mut dyn gix_features::progress::Progress, pathspec: impl Pathspec + Send + Clone, + should_interrupt: &AtomicBool, options: Options, ) -> Result where @@ -68,7 +71,7 @@ where .prefixed_entries_range(pathspec.common_prefix()) .unwrap_or(0..index.entries().len()); let (entries, path_backing) = index.entries_mut_and_pathbacking(); - let num_entries = entries.len(); + let mut num_entries = entries.len(); let entries = &mut entries[range]; let _span = gix_features::trace::detail!("gix_status::index_as_worktree", @@ -80,12 +83,13 @@ where let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_updated) = Default::default(); let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default(); + num_entries = entries.len(); progress.init(entries.len().into(), gix_features::progress::count("files")); let count = progress.counter(); in_parallel_if( || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. - entries.chunks_mut(chunk_size), + gix_features::interrupt::Iter::new(entries.chunks_mut(chunk_size), should_interrupt), thread_limit, { let options = &options; @@ -137,6 +141,8 @@ where )?; Ok(Outcome { + entries_to_process: num_entries, + entries_processed: count.load(Ordering::Relaxed), entries_skipped_by_common_prefix, entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed), entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed), diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index e8f57e776dc..808b1747d11 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -35,6 +35,10 @@ pub struct Options { /// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). #[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] pub struct Outcome { + /// The total amount of entries that is to be processed. + pub entries_to_process: usize, + /// The amount of entries we actually processed. If this isn't the entire set, the operation was interrupted. + pub entries_processed: usize, /// The amount of entries we didn't even traverse (and thus update with stat) due to a common prefix in pathspecs. /// This is similar to the current working directory. pub entries_skipped_by_common_prefix: usize, diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 70aab86f50d..5658404cf60 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::AtomicBool; use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -75,6 +76,7 @@ fn fixture_filtered_detailed( |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), &mut gix_features::progress::Discard, Pathspec(search), + &AtomicBool::default(), Options { fs: gix_fs::Capabilities::probe(&git_dir), stat: TEST_OPTIONS, @@ -131,6 +133,8 @@ fn removed() { assert_eq!( out, Outcome { + entries_to_process: 4, + entries_processed: 4, symlink_metadata_calls: 4, ..Default::default() } @@ -147,6 +151,8 @@ fn removed() { assert_eq!( out, Outcome { + entries_to_process: 2, + entries_processed: 2, entries_skipped_by_common_prefix: 2, symlink_metadata_calls: 2, ..Default::default() @@ -159,6 +165,8 @@ fn subomdule_nochange() { assert_eq!( ignore_racyclean(submodule_fixture("no-change", &[])), Outcome { + entries_to_process: 2, + entries_processed: 2, entries_updated: 1, symlink_metadata_calls: 2, worktree_bytes: 46, @@ -176,6 +184,8 @@ fn subomdule_deleted_dir() { &[(BStr::new(b"m1"), Some(Change::Removed), NO_CONFLICT)] )), Outcome { + entries_to_process: 2, + entries_processed: 2, entries_updated: 1, symlink_metadata_calls: 2, worktree_files_read: 1, @@ -193,6 +203,8 @@ fn subomdule_typechange() { &[(BStr::new(b"m1"), Some(Change::Type), NO_CONFLICT)] )), Outcome { + entries_to_process: 2, + entries_processed: 2, entries_updated: 1, symlink_metadata_calls: 2, worktree_files_read: 1, @@ -207,6 +219,8 @@ fn subomdule_empty_dir_no_change() { assert_eq!( ignore_racyclean(submodule_fixture("empty-dir-no-change", &[])), Outcome { + entries_to_process: 2, + entries_processed: 2, entries_updated: 1, symlink_metadata_calls: 2, worktree_files_read: 1, @@ -225,6 +239,8 @@ fn subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { true, )), Outcome { + entries_to_process: 2, + entries_processed: 2, entries_updated: 1, symlink_metadata_calls: 2, worktree_files_read: 1, @@ -242,6 +258,8 @@ fn intent_to_add() { &[(BStr::new(b"content"), Some(Change::IntentToAdd), NO_CONFLICT)], ), Outcome { + entries_to_process: 1, + entries_processed: 1, symlink_metadata_calls: 1, ..Default::default() } @@ -263,6 +281,8 @@ fn conflict() { )], ), Outcome { + entries_to_process: 3, + entries_processed: 3, symlink_metadata_calls: 1, worktree_files_read: 1, worktree_bytes: 51, @@ -314,6 +334,8 @@ fn modified() { ], ), Outcome { + entries_to_process: 5, + entries_processed: 5, symlink_metadata_calls: 5, entries_updated: 1, worktree_files_read: 2, @@ -379,6 +401,7 @@ fn racy_git() { |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), + &AtomicBool::default(), Options { fs, stat: TEST_OPTIONS, @@ -389,6 +412,8 @@ fn racy_git() { assert_eq!( out, Outcome { + entries_to_process: 1, + entries_processed: 1, symlink_metadata_calls: 1, ..Default::default() } @@ -414,6 +439,7 @@ fn racy_git() { |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), + &AtomicBool::default(), Options { fs, stat: TEST_OPTIONS, @@ -424,6 +450,8 @@ fn racy_git() { assert_eq!( out, Outcome { + entries_to_process: 1, + entries_processed: 1, symlink_metadata_calls: 1, racy_clean: 1, worktree_bytes: 3, From f1597756e5f6294f4757d90484f63e8d68bc05fa Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 09:16:33 +0200 Subject: [PATCH 09/23] feat!: add `Stack::from_state_and_ignore_case()`. It adds `Stack::from_state_and_ignore_case()` as utility to more easily instantiate a stack the is configured correctly. This also removes the `stack::State::for_status()` method as it's not actually suitable for status retrieval per se. --- gix-worktree/src/lib.rs | 1 + gix-worktree/src/stack/delegate.rs | 18 ++----------- gix-worktree/src/stack/mod.rs | 21 +++++++++++++++ gix-worktree/src/stack/state/mod.rs | 5 ---- .../fixtures/generated-archives/.gitignore | 1 + gix-worktree/tests/fixtures/symlink_stack.sh | 26 +++++++++++++++++++ justfile | 5 +++- 7 files changed, 55 insertions(+), 22 deletions(-) create mode 100644 gix-worktree/tests/fixtures/symlink_stack.sh diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 32d1d7c0e3b..b8e2e89746b 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -10,6 +10,7 @@ #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] use bstr::BString; +pub use gix_glob; /// A cache for efficiently executing operations on directories and files which are encountered in sorted order. /// That way, these operations can be re-used for subsequent invocations in the same directory. diff --git a/gix-worktree/src/stack/delegate.rs b/gix-worktree/src/stack/delegate.rs index 28d8ecf348d..8e48057dea7 100644 --- a/gix-worktree/src/stack/delegate.rs +++ b/gix-worktree/src/stack/delegate.rs @@ -56,7 +56,7 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> { }; match &mut self.state { #[cfg(feature = "attributes")] - State::CreateDirectoryAndAttributesStack { attributes, .. } => { + State::CreateDirectoryAndAttributesStack { attributes, .. } | State::AttributesStack(attributes) => { attributes.push_directory( stack.root(), stack.current(), @@ -89,16 +89,6 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> { &mut self.statistics.ignore, )? } - #[cfg(feature = "attributes")] - State::AttributesStack(attributes) => attributes.push_directory( - stack.root(), - stack.current(), - rela_dir, - self.buf, - self.id_mappings, - &mut self.find, - &mut self.statistics.attributes, - )?, State::IgnoreStack(ignore) => ignore.push_directory( stack.root(), stack.current(), @@ -139,7 +129,7 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> { self.statistics.delegate.pop_directory += 1; match &mut self.state { #[cfg(feature = "attributes")] - State::CreateDirectoryAndAttributesStack { attributes, .. } => { + State::CreateDirectoryAndAttributesStack { attributes, .. } | State::AttributesStack(attributes) => { attributes.pop_directory(); } #[cfg(feature = "attributes")] @@ -147,10 +137,6 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> { attributes.pop_directory(); ignore.pop_directory(); } - #[cfg(feature = "attributes")] - State::AttributesStack(attributes) => { - attributes.pop_directory(); - } State::IgnoreStack(ignore) => { ignore.pop_directory(); } diff --git a/gix-worktree/src/stack/mod.rs b/gix-worktree/src/stack/mod.rs index c103201996f..228467f5bee 100644 --- a/gix-worktree/src/stack/mod.rs +++ b/gix-worktree/src/stack/mod.rs @@ -77,6 +77,27 @@ impl Stack { statistics: Statistics::default(), } } + + /// Create a new stack that takes into consideration the `ignore_case` result of a filesystem probe in `root`. It takes a configured + /// `state` to control what it can do, while initializing attribute or ignore files that are to be queried from the ODB using + /// `index` and `path_backing`. + /// + /// This is the easiest way to correctly setup a stack. + pub fn from_state_and_ignore_case( + root: impl Into, + ignore_case: bool, + state: State, + index: &gix_index::State, + path_backing: &gix_index::PathStorageRef, + ) -> Self { + let case = if ignore_case { + gix_glob::pattern::Case::Fold + } else { + gix_glob::pattern::Case::Sensitive + }; + let attribute_files = state.id_mappings_from_index(index, path_backing, case); + Stack::new(root, state, case, Vec::with_capacity(512), attribute_files) + } } /// Entry points for attribute query diff --git a/gix-worktree/src/stack/state/mod.rs b/gix-worktree/src/stack/state/mod.rs index 0b371425acd..30e3c609f1b 100644 --- a/gix-worktree/src/stack/state/mod.rs +++ b/gix-worktree/src/stack/state/mod.rs @@ -71,11 +71,6 @@ impl State { pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self { State::AttributesAndIgnoreStack { attributes, ignore } } - - /// Configure a state for status retrieval, which needs access to ignore files only. - pub fn for_status(ignore: Ignore) -> Self { - State::IgnoreStack(ignore) - } } /// Utilities diff --git a/gix-worktree/tests/fixtures/generated-archives/.gitignore b/gix-worktree/tests/fixtures/generated-archives/.gitignore index 6f631797de0..b39f9769fde 100644 --- a/gix-worktree/tests/fixtures/generated-archives/.gitignore +++ b/gix-worktree/tests/fixtures/generated-archives/.gitignore @@ -1,2 +1,3 @@ make_ignore_and_attributes_setup.tar.xz make_attributes_baseline.tar.xz +symlink_stack.tar.xz diff --git a/gix-worktree/tests/fixtures/symlink_stack.sh b/gix-worktree/tests/fixtures/symlink_stack.sh new file mode 100644 index 00000000000..0bddba20d8d --- /dev/null +++ b/gix-worktree/tests/fixtures/symlink_stack.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu -o pipefail + +git init base; +(cd base + touch file + mkdir dir + touch dir/file-in-dir + (cd dir + ln -s file-in-dir filelink + mkdir subdir + ln -s subdir dirlink + ) + + ln -s file root-filelink + ln -s dir root-dirlink + +cat < .gitattributes +/file file-attr +/dir/file-in-dir dir-file-attr +EOF + git add . && git commit -m "init" +) + +ln -s base symlink-base + diff --git a/justfile b/justfile index be503bb193e..f3d6a4e1746 100755 --- a/justfile +++ b/justfile @@ -66,7 +66,10 @@ check: cargo check --features verbose-object-parsing-errors cd gix-attributes && cargo check --features serde cd gix-glob && cargo check --features serde - cd gix-worktree && cargo check --features serde + cd gix-worktree; \ + set -ex; \ + cargo check --features serde; \ + cargo check --no-default-features; cd gix-actor && cargo check --features serde cd gix-date && cargo check --features serde cargo check -p gix-tempfile --features signals From 34318fade6555af8ac6945b2366e5bb69fc823fa Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 10:39:58 +0200 Subject: [PATCH 10/23] use latest utilities from `gix-worktree` --- gix-worktree-state/src/checkout/function.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/gix-worktree-state/src/checkout/function.rs b/gix-worktree-state/src/checkout/function.rs index 4e3f454ccdd..8b0ea923b55 100644 --- a/gix-worktree-state/src/checkout/function.rs +++ b/gix-worktree-state/src/checkout/function.rs @@ -54,11 +54,6 @@ where let num_files = files.counter(); let num_bytes = bytes.counter(); let dir = dir.into(); - let case = if options.fs.ignore_case { - gix_glob::pattern::Case::Fold - } else { - gix_glob::pattern::Case::Sensitive - }; let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( 100, index.entries().len().into(), @@ -66,12 +61,16 @@ where None, ); - let state = stack::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); - let attribute_files = state.id_mappings_from_index(index, paths, case); let mut ctx = chunk::Context { buf: Vec::new(), options: (&options).into(), - path_cache: Stack::new(dir, state, case, Vec::with_capacity(512), attribute_files), + path_cache: Stack::from_state_and_ignore_case( + dir, + options.fs.ignore_case, + stack::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)), + index, + paths, + ), filters: options.filters, find, }; From 260c78191e88d2e2c744d85e6f75e7941c24ef43 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 17:05:16 +0200 Subject: [PATCH 11/23] fix!: don't provide path to object-retrieval callback of `Pipeline::convert::to_git::IndexObjectFn()`. It implies that one has to be ready to fetch any kind of path from the index, even though it's always the path to the file that is currently converted. Also fix a bug that could cause it to return input as unchanged even though it was read into a buffer already. --- gix-filter/src/pipeline/convert.rs | 25 +++++++++------------ gix-filter/tests/pipeline/convert_to_git.rs | 9 +++----- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index 1294944cb8c..c98bac3e80b 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -21,12 +21,9 @@ pub mod configuration { /// pub mod to_git { - use bstr::BStr; - - /// A function that writes a buffer like `fn(rela_path, &mut buf)` with by tes of an object in the index that is the one - /// that should be converted. + /// A function that fills `buf` `fn(&mut buf)` with the data stored in the index of the file that should be converted. pub type IndexObjectFn<'a> = - dyn FnMut(&BStr, &mut Vec) -> Result, Box> + 'a; + dyn FnMut(&mut Vec) -> Result, Box> + 'a; /// The error returned by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()]. #[derive(Debug, thiserror::Error)] @@ -91,7 +88,7 @@ impl Pipeline { self.options.eol_config, )?; - let mut changed = false; + let mut in_buffer = false; // this is just an approximation, but it's as good as it gets without reading the actual input. let would_convert_eol = eol::convert_to_git( b"\r\n", @@ -119,12 +116,13 @@ impl Pipeline { } self.bufs.clear(); read.read_to_end(&mut self.bufs.src)?; - changed = true; + in_buffer = true; } } - if !changed && (apply_ident_filter || encoding.is_some() || would_convert_eol) { + if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { self.bufs.clear(); src.read_to_end(&mut self.bufs.src)?; + in_buffer = true; } if let Some(encoding) = encoding { @@ -139,28 +137,25 @@ impl Pipeline { }, )?; self.bufs.swap(); - changed = true; } if eol::convert_to_git( &self.bufs.src, digest, &mut self.bufs.dest, - &mut |buf| index_object(bstr_path.as_ref(), buf), + &mut |buf| index_object(buf), eol::convert_to_git::Options { round_trip_check: self.options.crlf_roundtrip_check.to_eol_roundtrip_check(rela_path), config: self.options.eol_config, }, )? { self.bufs.swap(); - changed = true; } if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest) { self.bufs.swap(); - changed = true; } - Ok(if changed { + Ok(if in_buffer { ToGitOutcome::Buffer(&self.bufs.src) } else { ToGitOutcome::Unchanged(src) @@ -325,12 +320,12 @@ where } } -impl ToGitOutcome<'_, R> +impl<'a, R> ToGitOutcome<'a, R> where R: std::io::Read, { /// If we contain a buffer, and not a stream, return it. - pub fn as_bytes(&self) -> Option<&[u8]> { + pub fn as_bytes(&self) -> Option<&'a [u8]> { match self { ToGitOutcome::Unchanged(_) | ToGitOutcome::Process(_) => None, ToGitOutcome::Buffer(b) => Some(b), diff --git a/gix-filter/tests/pipeline/convert_to_git.rs b/gix-filter/tests/pipeline/convert_to_git.rs index 3a627f28d43..1bc5f64c0bd 100644 --- a/gix-filter/tests/pipeline/convert_to_git.rs +++ b/gix-filter/tests/pipeline/convert_to_git.rs @@ -1,6 +1,6 @@ use std::{io::Read, path::Path}; -use bstr::{BStr, ByteSlice}; +use bstr::ByteSlice; use gix_filter::{eol, pipeline::CrlfRoundTripCheck}; use crate::{driver::apply::driver_with_process, pipeline::pipeline}; @@ -132,14 +132,11 @@ fn no_filter_means_reader_is_returned_unchanged() -> gix_testtools::Result { } #[allow(clippy::ptr_arg)] -fn no_call(_path: &BStr, _buf: &mut Vec) -> Result, Box> { +fn no_call(_buf: &mut Vec) -> Result, Box> { unreachable!("index function will not be called") } #[allow(clippy::ptr_arg)] -fn no_object_in_index( - _path: &BStr, - _buf: &mut Vec, -) -> Result, Box> { +fn no_object_in_index(_buf: &mut Vec) -> Result, Box> { Ok(None) } From 9283a9d2ec460c1380cf6c40d876f477cb552826 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 21:23:34 +0200 Subject: [PATCH 12/23] fix!: `encode::loose_header()` now supports large objects even on 32 bit systems. Previously, larger than 4GB files wouldn't be supported, which causes problems when genrating hashes even when streaming data. --- gix-object/src/blob.rs | 6 +++--- gix-object/src/commit/write.rs | 12 ++++++------ gix-object/src/encode.rs | 2 +- gix-object/src/lib.rs | 4 ++-- gix-object/src/object/mod.rs | 8 +++++--- gix-object/src/tag/write.rs | 12 ++++++------ gix-object/src/traits.rs | 4 ++-- gix-object/src/tree/write.rs | 10 ++++++---- 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/gix-object/src/blob.rs b/gix-object/src/blob.rs index d0a42092c5c..57db54758f6 100644 --- a/gix-object/src/blob.rs +++ b/gix-object/src/blob.rs @@ -12,8 +12,8 @@ impl<'a> crate::WriteTo for BlobRef<'a> { Kind::Blob } - fn size(&self) -> usize { - self.data.len() + fn size(&self) -> u64 { + self.data.len() as u64 } } @@ -27,7 +27,7 @@ impl crate::WriteTo for Blob { Kind::Blob } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.to_ref().size() } } diff --git a/gix-object/src/commit/write.rs b/gix-object/src/commit/write.rs index 667d257634d..efad2b6db53 100644 --- a/gix-object/src/commit/write.rs +++ b/gix-object/src/commit/write.rs @@ -27,9 +27,9 @@ impl crate::WriteTo for Commit { Kind::Commit } - fn size(&self) -> usize { + fn size(&self) -> u64 { let hash_in_hex = self.tree.kind().len_in_hex(); - b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + (b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 + hash_in_hex + 1) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ @@ -46,7 +46,7 @@ impl crate::WriteTo for Commit { }) .sum::() + 1 /* nl */ - + self.message.len() + + self.message.len()) as u64 } } @@ -73,9 +73,9 @@ impl<'a> crate::WriteTo for CommitRef<'a> { Kind::Commit } - fn size(&self) -> usize { + fn size(&self) -> u64 { let hash_in_hex = self.tree().kind().len_in_hex(); - b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + (b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ @@ -92,6 +92,6 @@ impl<'a> crate::WriteTo for CommitRef<'a> { }) .sum::() + 1 /* nl */ - + self.message.len() + + self.message.len()) as u64 } } diff --git a/gix-object/src/encode.rs b/gix-object/src/encode.rs index 3ba7db0b58a..04a79194863 100644 --- a/gix-object/src/encode.rs +++ b/gix-object/src/encode.rs @@ -19,7 +19,7 @@ macro_rules! check { }; } /// Generates a loose header buffer -pub fn loose_header(kind: crate::Kind, size: usize) -> smallvec::SmallVec<[u8; 28]> { +pub fn loose_header(kind: crate::Kind, size: u64) -> smallvec::SmallVec<[u8; 28]> { let mut v = smallvec::SmallVec::new(); check!(v.write_all(kind.as_bytes())); check!(v.write_all(SPACE)); diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 56e0019fd8c..63a356450b6 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -345,7 +345,7 @@ pub mod decode { /// ([`kind`](super::Kind), `size`, `consumed bytes`). /// /// `size` is the uncompressed size of the payload in bytes. - pub fn loose_header(input: &[u8]) -> Result<(super::Kind, usize, usize), LooseHeaderDecodeError> { + pub fn loose_header(input: &[u8]) -> Result<(super::Kind, u64, usize), LooseHeaderDecodeError> { use LooseHeaderDecodeError::*; let kind_end = input.find_byte(0x20).ok_or(InvalidHeader { message: "Expected ' '", @@ -366,7 +366,7 @@ pub mod decode { /// A standalone function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`. pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId { - let header = encode::loose_header(object_kind, data.len()); + let header = encode::loose_header(object_kind, data.len() as u64); let mut hasher = gix_features::hash::hasher(hash_kind); hasher.update(&header); diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs index bebc1cc6522..53e9b4178e0 100644 --- a/gix-object/src/object/mod.rs +++ b/gix-object/src/object/mod.rs @@ -24,7 +24,7 @@ mod write { self.kind() } - fn size(&self) -> usize { + fn size(&self) -> u64 { use crate::ObjectRef::*; match self { Tree(v) => v.size(), @@ -52,7 +52,7 @@ mod write { self.kind() } - fn size(&self) -> usize { + fn size(&self) -> u64 { use crate::Object::*; match self { Tree(v) => v.size(), @@ -185,6 +185,8 @@ pub enum LooseDecodeError { InvalidHeader(#[from] LooseHeaderDecodeError), #[error(transparent)] InvalidContent(#[from] DecodeError), + #[error("Object sized {size} does not fit into memory - this can happen on 32 bit systems")] + OutOfMemory { size: u64 }, } impl<'a> ObjectRef<'a> { @@ -193,7 +195,7 @@ impl<'a> ObjectRef<'a> { let (kind, size, offset) = loose_header(data)?; let body = &data[offset..] - .get(..size) + .get(..size.try_into().map_err(|_| LooseDecodeError::OutOfMemory { size })?) .ok_or(LooseHeaderDecodeError::InvalidHeader { message: "object data was shorter than its size declared in the header", })?; diff --git a/gix-object/src/tag/write.rs b/gix-object/src/tag/write.rs index cee9e587c69..dc8fd1ba336 100644 --- a/gix-object/src/tag/write.rs +++ b/gix-object/src/tag/write.rs @@ -44,8 +44,8 @@ impl crate::WriteTo for Tag { Kind::Tag } - fn size(&self) -> usize { - b"object".len() + 1 /* space */ + self.target.kind().len_in_hex() + 1 /* nl */ + fn size(&self) -> u64 { + (b"object".len() + 1 /* space */ + self.target.kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self @@ -53,7 +53,7 @@ impl crate::WriteTo for Tag { .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() - + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len()) + + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } @@ -81,8 +81,8 @@ impl<'a> crate::WriteTo for TagRef<'a> { Kind::Tag } - fn size(&self) -> usize { - b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + fn size(&self) -> u64 { + (b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self @@ -90,7 +90,7 @@ impl<'a> crate::WriteTo for TagRef<'a> { .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() - + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len()) + + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } diff --git a/gix-object/src/traits.rs b/gix-object/src/traits.rs index c0c4adee2f5..ce0463c988c 100644 --- a/gix-object/src/traits.rs +++ b/gix-object/src/traits.rs @@ -17,7 +17,7 @@ pub trait WriteTo { /// the object, as such it's possible for [`size`](Self::size) to /// return a sensible value but [`write_to`](Self::write_to) to /// fail because the object was not actually valid in some way. - fn size(&self) -> usize; + fn size(&self) -> u64; /// Returns a loose object header based on the object's data fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { @@ -37,7 +37,7 @@ where ::kind(self) } - fn size(&self) -> usize { + fn size(&self) -> u64 { ::size(self) } } diff --git a/gix-object/src/tree/write.rs b/gix-object/src/tree/write.rs index 91e1dc2e0ea..e1a82720f1f 100644 --- a/gix-object/src/tree/write.rs +++ b/gix-object/src/tree/write.rs @@ -57,10 +57,12 @@ impl crate::WriteTo for Tree { Kind::Tree } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.entries .iter() - .map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) + .map(|Entry { mode, filename, oid }| { + (mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64 + }) .sum() } } @@ -100,11 +102,11 @@ impl<'a> crate::WriteTo for TreeRef<'a> { Kind::Tree } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.entries .iter() .map(|EntryRef { mode, filename, oid }| { - mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len() + (mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64 }) .sum() } From ffcb110135e4597d8953b97da3db9ecc3cf12e34 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 21:44:59 +0200 Subject: [PATCH 13/23] adapt to changes in `gix-object` --- gix-odb/src/find.rs | 6 ++--- gix-odb/src/sink.rs | 10 ++++----- gix-odb/src/store_impls/loose/find.rs | 31 ++++++++++++++------------ gix-odb/src/store_impls/loose/write.rs | 19 +++++++--------- gix-odb/tests/odb/store/loose.rs | 2 +- 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/gix-odb/src/find.rs b/gix-odb/src/find.rs index bf807e27c0d..196845c4d7c 100644 --- a/gix-odb/src/find.rs +++ b/gix-odb/src/find.rs @@ -104,11 +104,11 @@ mod header { } } - impl From<(usize, gix_object::Kind)> for Header { - fn from((object_size, kind): (usize, gix_object::Kind)) -> Self { + impl From<(u64, gix_object::Kind)> for Header { + fn from((object_size, kind): (u64, gix_object::Kind)) -> Self { Header::Loose { kind, - size: object_size as u64, + size: object_size, } } } diff --git a/gix-odb/src/sink.rs b/gix-odb/src/sink.rs index f6334a51c5b..7784901a8c9 100644 --- a/gix-odb/src/sink.rs +++ b/gix-odb/src/sink.rs @@ -1,6 +1,5 @@ use std::{ cell::RefCell, - convert::TryInto, io::{self, Write}, }; @@ -24,11 +23,10 @@ impl crate::traits::Write for Sink { fn write_stream( &self, kind: gix_object::Kind, - size: u64, + mut size: u64, from: &mut dyn io::Read, ) -> Result { - let mut size = size.try_into().expect("object size to fit into usize"); - let mut buf = [0u8; 8096]; + let mut buf = [0u8; u16::MAX as usize]; let header = gix_object::encode::loose_header(kind, size); let possibly_compress = |buf: &[u8]| -> io::Result<()> { @@ -43,11 +41,11 @@ impl crate::traits::Write for Sink { possibly_compress(&header).map_err(Box::new)?; while size != 0 { - let bytes = size.min(buf.len()); + let bytes = (size as usize).min(buf.len()); from.read_exact(&mut buf[..bytes]).map_err(Box::new)?; hasher.update(&buf[..bytes]); possibly_compress(&buf[..bytes]).map_err(Box::new)?; - size -= bytes; + size -= bytes as u64; } if let Some(compressor) = self.compressor.as_ref() { let mut c = compressor.borrow_mut(); diff --git a/gix-odb/src/store_impls/loose/find.rs b/gix-odb/src/store_impls/loose/find.rs index 91bf0ba871d..4116a045348 100644 --- a/gix-odb/src/store_impls/loose/find.rs +++ b/gix-odb/src/store_impls/loose/find.rs @@ -14,13 +14,11 @@ pub enum Error { path: PathBuf, }, #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")] - SizeMismatch { - actual: usize, - expected: usize, - path: PathBuf, - }, + SizeMismatch { actual: u64, expected: u64, path: PathBuf }, #[error(transparent)] Decode(#[from] gix_object::decode::LooseHeaderDecodeError), + #[error("Cannot store {size} in memory as it's not representable")] + OutOfMemory { size: u64 }, #[error("Could not {action} data at '{path}'")] Io { source: std::io::Error, @@ -137,7 +135,7 @@ impl Store { /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`. /// Returns `None` if `id` does not exist in the database. - pub fn try_header(&self, id: &gix_hash::oid) -> Result, Error> { + pub fn try_header(&self, id: &gix_hash::oid) -> Result, Error> { const BUF_SIZE: usize = 256; let mut buf = [0_u8; BUF_SIZE]; let path = hash_path(id, self.path.clone()); @@ -224,16 +222,17 @@ impl Store { let decompressed_body_bytes_sans_header = decompressed_start + header_size..decompressed_start + consumed_out; - if consumed_out != size + header_size { + if consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { - expected: size + header_size, - actual: consumed_out, + expected: size + header_size as u64, + actual: consumed_out as u64, path, }); } buf.copy_within(decompressed_body_bytes_sans_header, 0); } else { - buf.resize(bytes_read + size + header_size, 0); + let new_len = bytes_read as u64 + size + header_size as u64; + buf.resize(new_len.try_into().map_err(|_| Error::OutOfMemory { size: new_len })?, 0); { let (input, output) = buf.split_at_mut(bytes_read); let num_decompressed_bytes = zlib::stream::inflate::read( @@ -246,17 +245,21 @@ impl Store { action: "deflate", path: path.to_owned(), })?; - if num_decompressed_bytes + consumed_out != size + header_size { + if num_decompressed_bytes as u64 + consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { - expected: size + header_size, - actual: num_decompressed_bytes + consumed_out, + expected: size + header_size as u64, + actual: num_decompressed_bytes as u64 + consumed_out as u64, path, }); } }; buf.copy_within(decompressed_start + header_size.., 0); } - buf.resize(size, 0); + buf.resize( + size.try_into() + .expect("BUG: here the size is already confirmed to fit into memory"), + 0, + ); Ok(gix_object::Data { kind, data: buf }) } } diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index e537eda9264..2cac12d1814 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -1,4 +1,4 @@ -use std::{convert::TryInto, fs, io, io::Write, path::PathBuf}; +use std::{fs, io, io::Write, path::PathBuf}; use gix_features::{hash, zlib::stream::deflate}; use gix_object::WriteTo; @@ -48,7 +48,7 @@ impl crate::traits::Write for Store { /// This will cost at least 4 IO operations. fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { let mut to = self.dest().map_err(Box::new)?; - to.write_all(&gix_object::encode::loose_header(kind, from.len())) + to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) .map_err(|err| Error::Io { source: err, message: "write header to tempfile in", @@ -74,15 +74,12 @@ impl crate::traits::Write for Store { mut from: &mut dyn io::Read, ) -> Result { let mut to = self.dest().map_err(Box::new)?; - to.write_all(&gix_object::encode::loose_header( - kind, - size.try_into().expect("object size to fit into usize"), - )) - .map_err(|err| Error::Io { - source: err, - message: "write header to tempfile in", - path: self.path.to_owned(), - })?; + to.write_all(&gix_object::encode::loose_header(kind, size)) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; io::copy(&mut from, &mut to) .map_err(|err| Error::Io { diff --git a/gix-odb/tests/odb/store/loose.rs b/gix-odb/tests/odb/store/loose.rs index 77a26cb8daf..c998522100d 100644 --- a/gix-odb/tests/odb/store/loose.rs +++ b/gix-odb/tests/odb/store/loose.rs @@ -420,7 +420,7 @@ cjHJZXWmV4CcRfmLsXzU8s2cR9A0DBvOxhPD1TlKC2JhBFXigjuL9U4Rbq9tdegB let id = id?; let expected = db.try_find(&id, &mut buf)?.expect("exists"); let (size, kind) = db.try_header(&id)?.expect("header exists"); - assert_eq!(size, expected.data.len()); + assert_eq!(size, expected.data.len() as u64); assert_eq!(kind, expected.kind); } Ok(()) From de66b4c26a937a4f6462dff5ec58275dae01813a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 08:42:35 +0200 Subject: [PATCH 14/23] feat: `status` now supports filters. This is important as it allows to streaming-read from the worktree and correctly change, for example, `git-lfs` files back into their manifests, and to arrive at the correct hash. --- Cargo.lock | 2 + gix-status/Cargo.toml | 2 + gix-status/src/index_as_worktree/function.rs | 229 ++++++++++++------- gix-status/src/index_as_worktree/traits.rs | 119 ++++++++-- gix-status/src/index_as_worktree/types.rs | 7 +- gix-status/src/lib.rs | 5 +- gix-status/src/read.rs | 59 ----- gix-status/tests/status/index_as_worktree.rs | 173 ++++++++++++-- gix-status/tests/worktree.rs | 4 +- 9 files changed, 405 insertions(+), 195 deletions(-) delete mode 100644 gix-status/src/read.rs diff --git a/Cargo.lock b/Cargo.lock index 25d1ddeb03b..dee7ce38012 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2247,12 +2247,14 @@ dependencies = [ "bstr", "filetime", "gix-features 0.35.0", + "gix-filter", "gix-fs 0.7.0", "gix-hash 0.13.0", "gix-index 0.25.0", "gix-object 0.37.0", "gix-path 0.10.0", "gix-pathspec", + "gix-worktree 0.26.0", "thiserror", ] diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml index 911e5860cc6..d068f349458 100644 --- a/gix-status/Cargo.toml +++ b/gix-status/Cargo.toml @@ -21,6 +21,8 @@ gix-object = { version = "^0.37.0", path = "../gix-object" } gix-path = { version = "^0.10.0", path = "../gix-path" } gix-features = { version = "^0.35.0", path = "../gix-features" } gix-pathspec = { version = "^0.3.0", path = "../gix-pathspec" } +gix-filter = { version = "^0.5.0", path = "../gix-filter" } +gix-worktree = { version = "^0.26.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } thiserror = "1.0.26" filetime = "0.2.15" diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index f0bd80c2bdf..bae8e6b7769 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -4,7 +4,9 @@ use std::{io, marker::PhantomData, path::Path}; use bstr::BStr; use filetime::FileTime; use gix_features::parallel::{in_parallel_if, Reduce}; +use gix_filter::pipeline::convert::ToGitOutcome; +use crate::index_as_worktree::traits::read_data::Stream; use crate::{ index_as_worktree::{ traits, @@ -12,7 +14,7 @@ use crate::{ types::{Error, Options}, Change, Outcome, VisitEntry, }, - read, Pathspec, + Pathspec, SymlinkCheck, }; /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them @@ -24,6 +26,8 @@ use crate::{ /// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`. /// /// `should_interrupt` can be used to stop all processing. +/// `filter` is used to convert worktree files back to their internal git representation. For this to be correct, +/// [`Options::attributes`] must be configured as well. /// /// ### Note /// @@ -45,8 +49,9 @@ pub fn index_as_worktree<'index, T, U, Find, E1, E2>( find: Find, progress: &mut dyn gix_features::progress::Progress, pathspec: impl Pathspec + Send + Clone, + filter: gix_filter::Pipeline, should_interrupt: &AtomicBool, - options: Options, + mut options: Options, ) -> Result where T: Send, @@ -70,6 +75,14 @@ where let range = index .prefixed_entries_range(pathspec.common_prefix()) .unwrap_or(0..index.entries().len()); + + let stack = gix_worktree::Stack::from_state_and_ignore_case( + worktree, + options.fs.ignore_case, + gix_worktree::stack::State::AttributesStack(std::mem::take(&mut options.attributes)), + index, + index.path_backing(), + ); let (entries, path_backing) = index.entries_mut_and_pathbacking(); let mut num_entries = entries.len(); let entries = &mut entries[range]; @@ -87,48 +100,51 @@ where progress.init(entries.len().into(), gix_features::progress::count("files")); let count = progress.counter(); + let new_state = { + let options = &options; + let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); + let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated); + let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); + let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); + move |_| { + ( + State { + buf: Vec::new(), + buf2: Vec::new(), + attr_stack: stack, + path_stack: SymlinkCheck::new(worktree.into()), + timestamp, + path_backing, + filter, + options, + + skipped_by_pathspec, + skipped_by_entry_flags, + symlink_metadata_calls, + entries_updated, + racy_clean, + worktree_reads, + worktree_bytes, + odb_reads, + odb_bytes, + }, + compare, + submodule, + find, + pathspec, + ) + } + }; in_parallel_if( || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. gix_features::interrupt::Iter::new(entries.chunks_mut(chunk_size), should_interrupt), thread_limit, - { - let options = &options; - let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); - let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated); - let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); - let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); - move |_| { - ( - State { - buf: Vec::new(), - odb_buf: Vec::new(), - path_stack: crate::SymlinkCheck::new(worktree.to_owned()), - timestamp, - path_backing, - options, - - skipped_by_pathspec, - skipped_by_entry_flags, - symlink_metadata_calls, - entries_updated, - racy_clean, - worktree_reads, - worktree_bytes, - odb_reads, - odb_bytes, - }, - compare, - submodule, - find, - pathspec, - ) - } - }, + new_state, |entries, (state, blobdiff, submdule, find, pathspec)| { entries .iter_mut() .filter_map(|entry| { - let res = state.process(entry, blobdiff, submdule, find, pathspec); + let res = state.process(entry, pathspec, blobdiff, submdule, find); count.fetch_add(1, Ordering::Relaxed); res }) @@ -158,9 +174,16 @@ where struct State<'a, 'b> { buf: Vec, - odb_buf: Vec, + buf2: Vec, timestamp: FileTime, - path_stack: crate::SymlinkCheck, + /// This is the cheap stack that only assure that we don't go through symlinks. + /// It's always used to get the path to perform an lstat on. + path_stack: SymlinkCheck, + /// This is the expensive stack that will need to check for `.gitattributes` files each time + /// it changes directory. It's only used when we know we have to read a worktree file, which in turn + /// requires attributes to drive the filter configuration. + attr_stack: gix_worktree::Stack, + filter: gix_filter::Pipeline, path_backing: &'b [u8], options: &'a Options, @@ -181,10 +204,10 @@ impl<'index> State<'_, 'index> { fn process( &mut self, entry: &'index mut gix_index::Entry, + pathspec: &mut impl Pathspec, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, find: &mut Find, - pathspec: &mut impl Pathspec, ) -> Option> where E1: std::error::Error + Send + Sync + 'static, @@ -266,10 +289,9 @@ impl<'index> State<'_, 'index> { E2: std::error::Error + Send + Sync + 'static, Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { - let worktree_path = gix_path::try_from_bstr(rela_path).map_err(|_| Error::IllformedUtf8)?; - let worktree_path = match self.path_stack.verified_path(worktree_path.as_ref()) { + let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) { Ok(path) => path, - Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), Err(err) => return Err(Error::Io(err)), }; self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); @@ -336,27 +358,24 @@ impl<'index> State<'_, 'index> { } self.buf.clear(); - let read_file = WorktreeBlob { + self.buf2.clear(); + let fetch_data = ReadDataImpl { buf: &mut self.buf, path: worktree_path, + rela_path, entry, + file_len: metadata.len(), + filter: &mut self.filter, + attr_stack: &mut self.attr_stack, options: self.options, - }; - self.odb_buf.clear(); - let read_blob = OdbBlob { - buf: &mut self.odb_buf, id: &entry.id, find, + worktree_reads: self.worktree_reads, + worktree_bytes: self.worktree_bytes, + odb_reads: self.odb_reads, + odb_bytes: self.odb_bytes, }; - let content_change = diff.compare_blobs(entry, metadata.len() as usize, read_file, read_blob)?; - if !self.buf.is_empty() { - self.worktree_reads.fetch_add(1, Ordering::Relaxed); - self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed); - } - if !self.odb_buf.is_empty() { - self.odb_reads.fetch_add(1, Ordering::Relaxed); - self.odb_bytes.fetch_add(self.odb_buf.len() as u64, Ordering::Relaxed); - } + let content_change = diff.compare_blobs(entry, metadata.len(), fetch_data, &mut self.buf2)?; // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. if content_change.is_some() && racy_clean { entry.stat.size = 0; @@ -404,43 +423,91 @@ impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U> } } -struct WorktreeBlob<'a> { - buf: &'a mut Vec, - path: &'a Path, - entry: &'a gix_index::Entry, - options: &'a Options, -} - -struct OdbBlob<'a, Find, E> +struct ReadDataImpl<'a, Find, E> where E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec) -> Result, E>, { buf: &'a mut Vec, + path: &'a Path, + rela_path: &'a BStr, + file_len: u64, + entry: &'a gix_index::Entry, + filter: &'a mut gix_filter::Pipeline, + attr_stack: &'a mut gix_worktree::Stack, + options: &'a Options, id: &'a gix_hash::oid, find: Find, + worktree_bytes: &'a AtomicU64, + worktree_reads: &'a AtomicUsize, + odb_bytes: &'a AtomicU64, + odb_reads: &'a AtomicUsize, } -impl<'a> traits::ReadDataOnce<'a> for WorktreeBlob<'a> { - fn read_data(self) -> Result<&'a [u8], Error> { - let res = read::data_to_buf_with_meta( - self.path, - self.buf, - self.entry.mode == gix_index::entry::Mode::SYMLINK, - &self.options.fs, - )?; - Ok(res) - } -} - -impl<'a, Find, E> traits::ReadDataOnce<'a> for OdbBlob<'a, Find, E> +impl<'a, Find, E> traits::ReadData<'a> for ReadDataImpl<'a, Find, E> where E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec) -> Result, E>, { - fn read_data(mut self) -> Result<&'a [u8], Error> { + fn read_blob(mut self) -> Result<&'a [u8], Error> { (self.find)(self.id, self.buf) - .map(|b| b.data) + .map(|b| { + self.odb_reads.fetch_add(1, Ordering::Relaxed); + self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed); + b.data + }) .map_err(move |err| Error::Find(Box::new(err))) } + + fn stream_worktree_file(mut self) -> Result, Error> { + self.buf.clear(); + // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just + // normal files with their content equal to the linked path (so can be read normally) + // + let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK; + // TODO: what to do about precompose unicode and ignore_case for symlinks + let out = if is_symlink && self.options.fs.symlink { + // conversion to bstr can never fail because symlinks are only used + // on unix (by git) so no reason to use the try version here + let symlink_path = gix_path::into_bstr(std::fs::read_link(self.path)?); + self.buf.extend_from_slice(&symlink_path); + self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed); + Stream { + inner: ToGitOutcome::Buffer(self.buf), + bytes: None, + len: None, + } + } else { + self.buf.clear(); + let platform = self.attr_stack.at_entry(self.rela_path, Some(false), &mut self.find)?; + let file = std::fs::File::open(self.path)?; + let out = self + .filter + .convert_to_git( + file, + self.path, + &mut |_path, attrs| { + platform.matching_attributes(attrs); + }, + &mut |buf| { + (self.find)(self.id, buf) + .map(|_| Some(())) + .map_err(|err| Box::new(err) as Box) + }, + ) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + let len = match out { + ToGitOutcome::Unchanged(_) => Some(self.file_len), + ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None, + }; + Stream { + inner: out, + bytes: Some(self.worktree_bytes), + len, + } + }; + + self.worktree_reads.fetch_add(1, Ordering::Relaxed); + Ok(out) + } } diff --git a/gix-status/src/index_as_worktree/traits.rs b/gix-status/src/index_as_worktree/traits.rs index fa0a39370fa..958675dfc5d 100644 --- a/gix-status/src/index_as_worktree/traits.rs +++ b/gix-status/src/index_as_worktree/traits.rs @@ -3,6 +3,8 @@ use bstr::BStr; use gix_hash::ObjectId; use gix_index as index; use index::Entry; +use std::io::Read; +use std::sync::atomic::AtomicBool; /// Compares the content of two blobs in some way. pub trait CompareBlobs { @@ -10,15 +12,16 @@ pub trait CompareBlobs { type Output; /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` - /// and allow reading its bytes using `worktree_blob`. - /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. - /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. + /// and allow streaming its bytes using `data`. + /// If this function returns `None` the `entry` and the worktree blob are assumed to be identical. + /// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. + /// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer. fn compare_blobs<'a, 'b>( &mut self, entry: &gix_index::Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a>, - entry_blob: impl ReadDataOnce<'b>, + worktree_blob_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, ) -> Result, Error>; } @@ -33,13 +36,59 @@ pub trait SubmoduleStatus { fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result, Self::Error>; } -/// Lazy borrowed access to blob data. -pub trait ReadDataOnce<'a> { +/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files. +pub trait ReadData<'a> { /// Returns the contents of this blob. /// /// This potentially performs IO and other expensive operations /// and should only be called when necessary. - fn read_data(self) -> Result<&'a [u8], Error>; + fn read_blob(self) -> Result<&'a [u8], Error>; + + /// Stream a worktree file in such a manner that its content matches what would be put into git. + fn stream_worktree_file(self) -> Result, Error>; +} + +/// +pub mod read_data { + use gix_filter::pipeline::convert::ToGitOutcome; + use std::sync::atomic::{AtomicU64, Ordering}; + + /// A stream with worktree file data. + pub struct Stream<'a> { + pub(crate) inner: ToGitOutcome<'a, std::fs::File>, + pub(crate) bytes: Option<&'a AtomicU64>, + pub(crate) len: Option, + } + + impl<'a> Stream<'a> { + /// Return the underlying byte-buffer if there is one. + /// + /// If `None`, read from this instance like a stream. + /// Note that this method should only be called once to assure proper accounting of the amount of bytes read. + pub fn as_bytes(&self) -> Option<&'a [u8]> { + self.inner.as_bytes().map(|v| { + if let Some(bytes) = self.bytes { + bytes.fetch_add(v.len() as u64, Ordering::Relaxed); + } + v + }) + } + + /// Return the size of the stream in bytes if it is known in advance. + pub fn size(&self) -> Option { + self.len + } + } + + impl<'a> std::io::Read for Stream<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + if let Some(bytes) = self.bytes { + bytes.fetch_add(n as u64, Ordering::Relaxed); + } + Ok(n) + } + } } /// Compares to blobs by comparing their size and oid, and only looks at the file if @@ -50,22 +99,23 @@ pub struct FastEq; impl CompareBlobs for FastEq { type Output = (); + // TODO: make all streaming IOPs interruptible. fn compare_blobs<'a, 'b>( &mut self, entry: &Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a>, - _entry_blob: impl ReadDataOnce<'b>, + worktree_file_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, ) -> Result, Error> { // make sure to account for racily smudged entries here so that they don't always keep // showing up as modified even after their contents have changed again, to a potentially // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. - if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { + if entry.stat.size as u64 != worktree_file_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { return Ok(Some(())); } - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(())) + HashEq + .compare_blobs(entry, worktree_file_size, data, buf) + .map(|opt| opt.map(|_| ())) } } @@ -82,12 +132,37 @@ impl CompareBlobs for HashEq { fn compare_blobs<'a, 'b>( &mut self, entry: &Entry, - _worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a>, - _entry_blob: impl ReadDataOnce<'b>, + _worktree_blob_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, ) -> Result, Error> { - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(file_hash)) + let mut stream = data.stream_worktree_file()?; + match stream.as_bytes() { + Some(buffer) => { + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer); + Ok((entry.id != file_hash).then_some(file_hash)) + } + None => { + let file_hash = match stream.size() { + None => { + stream.read_to_end(buf)?; + gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf) + } + Some(len) => { + let header = gix_object::encode::loose_header(gix_object::Kind::Blob, len); + let mut hasher = gix_features::hash::hasher(entry.id.kind()); + hasher.update(&header); + gix_features::hash::bytes_with_hasher( + &mut stream, + len, + hasher, + &mut gix_features::progress::Discard, + &AtomicBool::default(), + )? + } + }; + Ok((entry.id != file_hash).then_some(file_hash)) + } + } } } diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 808b1747d11..b55442b4227 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -20,7 +20,7 @@ pub enum Error { } /// Options that control how the index status with a worktree is computed. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Default)] pub struct Options { /// Capabilities of the file system which affect the status computation. pub fs: gix_fs::Capabilities, @@ -30,6 +30,11 @@ pub struct Options { pub thread_limit: Option, /// Options that control how stat comparisons are made when checking if a file is fresh. pub stat: gix_index::entry::stat::Options, + /// Pre-configured state to allow processing attributes. + /// + /// These are needed to potentially refresh the index with data read from the worktree, which needs to be converted back + /// to the form stored in git. + pub attributes: gix_worktree::stack::state::Attributes, } /// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 95b32e5dec2..28c20145d4a 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -8,12 +8,9 @@ //! While also being able to check check if the working tree is dirty, quickly. #![deny(missing_docs, rust_2018_idioms, unsafe_code)] -/// -pub mod read; +use bstr::BStr; pub mod index_as_worktree; - -use bstr::BStr; pub use index_as_worktree::function::index_as_worktree; /// A trait to facilitate working working with pathspecs. diff --git a/gix-status/src/read.rs b/gix-status/src/read.rs deleted file mode 100644 index 6d28ad33bf9..00000000000 --- a/gix-status/src/read.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! This module allows creating git blobs from worktree files. -//! -//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly -//! and attributes/config options need to be considered. - -use std::{ - fs::{read_link, File}, - io::{self, Read}, - path::Path, -}; - -use gix_object::Blob; - -// TODO: tests - -// TODO: what to do about precompose unicode and ignore_case for symlinks - -/// Create a blob from a file or symlink. -pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf(path, &mut data, capabilities)?; - Ok(Blob { data }) -} - -/// Create a blob from a file or symlink. -pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; - Ok(Blob { data }) -} - -/// Create blob data from a file or symlink. -pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { - data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) -} - -/// Create a blob from a file or symlink. -pub fn data_to_buf_with_meta<'a>( - path: &Path, - buf: &'a mut Vec, - is_symlink: bool, - capabilities: &gix_fs::Capabilities, -) -> io::Result<&'a [u8]> { - buf.clear(); - // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just - // normal files with their content equal to the linked path (so can be read normally) - // - if is_symlink && capabilities.symlink { - // conversion to bstr can never fail because symlinks are only used - // on unix (by git) so no reason to use the try version here - let symlink_path = gix_path::into_bstr(read_link(path)?); - buf.extend_from_slice(&symlink_path); - } else { - buf.clear(); - File::open(path)?.read_to_end(buf)?; - // TODO apply filters - } - Ok(buf.as_slice()) -} diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 5658404cf60..3b931252a11 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -13,7 +13,7 @@ use gix_status::index_as_worktree::{Outcome, Record}; use gix_status::{ index_as_worktree, index_as_worktree::{ - traits::{CompareBlobs, FastEq, ReadDataOnce}, + traits::{CompareBlobs, FastEq, ReadData}, Change as WorktreeChange, Options, Recorder, }, }; @@ -37,8 +37,16 @@ fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Out fixture_filtered(name, &[], expected_status) } +fn fixture_with_index( + name: &str, + prepare_index: impl FnMut(&mut gix_index::State), + expected_status: &[(&BStr, Option, bool)], +) -> Outcome { + fixture_filtered_detailed(name, "", &[], expected_status, prepare_index, false) +} + fn submodule_fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Outcome { - fixture_filtered_detailed("status_submodule", name, &[], expected_status, false) + fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, false) } fn submodule_fixture_status( @@ -46,11 +54,11 @@ fn submodule_fixture_status( expected_status: &[(&BStr, Option, bool)], submodule_dirty: bool, ) -> Outcome { - fixture_filtered_detailed("status_submodule", name, &[], expected_status, submodule_dirty) + fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, submodule_dirty) } fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) -> Outcome { - fixture_filtered_detailed(name, "", pathspecs, expected_status, false) + fixture_filtered_detailed(name, "", pathspecs, expected_status, |_| {}, false) } fn fixture_filtered_detailed( @@ -58,12 +66,14 @@ fn fixture_filtered_detailed( subdir: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)], + mut prepare_index: impl FnMut(&mut gix_index::State), submodule_dirty: bool, ) -> Outcome { let worktree = fixture_path(name).join(subdir); let git_dir = worktree.join(".git"); let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); + prepare_index(&mut index); let mut recorder = Recorder::default(); let search = gix_pathspec::Search::from_specs(to_pathspecs(pathspecs), None, std::path::Path::new("")) .expect("valid specs can be normalized"); @@ -76,6 +86,7 @@ fn fixture_filtered_detailed( |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), &mut gix_features::progress::Discard, Pathspec(search), + Default::default(), &AtomicBool::default(), Options { fs: gix_fs::Capabilities::probe(&git_dir), @@ -297,14 +308,21 @@ fn unchanged() { } #[test] -#[cfg_attr( - windows, - ignore = "needs work, on windows plenty of additional files are considered modified for some reason" -)] -fn modified() { +fn refresh() { + let expected_outcome = Outcome { + entries_to_process: 5, + entries_processed: 5, + symlink_metadata_calls: 5, + entries_updated: if cfg!(windows) { 2 } else { 1 }, + worktree_files_read: 4, + worktree_bytes: if cfg!(windows) { 41 } else { 38 }, + ..Default::default() + }; assert_eq!( - fixture( + fixture_with_index( "status_changed", + |index| { index.entries_mut().iter_mut().for_each(|e| e.stat = Default::default()) }, + #[cfg(not(windows))] &[ ( BStr::new(b"dir/content"), @@ -332,18 +350,116 @@ fn modified() { NO_CONFLICT, ), ], + #[cfg(windows)] + &[ + ( + BStr::new("dir/content2"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()) + }), + NO_CONFLICT + ), + ( + BStr::new("empty"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()) + }), + NO_CONFLICT + ), + ( + BStr::new("executable"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()) + }), + NO_CONFLICT + ) + ], ), - Outcome { - entries_to_process: 5, - entries_processed: 5, - symlink_metadata_calls: 5, - entries_updated: 1, - worktree_files_read: 2, - worktree_bytes: 23, - racy_clean: 1, - ..Default::default() - } + expected_outcome, + ); +} + +#[test] +fn modified() { + #[cfg(not(windows))] + let expected_outcome = Outcome { + entries_to_process: 5, + entries_processed: 5, + symlink_metadata_calls: 5, + worktree_files_read: 2, + worktree_bytes: 23, + ..Default::default() + }; + #[cfg(windows)] + let expected_outcome = Outcome { + entries_to_process: 5, + entries_processed: 5, + symlink_metadata_calls: 5, + ..Default::default() + }; + + let actual_outcome = fixture( + "status_changed", + #[cfg(not(windows))] + &[ + ( + BStr::new(b"dir/content"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: None, + }), + NO_CONFLICT, + ), + ( + BStr::new(b"dir/content2"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + NO_CONFLICT, + ), + (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), + ( + BStr::new(b"executable"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: Some(()), + }), + NO_CONFLICT, + ), + ], + #[cfg(windows)] + &[ + ( + BStr::new("dir/content2"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + NO_CONFLICT, + ), + ( + BStr::new("empty"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + NO_CONFLICT, + ), + ( + BStr::new("executable"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + NO_CONFLICT, + ), + ], ); + assert_eq!(ignore_updated(ignore_racyclean(actual_outcome)), expected_outcome,); } const NO_CONFLICT: bool = false; @@ -369,13 +485,12 @@ fn racy_git() { fn compare_blobs<'a, 'b>( &mut self, entry: &Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a>, - entry_blob: impl ReadDataOnce<'b>, + worktree_file_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, ) -> Result, gix_status::index_as_worktree::Error> { self.0.fetch_add(1, Ordering::Relaxed); - self.1 - .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob) + self.1.compare_blobs(entry, worktree_file_size, data, buf) } } @@ -401,6 +516,7 @@ fn racy_git() { |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), + Default::default(), &AtomicBool::default(), Options { fs, @@ -439,6 +555,7 @@ fn racy_git() { |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), &mut gix_features::progress::Discard, Pathspec::default(), + Default::default(), &AtomicBool::default(), Options { fs, @@ -508,3 +625,9 @@ fn ignore_racyclean(mut out: Outcome) -> Outcome { out.racy_clean = 0; out } + +// This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. +fn ignore_updated(mut out: Outcome) -> Outcome { + out.entries_updated = 0; + out +} diff --git a/gix-status/tests/worktree.rs b/gix-status/tests/worktree.rs index d5fbdb9b732..8508f38a9c4 100644 --- a/gix-status/tests/worktree.rs +++ b/gix-status/tests/worktree.rs @@ -1,6 +1,4 @@ pub use gix_testtools::Result; - +mod stack; mod status; use status::*; - -mod stack; From b55a8d58b8bd9e1ba2f9049668c166e75fb0a360 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 3 Oct 2023 20:47:00 +0200 Subject: [PATCH 15/23] feat!: add entries-relative index to each change. That way it's possible to lookup other, surrounding entries in case of conflicts or easily find entries that didn't change. --- gix-status/src/index_as_worktree/function.rs | 53 +++++++-- gix-status/src/index_as_worktree/recorder.rs | 4 + gix-status/src/index_as_worktree/types.rs | 5 +- .../status_submodule.tar.xz | 4 +- gix-status/tests/fixtures/status_submodule.sh | 14 +++ gix-status/tests/status/index_as_worktree.rs | 103 +++++++++++------- 6 files changed, 131 insertions(+), 52 deletions(-) diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index bae8e6b7769..beeccf23f0b 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,3 +1,4 @@ +use std::slice::ChunksMut; use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; use std::{io, marker::PhantomData, path::Path}; @@ -19,7 +20,7 @@ use crate::{ /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them /// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and -/// `submodule` which can take a look at submodules in detail to produce status information. +/// `submodule` which can take a look at submodules in detail to produce status information (BASE version if its conflicting). /// `options` are used to configure the operation. /// /// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to @@ -85,6 +86,7 @@ where ); let (entries, path_backing) = index.entries_mut_and_pathbacking(); let mut num_entries = entries.len(); + let entry_index_offset = range.start; let entries = &mut entries[range]; let _span = gix_features::trace::detail!("gix_status::index_as_worktree", @@ -137,14 +139,21 @@ where }; in_parallel_if( || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. - gix_features::interrupt::Iter::new(entries.chunks_mut(chunk_size), should_interrupt), + gix_features::interrupt::Iter::new( + OffsetIter { + inner: entries.chunks_mut(chunk_size), + offset: entry_index_offset, + }, + should_interrupt, + ), thread_limit, new_state, - |entries, (state, blobdiff, submdule, find, pathspec)| { + |(entry_offset, entries), (state, blobdiff, submdule, find, pathspec)| { entries .iter_mut() - .filter_map(|entry| { - let res = state.process(entry, pathspec, blobdiff, submdule, find); + .enumerate() + .filter_map(|(entry_index, entry)| { + let res = state.process(entry, entry_offset + entry_index, pathspec, blobdiff, submdule, find); count.fetch_add(1, Ordering::Relaxed); res }) @@ -198,12 +207,22 @@ struct State<'a, 'b> { odb_reads: &'a AtomicUsize, } -type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; +type StatusResult<'index, T, U> = Result< + ( + &'index gix_index::Entry, + usize, + &'index BStr, + Option>, + bool, + ), + Error, +>; impl<'index> State<'_, 'index> { fn process( &mut self, entry: &'index mut gix_index::Entry, + entry_index: usize, pathspec: &mut impl Pathspec, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, @@ -234,7 +253,7 @@ impl<'index> State<'_, 'index> { return None; } let status = self.compute_status(&mut *entry, path, diff, submodule, find); - Some(status.map(move |status| (&*entry, path, status, conflict))) + Some(status.map(move |status| (&*entry, entry_index, path, status, conflict))) } /// # On how racy-git is handled here @@ -412,8 +431,8 @@ impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U> fn feed(&mut self, items: Self::Input) -> Result { for item in items { - let (entry, path, change, conflict) = item?; - self.collector.visit_entry(entry, path, change, conflict); + let (entry, entry_index, path, change, conflict) = item?; + self.collector.visit_entry(entry, entry_index, path, change, conflict); } Ok(()) } @@ -511,3 +530,19 @@ where Ok(out) } } + +struct OffsetIter<'a, T> { + inner: ChunksMut<'a, T>, + offset: usize, +} + +impl<'a, T> Iterator for OffsetIter<'a, T> { + type Item = (usize, &'a mut [T]); + + fn next(&mut self) -> Option { + let block = self.inner.next()?; + let offset = self.offset; + self.offset += block.len(); + Some((offset, block)) + } +} diff --git a/gix-status/src/index_as_worktree/recorder.rs b/gix-status/src/index_as_worktree/recorder.rs index 7f098e86214..6e39f696d8d 100644 --- a/gix-status/src/index_as_worktree/recorder.rs +++ b/gix-status/src/index_as_worktree/recorder.rs @@ -10,6 +10,8 @@ use crate::index_as_worktree::{Change, VisitEntry}; pub struct Record<'index, T, U> { /// The index entry that is changed. pub entry: &'index index::Entry, + /// The index of the `entry` relative to all entries in the input index. + pub entry_index: usize, /// The path to the entry. pub relative_path: &'index BStr, /// The change itself, or `None` if there is only a conflict. @@ -32,6 +34,7 @@ impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { fn visit_entry( &mut self, entry: &'index index::Entry, + entry_index: usize, rela_path: &'index BStr, change: Option>, conflict: bool, @@ -39,6 +42,7 @@ impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { if conflict || change.is_some() { self.records.push(Record { entry, + entry_index, relative_path: rela_path, change, conflict, diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index b55442b4227..1d295720598 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -118,12 +118,13 @@ pub trait VisitEntry<'index> { type ContentChange; /// Data obtained when checking the submodule status. type SubmoduleStatus; - /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether - /// or not it has a `conflict`. + /// Observe the `change` of `entry` at the repository-relative `rela_path` at `entry_index` + /// (relative to the complete list of all index entries), indicating whether or not it has a `conflict`. /// If `change` is `None`, there is no change. fn visit_entry( &mut self, entry: &'index gix_index::Entry, + entry_index: usize, rela_path: &'index BStr, change: Option>, conflict: bool, diff --git a/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz index 1c7546c8c07..63a880d3709 100644 --- a/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz +++ b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2d7b75f62872a98ad670d1239e5ff6867b004a84ffacb050c1f4bc06b5a1d17 -size 14876 +oid sha256:18a83500a44dbb5ab3865281645d45f8a81d469cdc7a719c2bb93e92f2fac00e +size 16940 diff --git a/gix-status/tests/fixtures/status_submodule.sh b/gix-status/tests/fixtures/status_submodule.sh index 280612d6a97..a8b6634c5e0 100644 --- a/gix-status/tests/fixtures/status_submodule.sh +++ b/gix-status/tests/fixtures/status_submodule.sh @@ -32,3 +32,17 @@ cp -R no-change empty-dir-no-change rm -Rf m1 mkdir m1 ) + +cp -R no-change conflict +(cd conflict + (cd m1 + git checkout @~1 + ) + + git commit -am "change submodule head" + git checkout -b other @~1 + git rm -rf m1 + git commit -m "removed submodule" + + git merge main || : +) diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 3b931252a11..c53b58f7962 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -32,32 +32,29 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { }; type Change = WorktreeChange<(), ()>; +type Expectation<'a> = (&'a BStr, usize, Option, bool); -fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Outcome { +fn fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { fixture_filtered(name, &[], expected_status) } fn fixture_with_index( name: &str, prepare_index: impl FnMut(&mut gix_index::State), - expected_status: &[(&BStr, Option, bool)], + expected_status: &[Expectation<'_>], ) -> Outcome { fixture_filtered_detailed(name, "", &[], expected_status, prepare_index, false) } -fn submodule_fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) -> Outcome { +fn submodule_fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, false) } -fn submodule_fixture_status( - name: &str, - expected_status: &[(&BStr, Option, bool)], - submodule_dirty: bool, -) -> Outcome { +fn submodule_fixture_status(name: &str, expected_status: &[Expectation<'_>], submodule_dirty: bool) -> Outcome { fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, submodule_dirty) } -fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) -> Outcome { +fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[Expectation<'_>]) -> Outcome { fixture_filtered_detailed(name, "", pathspecs, expected_status, |_| {}, false) } @@ -65,7 +62,7 @@ fn fixture_filtered_detailed( name: &str, subdir: &str, pathspecs: &[&str], - expected_status: &[(&BStr, Option, bool)], + expected_status: &[Expectation<'_>], mut prepare_index: impl FnMut(&mut gix_index::State), submodule_dirty: bool, ) -> Outcome { @@ -100,12 +97,10 @@ fn fixture_filtered_detailed( outcome } -fn records_to_tuple<'index>( - records: impl IntoIterator>, -) -> Vec<(&'index BStr, Option, bool)> { +fn records_to_tuple<'index>(records: impl IntoIterator>) -> Vec> { records .into_iter() - .map(|r| (r.relative_path, r.change, r.conflict)) + .map(|r| (r.relative_path, r.entry_index, r.change, r.conflict)) .collect() } @@ -135,10 +130,10 @@ fn removed() { let out = fixture( "status_removed", &[ - (BStr::new(b"dir/content"), Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"empty"), Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"executable"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/content"), 0, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/sub-dir/symlink"), 1, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"empty"), 2, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"executable"), 3, Some(Change::Removed), NO_CONFLICT), ], ); assert_eq!( @@ -155,8 +150,8 @@ fn removed() { "status_removed", &["dir"], &[ - (BStr::new(b"dir/content"), Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/content"), 0, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/sub-dir/symlink"), 1, Some(Change::Removed), NO_CONFLICT), ], ); assert_eq!( @@ -192,7 +187,7 @@ fn subomdule_deleted_dir() { assert_eq!( ignore_racyclean(submodule_fixture( "deleted-dir", - &[(BStr::new(b"m1"), Some(Change::Removed), NO_CONFLICT)] + &[(BStr::new(b"m1"), 1, Some(Change::Removed), NO_CONFLICT)] )), Outcome { entries_to_process: 2, @@ -211,7 +206,7 @@ fn subomdule_typechange() { assert_eq!( ignore_racyclean(submodule_fixture( "type-change", - &[(BStr::new(b"m1"), Some(Change::Type), NO_CONFLICT)] + &[(BStr::new(b"m1"), 1, Some(Change::Type), NO_CONFLICT)] )), Outcome { entries_to_process: 2, @@ -246,7 +241,12 @@ fn subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { assert_eq!( ignore_racyclean(submodule_fixture_status( "empty-dir-no-change", - &[(BStr::new(b"m1"), Some(Change::SubmoduleModification(())), NO_CONFLICT)], + &[( + BStr::new(b"m1"), + 1, + Some(Change::SubmoduleModification(())), + NO_CONFLICT + )], true, )), Outcome { @@ -266,7 +266,7 @@ fn intent_to_add() { assert_eq!( fixture( "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), NO_CONFLICT)], + &[(BStr::new(b"content"), 0, Some(Change::IntentToAdd), NO_CONFLICT)], ), Outcome { entries_to_process: 1, @@ -284,6 +284,7 @@ fn conflict() { "status_conflict", &[( BStr::new(b"content"), + 0, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), @@ -302,6 +303,20 @@ fn conflict() { ); } +#[test] +fn submodule_conflict() { + assert_eq!( + submodule_fixture("conflict", &[(BStr::new(b"m1"), 1, None, true)]), + Outcome { + entries_to_process: 3, + entries_processed: 3, + symlink_metadata_calls: 2, + ..Default::default() + }, + "submodule status is still called for OUR side of an entry, but never for THEIRS" + ); +} + #[test] fn unchanged() { fixture("status_unchanged", &[]); @@ -326,6 +341,7 @@ fn refresh() { &[ ( BStr::new(b"dir/content"), + 0, Some(Change::Modification { executable_bit_changed: true, content_change: None, @@ -334,15 +350,17 @@ fn refresh() { ), ( BStr::new(b"dir/content2"), + 1, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), }), NO_CONFLICT, ), - (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), + (BStr::new(b"empty"), 3, Some(Change::Type), NO_CONFLICT), ( BStr::new(b"executable"), + 4, Some(Change::Modification { executable_bit_changed: true, content_change: Some(()), @@ -354,6 +372,7 @@ fn refresh() { &[ ( BStr::new("dir/content2"), + 1, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()) @@ -362,6 +381,7 @@ fn refresh() { ), ( BStr::new("empty"), + 3, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()) @@ -370,6 +390,7 @@ fn refresh() { ), ( BStr::new("executable"), + 4, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()) @@ -384,29 +405,19 @@ fn refresh() { #[test] fn modified() { - #[cfg(not(windows))] - let expected_outcome = Outcome { - entries_to_process: 5, - entries_processed: 5, - symlink_metadata_calls: 5, - worktree_files_read: 2, - worktree_bytes: 23, - ..Default::default() - }; - #[cfg(windows)] let expected_outcome = Outcome { entries_to_process: 5, entries_processed: 5, symlink_metadata_calls: 5, ..Default::default() }; - let actual_outcome = fixture( "status_changed", #[cfg(not(windows))] &[ ( BStr::new(b"dir/content"), + 0, Some(Change::Modification { executable_bit_changed: true, content_change: None, @@ -415,15 +426,17 @@ fn modified() { ), ( BStr::new(b"dir/content2"), + 1, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), }), NO_CONFLICT, ), - (BStr::new(b"empty"), Some(Change::Type), NO_CONFLICT), + (BStr::new(b"empty"), 3, Some(Change::Type), NO_CONFLICT), ( BStr::new(b"executable"), + 4, Some(Change::Modification { executable_bit_changed: true, content_change: Some(()), @@ -435,6 +448,7 @@ fn modified() { &[ ( BStr::new("dir/content2"), + 1, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), @@ -443,6 +457,7 @@ fn modified() { ), ( BStr::new("empty"), + 3, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), @@ -451,6 +466,7 @@ fn modified() { ), ( BStr::new("executable"), + 4, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), @@ -459,7 +475,10 @@ fn modified() { ), ], ); - assert_eq!(ignore_updated(ignore_racyclean(actual_outcome)), expected_outcome,); + assert_eq!( + ignore_worktree_stats(ignore_updated(ignore_racyclean(actual_outcome))), + expected_outcome, + ); } const NO_CONFLICT: bool = false; @@ -585,6 +604,7 @@ fn racy_git() { records_to_tuple(recorder.records), &[( BStr::new(b"content"), + 0, Some(Change::Modification { executable_bit_changed: false, content_change: Some(()), @@ -626,8 +646,13 @@ fn ignore_racyclean(mut out: Outcome) -> Outcome { out } -// This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. fn ignore_updated(mut out: Outcome) -> Outcome { out.entries_updated = 0; out } + +fn ignore_worktree_stats(mut out: Outcome) -> Outcome { + out.worktree_bytes = 0; + out.worktree_files_read = 0; + out +} From 60c948f55ec432ab40b826a9ce8cb3d8fe15a543 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Oct 2023 09:38:10 +0200 Subject: [PATCH 16/23] feat!: replace `conflict` marker with detailed decoding of stages. We also adjust the returned data structure to allow the input to be immutable, which delegates entry updates to the caller. This also paves the way for rename tracking, which requires free access to entries for searching renames among the added and removed items, and/or copies among the added ones. --- gix-status/src/index_as_worktree/function.rs | 231 ++++++++---- gix-status/src/index_as_worktree/mod.rs | 2 +- gix-status/src/index_as_worktree/recorder.rs | 29 +- gix-status/src/index_as_worktree/types.rs | 76 +++- gix-status/tests/fixtures/conflicts.sh | 67 ++++ .../generated-archives/conflicts.tar.xz | 3 + gix-status/tests/status/index_as_worktree.rs | 335 +++++++++++------- 7 files changed, 506 insertions(+), 237 deletions(-) create mode 100644 gix-status/tests/fixtures/conflicts.sh create mode 100644 gix-status/tests/fixtures/generated-archives/conflicts.tar.xz diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index beeccf23f0b..b8ef0c7e4c5 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,6 +1,6 @@ -use std::slice::ChunksMut; +use std::slice::Chunks; use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; -use std::{io, marker::PhantomData, path::Path}; +use std::{io, path::Path}; use bstr::BStr; use filetime::FileTime; @@ -8,6 +8,7 @@ use gix_features::parallel::{in_parallel_if, Reduce}; use gix_filter::pipeline::convert::ToGitOutcome; use crate::index_as_worktree::traits::read_data::Stream; +use crate::index_as_worktree::{Conflict, EntryStatus}; use crate::{ index_as_worktree::{ traits, @@ -23,26 +24,29 @@ use crate::{ /// `submodule` which can take a look at submodules in detail to produce status information (BASE version if its conflicting). /// `options` are used to configure the operation. /// -/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to -/// the current time for which it will be considered fresh as long as it is included which depends on `pathspec`. +/// Note that `index` may require changes to be up-to-date with the working tree and avoid expensive computations by updating respective entries +/// with stat information from the worktree, and its timestamp is adjusted to the current time for which it will be considered fresh +/// as long as it is included which depends on `pathspec`. All this is delegated to the caller. /// /// `should_interrupt` can be used to stop all processing. /// `filter` is used to convert worktree files back to their internal git representation. For this to be correct, /// [`Options::attributes`] must be configured as well. /// +/// **It's important to note that the `index` should have its [timestamp updated](gix_index::State::set_timestamp()) with a timestamp +/// from just before making this call *if* [entries were updated](Outcome::entries_to_update)** +/// /// ### Note /// -/// Technically, this function does more as this also provides additional information, like whether a file has conflicts, -/// and files that were added with `git add` are shown as a special as well. It also updates index entry stats like `git status` would -/// if it had to determine the hash. If that happened, the index should be written back, see [Outcome::skipped] -/// The latter is a 'change' that is not technically requiring a change to the index since `git add` already added the -/// file to the index, but didn't hash it. +/// Technically, this function does more as it also provides additional information, like whether a file has conflicts, +/// and files that were added with `git add` are shown as a special as well. It also provides updates to entry filesystem +/// stats like `git status` would if it had to determine the hash. +/// If that happened, the index should be written back after updating the entries with these updated stats, see [Outcome::skipped]. /// /// Thus some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed /// by the `collector`. #[allow(clippy::too_many_arguments)] pub fn index_as_worktree<'index, T, U, Find, E1, E2>( - index: &'index mut gix_index::State, + index: &'index gix_index::State, worktree: &Path, collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>, compare: impl CompareBlobs + Send + Clone, @@ -65,7 +69,6 @@ where // (modified at or after the last index update) during the index update we then set those // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. let timestamp = index.timestamp(); - index.set_timestamp(FileTime::now()); let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( 500, // just like git index.entries().len().into(), @@ -84,10 +87,10 @@ where index, index.path_backing(), ); - let (entries, path_backing) = index.entries_mut_and_pathbacking(); + let (entries, path_backing) = (index.entries(), index.path_backing()); let mut num_entries = entries.len(); let entry_index_offset = range.start; - let entries = &mut entries[range]; + let entries = &entries[range]; let _span = gix_features::trace::detail!("gix_status::index_as_worktree", num_entries = entries.len(), @@ -95,7 +98,7 @@ where thread_limit = ?thread_limit); let entries_skipped_by_common_prefix = num_entries - entries.len(); - let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_updated) = Default::default(); + let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update) = Default::default(); let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default(); num_entries = entries.len(); @@ -105,7 +108,7 @@ where let new_state = { let options = &options; let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); - let (symlink_metadata_calls, entries_updated) = (&symlink_metadata_calls, &entries_updated); + let (symlink_metadata_calls, entries_to_update) = (&symlink_metadata_calls, &entries_to_update); let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); move |_| { @@ -123,7 +126,7 @@ where skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, - entries_updated, + entries_to_update, racy_clean, worktree_reads, worktree_bytes, @@ -141,27 +144,57 @@ where || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. gix_features::interrupt::Iter::new( OffsetIter { - inner: entries.chunks_mut(chunk_size), + inner: entries.chunks(chunk_size), offset: entry_index_offset, }, should_interrupt, ), thread_limit, new_state, - |(entry_offset, entries), (state, blobdiff, submdule, find, pathspec)| { - entries - .iter_mut() - .enumerate() - .filter_map(|(entry_index, entry)| { - let res = state.process(entry, entry_offset + entry_index, pathspec, blobdiff, submdule, find); - count.fetch_add(1, Ordering::Relaxed); - res - }) - .collect() + |(entry_offset, chunk_entries), (state, blobdiff, submdule, find, pathspec)| { + let all_entries = index.entries(); + let mut out = Vec::new(); + let mut idx = 0; + while let Some(entry) = chunk_entries.get(idx) { + let absolute_entry_index = entry_offset + idx; + if idx == 0 && entry.stage() != 0 { + let offset = entry_offset.checked_sub(1).and_then(|prev_idx| { + let prev_entry = &all_entries[prev_idx]; + let entry_path = entry.path_in(state.path_backing); + if prev_entry.stage() == 0 || prev_entry.path_in(state.path_backing) != entry_path { + // prev_entry (in previous chunk) does not belong to our conflict + return None; + } + Conflict::try_from_entry(all_entries, state.path_backing, absolute_entry_index, entry_path) + .map(|(_conflict, offset)| offset) + }); + if let Some(entries_to_skip_as_conflict_originates_in_previous_chunk) = offset { + // skip current entry as it's done, along with following conflict entries + idx += entries_to_skip_as_conflict_originates_in_previous_chunk + 1; + continue; + } + } + let res = state.process( + all_entries, + entry, + absolute_entry_index, + pathspec, + blobdiff, + submdule, + find, + &mut idx, + ); + idx += 1; + count.fetch_add(1, Ordering::Relaxed); + if let Some(res) = res { + out.push(res); + } + } + out }, ReduceChange { collector, - phantom: PhantomData, + entries: index.entries(), }, )?; @@ -171,7 +204,7 @@ where entries_skipped_by_common_prefix, entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed), entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed), - entries_updated: entries_updated.load(Ordering::Relaxed), + entries_to_update: entries_to_update.load(Ordering::Relaxed), symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed), racy_clean: racy_clean.load(Ordering::Relaxed), worktree_files_read: worktree_reads.load(Ordering::Relaxed), @@ -193,13 +226,13 @@ struct State<'a, 'b> { /// requires attributes to drive the filter configuration. attr_stack: gix_worktree::Stack, filter: gix_filter::Pipeline, - path_backing: &'b [u8], + path_backing: &'b gix_index::PathStorageRef, options: &'a Options, skipped_by_pathspec: &'a AtomicUsize, skipped_by_entry_flags: &'a AtomicUsize, symlink_metadata_calls: &'a AtomicUsize, - entries_updated: &'a AtomicUsize, + entries_to_update: &'a AtomicUsize, racy_clean: &'a AtomicUsize, worktree_bytes: &'a AtomicU64, worktree_reads: &'a AtomicUsize, @@ -207,37 +240,26 @@ struct State<'a, 'b> { odb_reads: &'a AtomicUsize, } -type StatusResult<'index, T, U> = Result< - ( - &'index gix_index::Entry, - usize, - &'index BStr, - Option>, - bool, - ), - Error, ->; +type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, usize, &'index BStr, EntryStatus), Error>; impl<'index> State<'_, 'index> { + #[allow(clippy::too_many_arguments)] fn process( &mut self, - entry: &'index mut gix_index::Entry, + entries: &'index [gix_index::Entry], + entry: &'index gix_index::Entry, entry_index: usize, pathspec: &mut impl Pathspec, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, find: &mut Find, + outer_entry_index: &mut usize, ) -> Option> where E1: std::error::Error + Send + Sync + 'static, E2: std::error::Error + Send + Sync + 'static, Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { - let conflict = match entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; if entry.flags.intersects( gix_index::entry::Flags::UPTODATE | gix_index::entry::Flags::SKIP_WORKTREE @@ -252,8 +274,21 @@ impl<'index> State<'_, 'index> { self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed); return None; } - let status = self.compute_status(&mut *entry, path, diff, submodule, find); - Some(status.map(move |status| (&*entry, entry_index, path, status, conflict))) + let status = if entry.stage() != 0 { + Ok( + Conflict::try_from_entry(entries, self.path_backing, entry_index, path).map(|(conflict, offset)| { + *outer_entry_index += offset; // let out loop skip over entries related to the conflict + EntryStatus::Conflict(conflict) + }), + ) + } else { + self.compute_status(entry, path, diff, submodule, find) + }; + match status { + Ok(None) => None, + Ok(Some(status)) => Some(Ok((entry, entry_index, path, status))), + Err(err) => Some(Err(err)), + } } /// # On how racy-git is handled here @@ -297,12 +332,12 @@ impl<'index> State<'_, 'index> { /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). fn compute_status( &mut self, - entry: &mut gix_index::Entry, + entry: &gix_index::Entry, rela_path: &BStr, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, find: &mut Find, - ) -> Result>, Error> + ) -> Result>, Error> where E1: std::error::Error + Send + Sync + 'static, E2: std::error::Error + Send + Sync + 'static, @@ -310,7 +345,7 @@ impl<'index> State<'_, 'index> { { let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) { Ok(path) => path, - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed.into())), Err(err) => return Err(Error::Io(err)), }; self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); @@ -327,19 +362,19 @@ impl<'index> State<'_, 'index> { rela_path: rela_path.into(), source: Box::new(err), })?; - return Ok(status.map(|status| Change::SubmoduleModification(status))); + return Ok(status.map(|status| Change::SubmoduleModification(status).into())); } else { - return Ok(Some(Change::Removed)); + return Ok(Some(Change::Removed.into())); } } Ok(metadata) => metadata, - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed.into())), Err(err) => { return Err(err.into()); } }; if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { - return Ok(Some(Change::IntentToAdd)); + return Ok(Some(EntryStatus::IntentToAdd)); } let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; let executable_bit_changed = @@ -347,7 +382,7 @@ impl<'index> State<'_, 'index> { .mode .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) { - Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), + Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type.into())), Some(gix_index::entry::mode::Change::ExecutableBit) => true, None => false, }; @@ -396,26 +431,26 @@ impl<'index> State<'_, 'index> { }; let content_change = diff.compare_blobs(entry, metadata.len(), fetch_data, &mut self.buf2)?; // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. - if content_change.is_some() && racy_clean { - entry.stat.size = 0; - } if content_change.is_some() || executable_bit_changed { - Ok(Some(Change::Modification { - executable_bit_changed, - content_change, - })) + let set_entry_stat_size_zero = content_change.is_some() && racy_clean; + Ok(Some( + Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero, + } + .into(), + )) } else { - // don't diff against this file next time since we know the file is unchanged. - entry.stat = new_stat; - self.entries_updated.fetch_add(1, Ordering::Relaxed); - Ok(None) + self.entries_to_update.fetch_add(1, Ordering::Relaxed); + Ok(Some(EntryStatus::NeedsUpdate(new_stat))) } } } struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { collector: &'a mut T, - phantom: PhantomData, + entries: &'index [gix_index::Entry], } impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce @@ -431,8 +466,9 @@ impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U> fn feed(&mut self, items: Self::Input) -> Result { for item in items { - let (entry, entry_index, path, change, conflict) = item?; - self.collector.visit_entry(entry, entry_index, path, change, conflict); + let (entry, entry_index, path, status) = item?; + self.collector + .visit_entry(self.entries, entry, entry_index, path, status); } Ok(()) } @@ -532,12 +568,12 @@ where } struct OffsetIter<'a, T> { - inner: ChunksMut<'a, T>, + inner: Chunks<'a, T>, offset: usize, } impl<'a, T> Iterator for OffsetIter<'a, T> { - type Item = (usize, &'a mut [T]); + type Item = (usize, &'a [T]); fn next(&mut self) -> Option { let block = self.inner.next()?; @@ -546,3 +582,52 @@ impl<'a, T> Iterator for OffsetIter<'a, T> { Some((offset, block)) } } + +impl Conflict { + /// Given `entries` and `path_backing`, both values obtained from an [index](gix_index::State), use `start_index` and enumerate + /// all conflict stages that still match `entry_path` to produce a conflict description. + /// Also return the amount of extra-entries that were part of the conflict declaration (not counting the entry at `start_index`) + /// + /// If for some reason entry at `start_index` isn't in conflicting state, `None` is returned. + pub fn try_from_entry( + entries: &[gix_index::Entry], + path_backing: &gix_index::PathStorageRef, + start_index: usize, + entry_path: &BStr, + ) -> Option<(Self, usize)> { + use Conflict::*; + let mut mask = None::; + + let mut count = 0_usize; + for stage in (start_index..(start_index + 3).min(entries.len())).filter_map(|idx| { + let entry = &entries[idx]; + let stage = entry.stage(); + (stage > 0 && entry.path_in(path_backing) == entry_path).then_some(stage) + }) { + // This could be `1 << (stage - 1)` but let's be specific. + *mask.get_or_insert(0) |= match stage { + 1 => 0b001, + 2 => 0b010, + 3 => 0b100, + _ => 0, + }; + count += 1; + } + + mask.map(|mask| { + ( + match mask { + 0b001 => BothDeleted, + 0b010 => AddedByUs, + 0b011 => DeletedByThem, + 0b100 => AddedByThem, + 0b101 => DeletedByUs, + 0b110 => BothAdded, + 0b111 => BothModified, + _ => unreachable!("BUG: bitshifts and typical entry layout doesn't allow for more"), + }, + count - 1, + ) + }) + } +} diff --git a/gix-status/src/index_as_worktree/mod.rs b/gix-status/src/index_as_worktree/mod.rs index 9fe169c05de..412068875d0 100644 --- a/gix-status/src/index_as_worktree/mod.rs +++ b/gix-status/src/index_as_worktree/mod.rs @@ -1,7 +1,7 @@ //! Changes between an index and a worktree. /// mod types; -pub use types::{Change, Error, Options, Outcome, VisitEntry}; +pub use types::{Change, Conflict, EntryStatus, Error, Options, Outcome, VisitEntry}; mod recorder; pub use recorder::{Record, Recorder}; diff --git a/gix-status/src/index_as_worktree/recorder.rs b/gix-status/src/index_as_worktree/recorder.rs index 6e39f696d8d..0cf1aa6f367 100644 --- a/gix-status/src/index_as_worktree/recorder.rs +++ b/gix-status/src/index_as_worktree/recorder.rs @@ -1,7 +1,7 @@ use bstr::BStr; use gix_index as index; -use crate::index_as_worktree::{Change, VisitEntry}; +use crate::index_as_worktree::{EntryStatus, VisitEntry}; /// A record of a change. /// @@ -14,10 +14,8 @@ pub struct Record<'index, T, U> { pub entry_index: usize, /// The path to the entry. pub relative_path: &'index BStr, - /// The change itself, or `None` if there is only a conflict. - pub change: Option>, - /// information about the conflict itself - pub conflict: bool, + /// The status information itself. + pub status: EntryStatus, } /// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. @@ -33,20 +31,17 @@ impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { fn visit_entry( &mut self, + _entries: &'index [index::Entry], entry: &'index index::Entry, entry_index: usize, - rela_path: &'index BStr, - change: Option>, - conflict: bool, + relative_path: &'index BStr, + status: EntryStatus, ) { - if conflict || change.is_some() { - self.records.push(Record { - entry, - entry_index, - relative_path: rela_path, - change, - conflict, - }) - } + self.records.push(Record { + entry, + entry_index, + relative_path, + status, + }) } } diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 1d295720598..daad11f27ad 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -53,10 +53,12 @@ pub struct Outcome { pub entries_skipped_by_entry_flags: usize, /// The amount of times we queried symlink-metadata for a file on disk. pub symlink_metadata_calls: usize, - /// The amount of entries whose stats have been updated as its modification couldn't be determined without an expensive calculation. + /// The amount of entries whose stats would need to be updated as its modification couldn't be determined without + /// an expensive calculation. /// /// With these updates, this calculation will be avoided next time the status runs. - pub entries_updated: usize, + /// Note that the stat updates are delegated to the caller. + pub entries_to_update: usize, /// The amount of entries that were considered racy-clean - they will need thorough checking to see if they are truly clean, /// i.e. didn't change. pub racy_clean: usize, @@ -79,7 +81,7 @@ impl Outcome { } /// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum Change { /// This corresponding file does not exist in the worktree anymore. Removed, @@ -95,6 +97,11 @@ pub enum Change { /// If there is no content change and only the executable bit /// changed then this is `None`. content_change: Option, + /// If true, the caller is expected to set [entry.stat.size = 0](gix_index::entry::Stat::size) to assure this + /// otherwise racily clean entry can still be detected as dirty next time this is called, but this time without + /// reading it from disk to hash it. It's a performance optimization and not doing so won't change the correctness + /// of the operation. + set_entry_stat_size_zero: bool, }, /// A submodule is initialized and checked out, and there was modification to either: /// @@ -105,28 +112,73 @@ pub enum Change { /// The exact nature of the modification is handled by the caller which may retain information per submodule or /// re-compute details as needed when seeing this variant. SubmoduleModification(U), - /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. +} + +/// Information about an entry. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum EntryStatus { + /// The entry is in a conflicting state, and we didn't collect any more information about it. + Conflict(Conflict), + /// There is no conflict and a change was discovered. + Change(Change), + /// The entry didn't change, but its state caused extra work that can be avoided next time if its stats would be updated to the + /// given stat. + NeedsUpdate( + /// The new stats which represent what's currently in the working tree. If these replace the current stats in the entry, + /// next time this operation runs we can determine the actual state much faster. + gix_index::entry::Stat, + ), + /// An index entry that corresponds to an untracked worktree file marked with `git add --intent-to-add`. /// - /// This means it's not available in the object database yet - /// even though now an entry exists that represents the worktree file. + /// This means it's not available in the object database yet even though now an entry exists that represents the worktree file. + /// The entry represents the promise of adding a new file, no matter the actual stat or content. + /// Effectively this means nothing changed. + /// This also means the file is still present, and that no detailed change checks were performed. IntentToAdd, } -/// Observe changes by comparing an index entry to the worktree or another index. +impl From> for EntryStatus { + fn from(value: Change) -> Self { + EntryStatus::Change(value) + } +} + +/// Describes a conflicting entry as comparison between 'our' version and 'their' version of it. +/// +/// If one side isn't specified, it is assumed to have modified the entry. In general, there would be no conflict +/// if both parties ended up in the same state. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Conflict { + /// Both deleted a different version of the entry. + BothDeleted, + /// We added, they modified, ending up in different states. + AddedByUs, + /// They deleted the entry, we modified it. + DeletedByThem, + /// They added the entry, we modified it, ending up in different states. + AddedByThem, + /// We deleted the entry, they modified it, ending up in different states. + DeletedByUs, + /// Both added the entry in different states. + BothAdded, + /// Both modified the entry, ending up in different states. + BothModified, +} + +/// Observe the status of an entry by comparing an index entry to the worktree. pub trait VisitEntry<'index> { /// Data generated by comparing an entry with a file. type ContentChange; /// Data obtained when checking the submodule status. type SubmoduleStatus; - /// Observe the `change` of `entry` at the repository-relative `rela_path` at `entry_index` - /// (relative to the complete list of all index entries), indicating whether or not it has a `conflict`. - /// If `change` is `None`, there is no change. + /// Observe the `status` of `entry` at the repository-relative `rela_path` at `entry_index` + /// (for accessing `entry` and surrounding in the complete list of `entries`). fn visit_entry( &mut self, + entries: &'index [gix_index::Entry], entry: &'index gix_index::Entry, entry_index: usize, rela_path: &'index BStr, - change: Option>, - conflict: bool, + status: EntryStatus, ); } diff --git a/gix-status/tests/fixtures/conflicts.sh b/gix-status/tests/fixtures/conflicts.sh new file mode 100644 index 00000000000..682a214cf99 --- /dev/null +++ b/gix-status/tests/fixtures/conflicts.sh @@ -0,0 +1,67 @@ +#!/bin/bash +set -eu -o pipefail + +(git init both-deleted && cd both-deleted + echo test > file + git add file && git commit -m file && + git branch alt && git mv file added-by-them + git commit -m "file renamed in added-by-them" && git checkout alt + git mv file added-by-us + git commit -m "file renamed in added-by-us" + git reset --hard alt + git merge main || : +) + +(git init deleted-by-us && cd deleted-by-us + git init + >file && git add file && git commit -m "initial" + echo change >> file && git commit -am "modify" + git checkout -b side HEAD^ + git rm file + git commit -m delete + git merge main || : +) + +(git init deleted-by-them && cd deleted-by-them + echo "This is some content." > file + git add file + git commit -m "Initial commit" + git checkout -b conflict + git rm file + git commit -m "Delete file in feature branch" + git checkout main + echo "Modified by main branch." >> file + git add file + git commit -m "Modified file in main branch" + git merge conflict || : +) + +(git init both-modified && cd both-modified + git init + > file && git add file && git commit -m "init" + + git checkout -b conflict + echo conflicting >> file && git commit -am "alt-change" + + git checkout main + echo other >> file && git commit -am "change" + + git merge conflict || : +) + +(git init both-added && cd both-added + git init + set -x + echo init >> deleted-by-them && git add . && git commit -m "init" + + git checkout -b second_branch + git rm deleted-by-them + git commit -m "deleted-by-them deleted on second_branch" + echo second > both-added && git add . && git commit -m second + + git checkout main + echo on_second > deleted-by-them && git commit -am "on second" + echo main > both-added && git add . && git commit -m main + + git merge second_branch || : +) diff --git a/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz b/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz new file mode 100644 index 00000000000..fcbf3888bc2 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855ef3e8f1cf518daab18a572ad9dc4a7c532a213d1b9f8a78f32f5346b518a2 +size 19036 diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index c53b58f7962..49c264d9b95 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -9,7 +9,7 @@ use filetime::{set_file_mtime, FileTime}; use gix_index as index; use gix_index::Entry; use gix_status::index_as_worktree::traits::SubmoduleStatus; -use gix_status::index_as_worktree::{Outcome, Record}; +use gix_status::index_as_worktree::{Conflict, EntryStatus as WorktreeEntryStatus, Outcome, Record}; use gix_status::{ index_as_worktree, index_as_worktree::{ @@ -32,7 +32,8 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { }; type Change = WorktreeChange<(), ()>; -type Expectation<'a> = (&'a BStr, usize, Option, bool); +type EntryStatus = WorktreeEntryStatus<(), ()>; +type Expectation<'a> = (&'a BStr, usize, EntryStatus); fn fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { fixture_filtered(name, &[], expected_status) @@ -50,6 +51,10 @@ fn submodule_fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, false) } +fn conflict_fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { + fixture_filtered_detailed("conflicts", name, &[], expected_status, |_| {}, false) +} + fn submodule_fixture_status(name: &str, expected_status: &[Expectation<'_>], submodule_dirty: bool) -> Outcome { fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, submodule_dirty) } @@ -66,6 +71,23 @@ fn fixture_filtered_detailed( mut prepare_index: impl FnMut(&mut gix_index::State), submodule_dirty: bool, ) -> Outcome { + // This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. + fn ignore_racyclean(mut out: Outcome) -> Outcome { + out.racy_clean = 0; + out + } + + fn ignore_updated(mut out: Outcome) -> Outcome { + out.entries_to_update = 0; + out + } + + fn ignore_worktree_stats(mut out: Outcome) -> Outcome { + out.worktree_bytes = 0; + out.worktree_files_read = 0; + out + } + let worktree = fixture_path(name).join(subdir); let git_dir = worktree.join(".git"); let mut index = @@ -75,7 +97,7 @@ fn fixture_filtered_detailed( let search = gix_pathspec::Search::from_specs(to_pathspecs(pathspecs), None, std::path::Path::new("")) .expect("valid specs can be normalized"); let outcome = index_as_worktree( - &mut index, + &index, &worktree, &mut recorder, FastEq, @@ -94,16 +116,40 @@ fn fixture_filtered_detailed( .unwrap(); recorder.records.sort_unstable_by_key(|r| r.relative_path); assert_eq!(records_to_tuple(recorder.records), expected_status); - outcome + ignore_racyclean(ignore_updated(ignore_worktree_stats(outcome))) } +/// Note that we also reset certain information to assure there is no flakyness - everything regarding race-detection otherwise can cause failures. fn records_to_tuple<'index>(records: impl IntoIterator>) -> Vec> { records .into_iter() - .map(|r| (r.relative_path, r.entry_index, r.change, r.conflict)) + .filter_map(|r| deracify_status(r.status).map(|status| (r.relative_path, r.entry_index, status))) .collect() } +fn deracify_status(status: EntryStatus) -> Option { + Some(match status { + EntryStatus::Conflict(c) => EntryStatus::Conflict(c), + EntryStatus::Change(c) => match c { + Change::Removed => Change::Removed, + Change::Type => Change::Type, + Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero: _, + } => Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero: false, + }, + Change::SubmoduleModification(c) => Change::SubmoduleModification(c), + } + .into(), + EntryStatus::NeedsUpdate(_) => return None, + EntryStatus::IntentToAdd => EntryStatus::IntentToAdd, + }) +} + #[derive(Clone)] struct SubmoduleStatusMock { dirty: bool, @@ -125,15 +171,19 @@ fn to_pathspecs(input: &[&str]) -> Vec { .collect() } +fn status_removed() -> EntryStatus { + Change::Removed.into() +} + #[test] fn removed() { let out = fixture( "status_removed", &[ - (BStr::new(b"dir/content"), 0, Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"dir/sub-dir/symlink"), 1, Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"empty"), 2, Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"executable"), 3, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/content"), 0, status_removed()), + (BStr::new(b"dir/sub-dir/symlink"), 1, status_removed()), + (BStr::new(b"empty"), 2, status_removed()), + (BStr::new(b"executable"), 3, status_removed()), ], ); assert_eq!( @@ -150,8 +200,8 @@ fn removed() { "status_removed", &["dir"], &[ - (BStr::new(b"dir/content"), 0, Some(Change::Removed), NO_CONFLICT), - (BStr::new(b"dir/sub-dir/symlink"), 1, Some(Change::Removed), NO_CONFLICT), + (BStr::new(b"dir/content"), 0, status_removed()), + (BStr::new(b"dir/sub-dir/symlink"), 1, status_removed()), ], ); assert_eq!( @@ -169,14 +219,11 @@ fn removed() { #[test] fn subomdule_nochange() { assert_eq!( - ignore_racyclean(submodule_fixture("no-change", &[])), + submodule_fixture("no-change", &[]), Outcome { entries_to_process: 2, entries_processed: 2, - entries_updated: 1, symlink_metadata_calls: 2, - worktree_bytes: 46, - worktree_files_read: 1, ..Default::default() } ); @@ -185,17 +232,11 @@ fn subomdule_nochange() { #[test] fn subomdule_deleted_dir() { assert_eq!( - ignore_racyclean(submodule_fixture( - "deleted-dir", - &[(BStr::new(b"m1"), 1, Some(Change::Removed), NO_CONFLICT)] - )), + submodule_fixture("deleted-dir", &[(BStr::new(b"m1"), 1, status_removed())]), Outcome { entries_to_process: 2, entries_processed: 2, - entries_updated: 1, symlink_metadata_calls: 2, - worktree_files_read: 1, - worktree_bytes: 46, ..Default::default() } ); @@ -204,17 +245,11 @@ fn subomdule_deleted_dir() { #[test] fn subomdule_typechange() { assert_eq!( - ignore_racyclean(submodule_fixture( - "type-change", - &[(BStr::new(b"m1"), 1, Some(Change::Type), NO_CONFLICT)] - )), + submodule_fixture("type-change", &[(BStr::new(b"m1"), 1, Change::Type.into())]), Outcome { entries_to_process: 2, entries_processed: 2, - entries_updated: 1, symlink_metadata_calls: 2, - worktree_files_read: 1, - worktree_bytes: 46, ..Default::default() } ) @@ -223,14 +258,11 @@ fn subomdule_typechange() { #[test] fn subomdule_empty_dir_no_change() { assert_eq!( - ignore_racyclean(submodule_fixture("empty-dir-no-change", &[])), + submodule_fixture("empty-dir-no-change", &[]), Outcome { entries_to_process: 2, entries_processed: 2, - entries_updated: 1, symlink_metadata_calls: 2, - worktree_files_read: 1, - worktree_bytes: 46, ..Default::default() } ); @@ -239,23 +271,15 @@ fn subomdule_empty_dir_no_change() { #[test] fn subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { assert_eq!( - ignore_racyclean(submodule_fixture_status( + submodule_fixture_status( "empty-dir-no-change", - &[( - BStr::new(b"m1"), - 1, - Some(Change::SubmoduleModification(())), - NO_CONFLICT - )], + &[(BStr::new(b"m1"), 1, Change::SubmoduleModification(()).into())], true, - )), + ), Outcome { entries_to_process: 2, entries_processed: 2, - entries_updated: 1, symlink_metadata_calls: 2, - worktree_files_read: 1, - worktree_bytes: 46, ..Default::default() } ); @@ -266,7 +290,7 @@ fn intent_to_add() { assert_eq!( fixture( "status_intent_to_add", - &[(BStr::new(b"content"), 0, Some(Change::IntentToAdd), NO_CONFLICT)], + &[(BStr::new(b"content"), 0, EntryStatus::IntentToAdd)], ), Outcome { entries_to_process: 1, @@ -282,38 +306,93 @@ fn conflict() { assert_eq!( fixture( "status_conflict", - &[( - BStr::new(b"content"), - 0, - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - true, - )], + &[(BStr::new(b"content"), 0, EntryStatus::Conflict(Conflict::BothModified))], + ), + Outcome { + entries_to_process: 3, + entries_processed: 1, + ..Default::default() + }, + "2 entries were just related to the conflict, which we don't count as processed then" + ); +} + +#[test] +fn conflict_both_deleted_and_added_by_them_and_added_by_us() { + use Conflict::*; + assert_eq!( + conflict_fixture( + "both-deleted", + &[ + (BStr::new(b"added-by-them"), 0, EntryStatus::Conflict(AddedByThem)), + (BStr::new(b"added-by-us"), 1, EntryStatus::Conflict(AddedByUs)), + (BStr::new(b"file"), 2, EntryStatus::Conflict(BothDeleted)), + ], ), Outcome { entries_to_process: 3, entries_processed: 3, - symlink_metadata_calls: 1, - worktree_files_read: 1, - worktree_bytes: 51, ..Default::default() - } + }, ); } +#[test] +fn conflict_both_added_and_deleted_by_them() { + use Conflict::*; + assert_eq!( + conflict_fixture( + "both-added", + &[ + (BStr::new(b"both-added"), 0, EntryStatus::Conflict(BothAdded)), + (BStr::new(b"deleted-by-them"), 2, EntryStatus::Conflict(DeletedByThem)), + ], + ), + Outcome { + entries_to_process: 4, + entries_processed: 2, + ..Default::default() + }, + ); +} + +#[test] +fn conflict_detailed_single() { + use Conflict::*; + for (name, expected, entry_index, entries_to_process, entries_processed) in [ + ("deleted-by-them", DeletedByThem, 0, 2, 1), + ("deleted-by-us", DeletedByUs, 0, 2, 1), + ("both-modified", BothModified, 0, 3, 1), + ] { + assert_eq!( + conflict_fixture( + name, + &[(BStr::new(b"file"), entry_index, EntryStatus::Conflict(expected))], + ), + Outcome { + entries_to_process, + entries_processed, + ..Default::default() + }, + "{name}" + ); + } +} + #[test] fn submodule_conflict() { assert_eq!( - submodule_fixture("conflict", &[(BStr::new(b"m1"), 1, None, true)]), + submodule_fixture( + "conflict", + &[(BStr::new(b"m1"), 1, EntryStatus::Conflict(Conflict::DeletedByUs))] + ), Outcome { entries_to_process: 3, - entries_processed: 3, - symlink_metadata_calls: 2, + entries_processed: 2, + symlink_metadata_calls: 1, ..Default::default() }, - "submodule status is still called for OUR side of an entry, but never for THEIRS" + "1 metadata call for .gitmodules, conflicting entries are not queried for status anymore." ); } @@ -328,9 +407,6 @@ fn refresh() { entries_to_process: 5, entries_processed: 5, symlink_metadata_calls: 5, - entries_updated: if cfg!(windows) { 2 } else { 1 }, - worktree_files_read: 4, - worktree_bytes: if cfg!(windows) { 41 } else { 38 }, ..Default::default() }; assert_eq!( @@ -342,30 +418,33 @@ fn refresh() { ( BStr::new(b"dir/content"), 0, - Some(Change::Modification { + Change::Modification { executable_bit_changed: true, content_change: None, - }), - NO_CONFLICT, + set_entry_stat_size_zero: false + } + .into(), ), ( BStr::new(b"dir/content2"), 1, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false + } + .into(), ), - (BStr::new(b"empty"), 3, Some(Change::Type), NO_CONFLICT), + (BStr::new(b"empty"), 3, Change::Type.into()), ( BStr::new(b"executable"), 4, - Some(Change::Modification { + Change::Modification { executable_bit_changed: true, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false + } + .into(), ), ], #[cfg(windows)] @@ -373,29 +452,32 @@ fn refresh() { ( BStr::new("dir/content2"), 1, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, - content_change: Some(()) - }), - NO_CONFLICT + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), ), ( BStr::new("empty"), 3, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, - content_change: Some(()) - }), - NO_CONFLICT + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), ), ( BStr::new("executable"), 4, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, - content_change: Some(()) - }), - NO_CONFLICT + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), ) ], ), @@ -418,30 +500,33 @@ fn modified() { ( BStr::new(b"dir/content"), 0, - Some(Change::Modification { + Change::Modification { executable_bit_changed: true, content_change: None, - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), ( BStr::new(b"dir/content2"), 1, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), - (BStr::new(b"empty"), 3, Some(Change::Type), NO_CONFLICT), + (BStr::new(b"empty"), 3, Change::Type.into()), ( BStr::new(b"executable"), 4, - Some(Change::Modification { + Change::Modification { executable_bit_changed: true, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), ], #[cfg(windows)] @@ -449,40 +534,38 @@ fn modified() { ( BStr::new("dir/content2"), 1, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), ( BStr::new("empty"), 3, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), ( BStr::new("executable"), 4, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - NO_CONFLICT, + set_entry_stat_size_zero: false, + } + .into(), ), ], ); - assert_eq!( - ignore_worktree_stats(ignore_updated(ignore_racyclean(actual_outcome))), - expected_outcome, - ); + assert_eq!(actual_outcome, expected_outcome,); } -const NO_CONFLICT: bool = false; - #[test] fn racy_git() { let timestamp = 940040400; @@ -527,7 +610,7 @@ fn racy_git() { let count = Arc::new(AtomicUsize::new(0)); let counter = CountCalls(count.clone(), FastEq); let out = index_as_worktree( - &mut index, + &index, worktree, &mut recorder, counter.clone(), @@ -566,7 +649,7 @@ fn racy_git() { index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0)); let mut recorder = Recorder::default(); let out = index_as_worktree( - &mut index, + &index, worktree, &mut recorder, counter, @@ -605,11 +688,12 @@ fn racy_git() { &[( BStr::new(b"content"), 0, - Some(Change::Modification { + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - false + set_entry_stat_size_zero: false + } + .into(), )], "racy change is correctly detected" ); @@ -639,20 +723,3 @@ impl gix_status::Pathspec for Pathspec { .map_or(false, |m| !m.is_excluded()) } } - -// This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. -fn ignore_racyclean(mut out: Outcome) -> Outcome { - out.racy_clean = 0; - out -} - -fn ignore_updated(mut out: Outcome) -> Outcome { - out.entries_updated = 0; - out -} - -fn ignore_worktree_stats(mut out: Outcome) -> Outcome { - out.worktree_bytes = 0; - out.worktree_files_read = 0; - out -} From 54fb7c24a97cb2339a67ad269344ce65166a545d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 11:34:07 +0200 Subject: [PATCH 17/23] adapt to changes in `gix-status` --- crate-status.md | 4 +- gitoxide-core/src/repository/index/entries.rs | 7 +- gitoxide-core/src/repository/status.rs | 105 ++++++++++++------ gix/src/filter.rs | 4 +- 4 files changed, 83 insertions(+), 37 deletions(-) diff --git a/crate-status.md b/crate-status.md index 3991742e00d..e6f70ea0391 100644 --- a/crate-status.md +++ b/crate-status.md @@ -456,7 +456,9 @@ Make it the best-performing implementation and the most convenient one. ### gix-status * [x] differences between index and worktree to turn index into worktree -* [ ] differences between tree and index to turn tree into index + - [ ] rename tracking +* [ ] differences between index and index to learn what changed + - [ ] rename tracking * [ ] untracked files * [ ] fast answer to 'is it dirty'. * diff --git a/gitoxide-core/src/repository/index/entries.rs b/gitoxide-core/src/repository/index/entries.rs index 354ad77ebff..7f68b8f58b2 100644 --- a/gitoxide-core/src/repository/index/entries.rs +++ b/gitoxide-core/src/repository/index/entries.rs @@ -404,9 +404,10 @@ pub(crate) mod function { out, "{} {}{:?} {} {}{}{}", match entry.flags.stage() { - 0 => "BASE ", - 1 => "OURS ", - 2 => "THEIRS ", + 0 => " ", + 1 => "BASE ", + 2 => "OURS ", + 3 => "THEIRS ", _ => "UNKNOWN", }, if entry.flags.is_empty() { diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index 05849d8185f..e2815eb28af 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -5,7 +5,7 @@ use gix::index::Entry; use gix::prelude::FindExt; use gix::Progress; use gix_status::index_as_worktree::traits::FastEq; -use gix_status::index_as_worktree::Change; +use gix_status::index_as_worktree::{Change, Conflict, EntryStatus}; pub enum Submodules { /// display all information about submodules, including ref changes, modifications and untracked files. @@ -48,23 +48,42 @@ pub fn show( )?; let mut progress = progress.add_child("traverse index"); let start = std::time::Instant::now(); + let options = gix_status::index_as_worktree::Options { + fs: repo.filesystem_options()?, + thread_limit, + stat: repo.stat_options()?, + attributes: match repo + .attributes_only( + index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + )? + .detach() + .state_mut() + { + gix::worktree::stack::State::AttributesStack(attrs) => std::mem::take(attrs), + // TODO: this should be nicer by creating attributes directly, but it's a private API + _ => unreachable!("state must be attributes stack only"), + }, + }; gix_status::index_as_worktree( index, repo.work_dir() .context("This operation cannot be run on a bare repository")?, &mut Printer(out), FastEq, + Submodule, { let odb = repo.objects.clone().into_arc()?; move |id, buf| odb.find_blob(id, buf) }, &mut progress, pathspec.detach()?, - gix_status::index_as_worktree::Options { - fs: repo.filesystem_options()?, - thread_limit, - stat: repo.stat_options()?, - }, + repo.filter_pipeline(Some(gix::hash::ObjectId::empty_tree(repo.object_hash())))? + .0 + .into_parts() + .0, + &gix::interrupt::IS_INTERRUPTED, + options, )?; writeln!(err, "\nhead -> index and untracked files aren't implemented yet")?; @@ -72,6 +91,18 @@ pub fn show( Ok(()) } +#[derive(Clone)] +struct Submodule; + +impl gix_status::index_as_worktree::traits::SubmoduleStatus for Submodule { + type Output = (); + type Error = std::convert::Infallible; + + fn status(&mut self, _entry: &Entry, _rela_path: &BStr) -> Result, Self::Error> { + Ok(None) + } +} + struct Printer(W); impl<'index, W> gix_status::index_as_worktree::VisitEntry<'index> for Printer @@ -79,42 +110,54 @@ where W: std::io::Write, { type ContentChange = (); + type SubmoduleStatus = (); fn visit_entry( &mut self, - entry: &'index Entry, + _entries: &'index [Entry], + _entry: &'index Entry, + _entry_index: usize, rela_path: &'index BStr, - change: Option>, - conflict: bool, + status: EntryStatus, ) { - self.visit_inner(entry, rela_path, change, conflict).ok(); + self.visit_inner(rela_path, status).ok(); } } impl Printer { - fn visit_inner( - &mut self, - _entry: &Entry, - rela_path: &BStr, - change: Option>, - conflict: bool, - ) -> anyhow::Result<()> { - if let Some(change) = conflict - .then_some('U') - .or_else(|| change.as_ref().and_then(change_to_char)) - { - writeln!(&mut self.0, "{change} {rela_path}")?; - } - Ok(()) + fn visit_inner(&mut self, rela_path: &BStr, status: EntryStatus<()>) -> std::io::Result<()> { + let char_storage; + let status = match status { + EntryStatus::Conflict(conflict) => as_str(conflict), + EntryStatus::Change(change) => { + char_storage = change_to_char(&change); + std::str::from_utf8(std::slice::from_ref(&char_storage)).expect("valid ASCII") + } + EntryStatus::NeedsUpdate(_) => return Ok(()), + EntryStatus::IntentToAdd => "A", + }; + + writeln!(&mut self.0, "{status: >3} {rela_path}") } } -fn change_to_char(change: &Change<()>) -> Option { +fn as_str(c: Conflict) -> &'static str { + match c { + Conflict::BothDeleted => "DD", + Conflict::AddedByUs => "AU", + Conflict::DeletedByThem => "UD", + Conflict::AddedByThem => "UA", + Conflict::DeletedByUs => "DU", + Conflict::BothAdded => "AA", + Conflict::BothModified => "UU", + } +} + +fn change_to_char(change: &Change<()>) -> u8 { // Known status letters: https://github.com/git/git/blob/6807fcfedab84bc8cd0fbf721bc13c4e68cda9ae/diff.h#L613 - Some(match change { - Change::Removed => 'D', - Change::Type => 'T', - Change::Modification { .. } => 'M', - Change::IntentToAdd => return None, - }) + match change { + Change::Removed => b'D', + Change::Type => b'T', + Change::SubmoduleModification(_) | Change::Modification { .. } => b'M', + } } diff --git a/gix/src/filter.rs b/gix/src/filter.rs index 935c91108ae..b106840a79a 100644 --- a/gix/src/filter.rs +++ b/gix/src/filter.rs @@ -150,8 +150,8 @@ impl<'repo> Pipeline<'repo> { &mut |_, attrs| { entry.matching_attributes(attrs); }, - &mut |rela_path, buf| -> Result<_, gix_odb::find::Error> { - let entry = match index.entry_by_path(rela_path) { + &mut |buf| -> Result<_, gix_odb::find::Error> { + let entry = match index.entry_by_path(gix_path::into_bstr(rela_path).as_ref()) { None => return Ok(None), Some(entry) => entry, }; From 7d9ecdd1c230204468a965f703d5efd00fa7fb79 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 20:12:45 +0200 Subject: [PATCH 18/23] feat: add `Repository::index_or_empty()`. This is useful if a missing index should mean it's empty. --- gix/src/repository/index.rs | 10 ++++++++++ gix/tests/repository/worktree.rs | 3 +++ 2 files changed, 13 insertions(+) diff --git a/gix/src/repository/index.rs b/gix/src/repository/index.rs index a21b138a569..59666fc5f10 100644 --- a/gix/src/repository/index.rs +++ b/gix/src/repository/index.rs @@ -55,6 +55,16 @@ impl crate::Repository { }) } + /// Return the shared worktree index if present, or return a new empty one which has an association to the place where the index would be. + pub fn index_or_empty(&self) -> Result { + Ok(self.try_index()?.unwrap_or_else(|| { + worktree::Index::new(gix_fs::FileSnapshot::new(gix_index::File::from_state( + gix_index::State::new(self.object_hash()), + self.index_path(), + ))) + })) + } + /// Return a shared worktree index which is updated automatically if the in-memory snapshot has become stale as the underlying file /// on disk has changed, or `None` if no such file exists. /// diff --git a/gix/tests/repository/worktree.rs b/gix/tests/repository/worktree.rs index ddeba514274..a3cc89b250f 100644 --- a/gix/tests/repository/worktree.rs +++ b/gix/tests/repository/worktree.rs @@ -111,6 +111,7 @@ mod with_core_worktree_config { } #[test] + #[cfg(feature = "index")] fn bare_relative() -> crate::Result { let repo = repo("bare-relative-worktree"); @@ -123,6 +124,8 @@ mod with_core_worktree_config { repo.work_dir().is_none(), "we simply don't load core.worktree in bare repos either to match this behaviour" ); + assert!(repo.try_index()?.is_none()); + assert!(repo.index_or_empty()?.entries().is_empty()); Ok(()) } From 7ba2fa1c7781230913b0a04ad8684fa7d0143c18 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 27 Sep 2023 17:29:03 +0200 Subject: [PATCH 19/23] feat: `gix status -s/--statistics` to obtain additional information on what happened. This is useful for understanding performance characteristics in detail. --- gitoxide-core/src/repository/status.rs | 21 ++++++++++++++++++--- src/plumbing/main.rs | 7 ++++++- src/plumbing/options/mod.rs | 3 +++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index e2815eb28af..b87caba6401 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -20,6 +20,7 @@ pub struct Options { pub format: OutputFormat, pub submodules: Submodules, pub thread_limit: Option, + pub statistics: bool, } pub fn show( @@ -33,12 +34,13 @@ pub fn show( // TODO: implement this submodules: _, thread_limit, + statistics, }: Options, ) -> anyhow::Result<()> { if format != OutputFormat::Human { bail!("Only human format is supported right now"); } - let mut index = repo.index()?; + let mut index = repo.index_or_empty()?; let index = gix::threading::make_mut(&mut index); let pathspec = repo.pathspec( pathspecs, @@ -65,7 +67,7 @@ pub fn show( _ => unreachable!("state must be attributes stack only"), }, }; - gix_status::index_as_worktree( + let outcome = gix_status::index_as_worktree( index, repo.work_dir() .context("This operation cannot be run on a bare repository")?, @@ -86,6 +88,10 @@ pub fn show( options, )?; + if statistics { + writeln!(err, "{outcome:#?}").ok(); + } + writeln!(err, "\nhead -> index and untracked files aren't implemented yet")?; progress.show_throughput(start); Ok(()) @@ -158,6 +164,15 @@ fn change_to_char(change: &Change<()>) -> u8 { match change { Change::Removed => b'D', Change::Type => b'T', - Change::SubmoduleModification(_) | Change::Modification { .. } => b'M', + Change::SubmoduleModification(_) => b'M', + Change::Modification { + executable_bit_changed, .. + } => { + if *executable_bit_changed { + b'X' + } else { + b'M' + } + } } } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 04a4fa36bcd..10d0d289d35 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -133,7 +133,11 @@ pub fn main() -> Result<()> { })?; match cmd { - Subcommands::Status(crate::plumbing::options::status::Platform { submodules, pathspec }) => prepare_and_run( + Subcommands::Status(crate::plumbing::options::status::Platform { + statistics, + submodules, + pathspec, + }) => prepare_and_run( "status", trace, auto_verbose, @@ -150,6 +154,7 @@ pub fn main() -> Result<()> { progress, core::repository::status::Options { format, + statistics, thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! submodules: match submodules { Submodules::All => core::repository::status::Submodules::All, diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 300f1810e61..8543642ed16 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -205,6 +205,9 @@ pub mod status { /// Define how to display submodule status. #[clap(long, default_value = "all")] pub submodules: Submodules, + /// Print additional statistics to help understanding performance. + #[clap(long, short = 's')] + pub statistics: bool, /// The git path specifications to list attributes for, or unset to read from stdin one per line. #[clap(value_parser = CheckPathSpec)] pub pathspec: Vec, From b5b50f8fdb781b7c9a2ff5c8b462940e5098a94d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 14:22:27 +0200 Subject: [PATCH 20/23] fix: hanging `Read` implementation of `pipeline::convert::ToGitOutcome`. This codepath was never tested and its function more subtle than one could have known. Also fix incorrect configuration handling which could lead to binary files with `text=auto` to be identified as text, which would then require conversion. --- gix-filter/src/pipeline/convert.rs | 2 +- gix-filter/src/pipeline/util.rs | 2 +- gix-filter/tests/pipeline/convert_to_git.rs | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index c98bac3e80b..6a777be1468 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -315,7 +315,7 @@ where match self { ToGitOutcome::Unchanged(r) => r.read(buf), ToGitOutcome::Process(r) => r.read(buf), - ToGitOutcome::Buffer(mut r) => r.read(buf), + ToGitOutcome::Buffer(r) => r.read(buf), } } } diff --git a/gix-filter/src/pipeline/util.rs b/gix-filter/src/pipeline/util.rs index a92ac25efcc..ab10db08ec9 100644 --- a/gix-filter/src/pipeline/util.rs +++ b/gix-filter/src/pipeline/util.rs @@ -170,7 +170,7 @@ impl<'driver> Configuration<'driver> { let attr_digest = digest; digest = match digest { None => Some(config.auto_crlf.into()), - Some(AttributesDigest::TextAuto) => Some(config.to_eol().into()), + Some(AttributesDigest::Text) => Some(config.to_eol().into()), _ => digest, }; diff --git a/gix-filter/tests/pipeline/convert_to_git.rs b/gix-filter/tests/pipeline/convert_to_git.rs index 1bc5f64c0bd..cfa747e2bdd 100644 --- a/gix-filter/tests/pipeline/convert_to_git.rs +++ b/gix-filter/tests/pipeline/convert_to_git.rs @@ -19,7 +19,7 @@ fn no_driver_but_filter_with_autocrlf() -> gix_testtools::Result { ) })?; - let out = pipe.convert_to_git( + let mut out = pipe.convert_to_git( "hi\r\n".as_bytes(), Path::new("any.txt"), &mut |_path, _attrs| {}, @@ -31,6 +31,9 @@ fn no_driver_but_filter_with_autocrlf() -> gix_testtools::Result { "hi\n", "the read is read into memory if there is no driver" ); + let mut buf = Vec::new(); + out.read_to_end(&mut buf)?; + assert_eq!(buf.as_bstr(), "hi\n", "we can consume the output"); Ok(()) } From 46e591914d548bacae2656ffe14a0ea7ca2eb7ae Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 28 Sep 2023 08:02:14 +0200 Subject: [PATCH 21/23] feat: `gix status` auto-writes changed indices. This prevents expensive operations to re-occour. --- gitoxide-core/src/repository/status.rs | 61 +++++++++++++++++++++++--- src/plumbing/main.rs | 2 + src/plumbing/options/mod.rs | 3 ++ 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index b87caba6401..e7cbea20e54 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -21,6 +21,7 @@ pub struct Options { pub submodules: Submodules, pub thread_limit: Option, pub statistics: bool, + pub allow_write: bool, } pub fn show( @@ -34,6 +35,7 @@ pub fn show( // TODO: implement this submodules: _, thread_limit, + allow_write, statistics, }: Options, ) -> anyhow::Result<()> { @@ -67,11 +69,15 @@ pub fn show( _ => unreachable!("state must be attributes stack only"), }, }; + let mut printer = Printer { + out, + changes: Vec::new(), + }; let outcome = gix_status::index_as_worktree( index, repo.work_dir() .context("This operation cannot be run on a bare repository")?, - &mut Printer(out), + &mut printer, FastEq, Submodule, { @@ -88,6 +94,27 @@ pub fn show( options, )?; + if outcome.entries_to_update != 0 && allow_write { + { + let entries = index.entries_mut(); + for (entry_index, change) in printer.changes { + let entry = &mut entries[entry_index]; + match change { + ApplyChange::SetSizeToZero => { + entry.stat.size = 0; + } + ApplyChange::NewStat(new_stat) => { + entry.stat = new_stat; + } + } + } + } + index.write(gix::index::write::Options { + extensions: Default::default(), + skip_hash: false, // TODO: make this based on configuration + })?; + } + if statistics { writeln!(err, "{outcome:#?}").ok(); } @@ -109,7 +136,15 @@ impl gix_status::index_as_worktree::traits::SubmoduleStatus for Submodule { } } -struct Printer(W); +struct Printer { + out: W, + changes: Vec<(usize, ApplyChange)>, +} + +enum ApplyChange { + SetSizeToZero, + NewStat(gix::index::entry::Stat), +} impl<'index, W> gix_status::index_as_worktree::VisitEntry<'index> for Printer where @@ -122,28 +157,40 @@ where &mut self, _entries: &'index [Entry], _entry: &'index Entry, - _entry_index: usize, + entry_index: usize, rela_path: &'index BStr, status: EntryStatus, ) { - self.visit_inner(rela_path, status).ok(); + self.visit_inner(entry_index, rela_path, status).ok(); } } impl Printer { - fn visit_inner(&mut self, rela_path: &BStr, status: EntryStatus<()>) -> std::io::Result<()> { + fn visit_inner(&mut self, entry_index: usize, rela_path: &BStr, status: EntryStatus<()>) -> std::io::Result<()> { let char_storage; let status = match status { EntryStatus::Conflict(conflict) => as_str(conflict), EntryStatus::Change(change) => { + if matches!( + change, + Change::Modification { + set_entry_stat_size_zero: true, + .. + } + ) { + self.changes.push((entry_index, ApplyChange::SetSizeToZero)) + } char_storage = change_to_char(&change); std::str::from_utf8(std::slice::from_ref(&char_storage)).expect("valid ASCII") } - EntryStatus::NeedsUpdate(_) => return Ok(()), + EntryStatus::NeedsUpdate(stat) => { + self.changes.push((entry_index, ApplyChange::NewStat(stat))); + return Ok(()); + } EntryStatus::IntentToAdd => "A", }; - writeln!(&mut self.0, "{status: >3} {rela_path}") + writeln!(&mut self.out, "{status: >3} {rela_path}") } } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 10d0d289d35..2d04a37e17b 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -136,6 +136,7 @@ pub fn main() -> Result<()> { Subcommands::Status(crate::plumbing::options::status::Platform { statistics, submodules, + no_write, pathspec, }) => prepare_and_run( "status", @@ -156,6 +157,7 @@ pub fn main() -> Result<()> { format, statistics, thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! + allow_write: !no_write, submodules: match submodules { Submodules::All => core::repository::status::Submodules::All, Submodules::RefChange => core::repository::status::Submodules::RefChange, diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 8543642ed16..64c67e43e5a 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -208,6 +208,9 @@ pub mod status { /// Print additional statistics to help understanding performance. #[clap(long, short = 's')] pub statistics: bool, + /// Don't write back a changed index, which forces this operation to always be idempotent. + #[clap(long)] + pub no_write: bool, /// The git path specifications to list attributes for, or unset to read from stdin one per line. #[clap(value_parser = CheckPathSpec)] pub pathspec: Vec, From f929d420cb768f2df1d7886564ca03b3c3254a82 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 29 Sep 2023 09:55:22 +0200 Subject: [PATCH 22/23] trust Ctime again It seems to work now, but let's keep an eye on it. --- gix/src/config/cache/access.rs | 8 +------- gix/src/config/tree/sections/core.rs | 3 +-- src/plumbing/main.rs | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 352bc97124b..5f4fbe72f11 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -158,13 +158,7 @@ impl Cache { pub(crate) fn stat_options(&self) -> Result { use crate::config::tree::gitoxide; Ok(gix_index::entry::stat::Options { - trust_ctime: boolean( - self, - "core.trustCTime", - &Core::TRUST_C_TIME, - // For now, on MacOS it's known to not be trust-worthy at least with the Rust STDlib, being 2s off - !cfg!(target_os = "macos"), - )?, + trust_ctime: boolean(self, "core.trustCTime", &Core::TRUST_C_TIME, true)?, use_nsec: boolean(self, "gitoxide.core.useNsec", &gitoxide::Core::USE_NSEC, false)?, use_stdev: boolean(self, "gitoxide.core.useStdev", &gitoxide::Core::USE_STDEV, false)?, check_stat: self diff --git a/gix/src/config/tree/sections/core.rs b/gix/src/config/tree/sections/core.rs index ab3e2bab93f..2ec5c279ea3 100644 --- a/gix/src/config/tree/sections/core.rs +++ b/gix/src/config/tree/sections/core.rs @@ -45,8 +45,7 @@ impl Core { /// The `core.symlinks` key. pub const SYMLINKS: keys::Boolean = keys::Boolean::new_boolean("symlinks", &config::Tree::CORE); /// The `core.trustCTime` key. - pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE) - .with_deviation("Currently the default is false, instead of true, as it seems to be 2s off in tests"); + pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE); /// The `core.worktree` key. pub const WORKTREE: keys::Any = keys::Any::new("worktree", &config::Tree::CORE) .with_environment_override("GIT_WORK_TREE") diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 2d04a37e17b..a7c8d8a69bc 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -156,7 +156,7 @@ pub fn main() -> Result<()> { core::repository::status::Options { format, statistics, - thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! + thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! MacOS also scales if reading a lot of files for refresh index allow_write: !no_write, submodules: match submodules { Submodules::All => core::repository::status::Submodules::All, From 67a0220dc4c4ff6f79a24771fb175baec22bc6b0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Oct 2023 13:37:25 +0200 Subject: [PATCH 23/23] fix gix-command tests on windows It seems windows now has a windows-unspecific `echo` program and one can't really rely on it producing windows style newlines. Now we use printf which is more standard and can be used to validate multiple arguments as well. --- gix-command/tests/command.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/gix-command/tests/command.rs b/gix-command/tests/command.rs index e2faa7fc929..606066da131 100644 --- a/gix-command/tests/command.rs +++ b/gix-command/tests/command.rs @@ -82,32 +82,26 @@ mod spawn { #[test] fn sh_shell_specific_script_code_with_single_extra_arg() -> crate::Result { - let out = gix_command::prepare("echo") + let out = gix_command::prepare("printf") .with_shell() .arg("1") .spawn()? .wait_with_output()?; assert!(out.status.success()); - #[cfg(not(windows))] - assert_eq!(out.stdout.as_bstr(), "1\n"); - #[cfg(windows)] - assert_eq!(out.stdout.as_bstr(), "1\r\n"); + assert_eq!(out.stdout.as_bstr(), "1"); Ok(()) } #[test] fn sh_shell_specific_script_code_with_multiple_extra_args() -> crate::Result { - let out = gix_command::prepare("echo") + let out = gix_command::prepare("printf") .with_shell() - .arg("1") - .arg("2") + .arg("%s") + .arg("arg") .spawn()? .wait_with_output()?; assert!(out.status.success()); - #[cfg(not(windows))] - assert_eq!(out.stdout.as_bstr(), "1 2\n"); - #[cfg(windows)] - assert_eq!(out.stdout.as_bstr(), "1 2\r\n"); + assert_eq!(out.stdout.as_bstr(), "arg"); Ok(()) } }