diff --git a/Cargo.lock b/Cargo.lock index 627d4c7b2a4..4a08226543b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2253,12 +2253,14 @@ dependencies = [ "bstr", "filetime", "gix-features 0.35.0", + "gix-filter", "gix-fs 0.7.0", "gix-hash 0.13.0", "gix-index 0.25.0", "gix-object 0.37.0", "gix-path 0.10.0", "gix-pathspec", + "gix-worktree 0.26.0", "thiserror", ] diff --git a/crate-status.md b/crate-status.md index 3991742e00d..e6f70ea0391 100644 --- a/crate-status.md +++ b/crate-status.md @@ -456,7 +456,9 @@ Make it the best-performing implementation and the most convenient one. ### gix-status * [x] differences between index and worktree to turn index into worktree -* [ ] differences between tree and index to turn tree into index + - [ ] rename tracking +* [ ] differences between index and index to learn what changed + - [ ] rename tracking * [ ] untracked files * [ ] fast answer to 'is it dirty'. * diff --git a/gitoxide-core/src/repository/index/entries.rs b/gitoxide-core/src/repository/index/entries.rs index 354ad77ebff..7f68b8f58b2 100644 --- a/gitoxide-core/src/repository/index/entries.rs +++ b/gitoxide-core/src/repository/index/entries.rs @@ -404,9 +404,10 @@ pub(crate) mod function { out, "{} {}{:?} {} {}{}{}", match entry.flags.stage() { - 0 => "BASE ", - 1 => "OURS ", - 2 => "THEIRS ", + 0 => " ", + 1 => "BASE ", + 2 => "OURS ", + 3 => "THEIRS ", _ => "UNKNOWN", }, if entry.flags.is_empty() { diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index 2b892d53190..e7cbea20e54 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -4,8 +4,8 @@ use gix::bstr::{BStr, BString}; use gix::index::Entry; use gix::prelude::FindExt; use gix::Progress; -use gix_status::index_as_worktree::content::FastEq; -use gix_status::index_as_worktree::Change; +use gix_status::index_as_worktree::traits::FastEq; +use gix_status::index_as_worktree::{Change, Conflict, EntryStatus}; pub enum Submodules { /// display all information about submodules, including ref changes, modifications and untracked files. @@ -20,6 +20,8 @@ pub struct Options { pub format: OutputFormat, pub submodules: Submodules, pub thread_limit: Option, + pub statistics: bool, + pub allow_write: bool, } pub fn show( @@ -33,12 +35,14 @@ pub fn show( // TODO: implement this submodules: _, thread_limit, + allow_write, + statistics, }: Options, ) -> anyhow::Result<()> { if format != OutputFormat::Human { bail!("Only human format is supported right now"); } - let mut index = repo.index()?; + let mut index = repo.index_or_empty()?; let index = gix::threading::make_mut(&mut index); let pathspec = repo.pathspec( pathspecs, @@ -48,73 +52,174 @@ pub fn show( )?; let mut progress = progress.add_child("traverse index"); let start = std::time::Instant::now(); - gix_status::index_as_worktree( + let options = gix_status::index_as_worktree::Options { + fs: repo.filesystem_options()?, + thread_limit, + stat: repo.stat_options()?, + attributes: match repo + .attributes_only( + index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + )? 
+ .detach() + .state_mut() + { + gix::worktree::stack::State::AttributesStack(attrs) => std::mem::take(attrs), + // TODO: this should be nicer by creating attributes directly, but it's a private API + _ => unreachable!("state must be attributes stack only"), + }, + }; + let mut printer = Printer { + out, + changes: Vec::new(), + }; + let outcome = gix_status::index_as_worktree( index, repo.work_dir() .context("This operation cannot be run on a bare repository")?, - &mut Printer(out), + &mut printer, FastEq, + Submodule, { let odb = repo.objects.clone().into_arc()?; move |id, buf| odb.find_blob(id, buf) }, &mut progress, pathspec.detach()?, - gix_status::index_as_worktree::Options { - fs: repo.filesystem_options()?, - thread_limit, - stat: repo.stat_options()?, - }, + repo.filter_pipeline(Some(gix::hash::ObjectId::empty_tree(repo.object_hash())))? + .0 + .into_parts() + .0, + &gix::interrupt::IS_INTERRUPTED, + options, )?; + if outcome.entries_to_update != 0 && allow_write { + { + let entries = index.entries_mut(); + for (entry_index, change) in printer.changes { + let entry = &mut entries[entry_index]; + match change { + ApplyChange::SetSizeToZero => { + entry.stat.size = 0; + } + ApplyChange::NewStat(new_stat) => { + entry.stat = new_stat; + } + } + } + } + index.write(gix::index::write::Options { + extensions: Default::default(), + skip_hash: false, // TODO: make this based on configuration + })?; + } + + if statistics { + writeln!(err, "{outcome:#?}").ok(); + } + writeln!(err, "\nhead -> index and untracked files aren't implemented yet")?; progress.show_throughput(start); Ok(()) } -struct Printer<W>(W); +#[derive(Clone)] +struct Submodule; + +impl gix_status::index_as_worktree::traits::SubmoduleStatus for Submodule { + type Output = (); + type Error = std::convert::Infallible; + + fn status(&mut self, _entry: &Entry, _rela_path: &BStr) -> Result<Option<Self::Output>, Self::Error> { + Ok(None) + } +} + +struct Printer<W> { + out: W, + changes: Vec<(usize, ApplyChange)>, +} + +enum ApplyChange { + SetSizeToZero, + NewStat(gix::index::entry::Stat), +} impl<'index, W> gix_status::index_as_worktree::VisitEntry<'index> for Printer<W> where W: std::io::Write, { type ContentChange = (); + type SubmoduleStatus = (); fn visit_entry( &mut self, - entry: &'index Entry, + _entries: &'index [Entry], + _entry: &'index Entry, + entry_index: usize, rela_path: &'index BStr, - change: Option<Change<Self::ContentChange>>, - conflict: bool, + status: EntryStatus<Self::ContentChange, Self::SubmoduleStatus>, ) { - self.visit_inner(entry, rela_path, change, conflict).ok(); + self.visit_inner(entry_index, rela_path, status).ok(); } } impl<W: std::io::Write> Printer<W> { - fn visit_inner( - &mut self, - _entry: &Entry, - rela_path: &BStr, - change: Option<Change<()>>, - conflict: bool, - ) -> anyhow::Result<()> { - if let Some(change) = conflict - .then_some('U') - .or_else(|| change.as_ref().and_then(change_to_char)) - { - writeln!(&mut self.0, "{change} {rela_path}")?; - } - Ok(()) + fn visit_inner(&mut self, entry_index: usize, rela_path: &BStr, status: EntryStatus<()>) -> std::io::Result<()> { + let char_storage; + let status = match status { + EntryStatus::Conflict(conflict) => as_str(conflict), + EntryStatus::Change(change) => { + if matches!( + change, + Change::Modification { + set_entry_stat_size_zero: true, + ..
+ } + ) { + self.changes.push((entry_index, ApplyChange::SetSizeToZero)) + } + char_storage = change_to_char(&change); + std::str::from_utf8(std::slice::from_ref(&char_storage)).expect("valid ASCII") + } + EntryStatus::NeedsUpdate(stat) => { + self.changes.push((entry_index, ApplyChange::NewStat(stat))); + return Ok(()); + } + EntryStatus::IntentToAdd => "A", + }; + + writeln!(&mut self.out, "{status: >3} {rela_path}") + } +} + +fn as_str(c: Conflict) -> &'static str { + match c { + Conflict::BothDeleted => "DD", + Conflict::AddedByUs => "AU", + Conflict::DeletedByThem => "UD", + Conflict::AddedByThem => "UA", + Conflict::DeletedByUs => "DU", + Conflict::BothAdded => "AA", + Conflict::BothModified => "UU", } } -fn change_to_char(change: &Change<()>) -> Option { +fn change_to_char(change: &Change<()>) -> u8 { // Known status letters: https://github.com/git/git/blob/6807fcfedab84bc8cd0fbf721bc13c4e68cda9ae/diff.h#L613 - Some(match change { - Change::Removed => 'D', - Change::Type => 'T', - Change::Modification { .. } => 'M', - Change::IntentToAdd => return None, - }) + match change { + Change::Removed => b'D', + Change::Type => b'T', + Change::SubmoduleModification(_) => b'M', + Change::Modification { + executable_bit_changed, .. + } => { + if *executable_bit_changed { + b'X' + } else { + b'M' + } + } + } } diff --git a/gix-command/tests/command.rs b/gix-command/tests/command.rs index e2faa7fc929..606066da131 100644 --- a/gix-command/tests/command.rs +++ b/gix-command/tests/command.rs @@ -82,32 +82,26 @@ mod spawn { #[test] fn sh_shell_specific_script_code_with_single_extra_arg() -> crate::Result { - let out = gix_command::prepare("echo") + let out = gix_command::prepare("printf") .with_shell() .arg("1") .spawn()? .wait_with_output()?; assert!(out.status.success()); - #[cfg(not(windows))] - assert_eq!(out.stdout.as_bstr(), "1\n"); - #[cfg(windows)] - assert_eq!(out.stdout.as_bstr(), "1\r\n"); + assert_eq!(out.stdout.as_bstr(), "1"); Ok(()) } #[test] fn sh_shell_specific_script_code_with_multiple_extra_args() -> crate::Result { - let out = gix_command::prepare("echo") + let out = gix_command::prepare("printf") .with_shell() - .arg("1") - .arg("2") + .arg("%s") + .arg("arg") .spawn()? .wait_with_output()?; assert!(out.status.success()); - #[cfg(not(windows))] - assert_eq!(out.stdout.as_bstr(), "1 2\n"); - #[cfg(windows)] - assert_eq!(out.stdout.as_bstr(), "1 2\r\n"); + assert_eq!(out.stdout.as_bstr(), "arg"); Ok(()) } } diff --git a/gix-features/src/hash.rs b/gix-features/src/hash.rs index 435e018e906..33054a7836c 100644 --- a/gix-features/src/hash.rs +++ b/gix-features/src/hash.rs @@ -96,7 +96,7 @@ pub fn hasher(kind: gix_hash::Kind) -> Sha1 { #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes_of_file( path: &std::path::Path, - num_bytes_from_start: usize, + num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, @@ -110,28 +110,42 @@ pub fn bytes_of_file( ) } -/// Similar to [`bytes_of_file`], but operates on an already open file. +/// Similar to [`bytes_of_file`], but operates on a stream of bytes. 
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes( read: &mut dyn std::io::Read, - num_bytes_from_start: usize, + num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result<gix_hash::ObjectId> { - let mut hasher = hasher(kind); + bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) +} + +/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind. +#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] +pub fn bytes_with_hasher( + read: &mut dyn std::io::Read, + num_bytes_from_start: u64, + mut hasher: Sha1, + progress: &mut dyn crate::progress::Progress, + should_interrupt: &std::sync::atomic::AtomicBool, +) -> std::io::Result<gix_hash::ObjectId> { + let start = std::time::Instant::now(); // init progress before the possibility for failure, as convenience in case people want to recover - progress.init(Some(num_bytes_from_start), crate::progress::bytes()); + progress.init( + Some(num_bytes_from_start as prodash::progress::Step), + crate::progress::bytes(), + ); const BUF_SIZE: usize = u16::MAX as usize; let mut buf = [0u8; BUF_SIZE]; let mut bytes_left = num_bytes_from_start; while bytes_left > 0 { - let out = &mut buf[..BUF_SIZE.min(bytes_left)]; + let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)]; read.read_exact(out)?; - bytes_left -= out.len(); + bytes_left -= out.len() as u64; progress.inc_by(out.len()); hasher.update(out); if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) { diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index 1294944cb8c..6a777be1468 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -21,12 +21,9 @@ pub mod configuration { /// pub mod to_git { - use bstr::BStr; - - /// A function that writes a buffer like `fn(rela_path, &mut buf)` with bytes of an object in the index that is the one - /// that should be converted. + /// A function that fills `buf` like `fn(&mut buf)` with the data stored in the index of the file that should be converted. pub type IndexObjectFn<'a> = - dyn FnMut(&BStr, &mut Vec<u8>) -> Result<Option<()>, Box<dyn std::error::Error + Send + Sync>> + 'a; + dyn FnMut(&mut Vec<u8>) -> Result<Option<()>, Box<dyn std::error::Error + Send + Sync>> + 'a; /// The error returned by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()]. #[derive(Debug, thiserror::Error)] @@ -91,7 +88,7 @@ impl Pipeline { self.options.eol_config, )?; - let mut changed = false; + let mut in_buffer = false; // this is just an approximation, but it's as good as it gets without reading the actual input. let would_convert_eol = eol::convert_to_git( b"\r\n", @@ -119,12 +116,13 @@ } self.bufs.clear(); read.read_to_end(&mut self.bufs.src)?; - changed = true; + in_buffer = true; } } - if !changed && (apply_ident_filter || encoding.is_some() || would_convert_eol) { + if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { self.bufs.clear(); src.read_to_end(&mut self.bufs.src)?; + in_buffer = true; } if let Some(encoding) = encoding { @@ -139,28 +137,25 @@ }, )?; self.bufs.swap(); - changed = true; } if eol::convert_to_git( &self.bufs.src, digest, &mut self.bufs.dest, - &mut |buf| index_object(bstr_path.as_ref(), buf), + &mut |buf| index_object(buf), eol::convert_to_git::Options { round_trip_check: self.options.crlf_roundtrip_check.to_eol_roundtrip_check(rela_path), config: self.options.eol_config, }, )?
{ self.bufs.swap(); - changed = true; } if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest) { self.bufs.swap(); - changed = true; } - Ok(if changed { + Ok(if in_buffer { ToGitOutcome::Buffer(&self.bufs.src) } else { ToGitOutcome::Unchanged(src) @@ -320,17 +315,17 @@ where match self { ToGitOutcome::Unchanged(r) => r.read(buf), ToGitOutcome::Process(r) => r.read(buf), - ToGitOutcome::Buffer(mut r) => r.read(buf), + ToGitOutcome::Buffer(r) => r.read(buf), } } } -impl ToGitOutcome<'_, R> +impl<'a, R> ToGitOutcome<'a, R> where R: std::io::Read, { /// If we contain a buffer, and not a stream, return it. - pub fn as_bytes(&self) -> Option<&[u8]> { + pub fn as_bytes(&self) -> Option<&'a [u8]> { match self { ToGitOutcome::Unchanged(_) | ToGitOutcome::Process(_) => None, ToGitOutcome::Buffer(b) => Some(b), diff --git a/gix-filter/src/pipeline/util.rs b/gix-filter/src/pipeline/util.rs index a92ac25efcc..ab10db08ec9 100644 --- a/gix-filter/src/pipeline/util.rs +++ b/gix-filter/src/pipeline/util.rs @@ -170,7 +170,7 @@ impl<'driver> Configuration<'driver> { let attr_digest = digest; digest = match digest { None => Some(config.auto_crlf.into()), - Some(AttributesDigest::TextAuto) => Some(config.to_eol().into()), + Some(AttributesDigest::Text) => Some(config.to_eol().into()), _ => digest, }; diff --git a/gix-filter/tests/pipeline/convert_to_git.rs b/gix-filter/tests/pipeline/convert_to_git.rs index 3a627f28d43..cfa747e2bdd 100644 --- a/gix-filter/tests/pipeline/convert_to_git.rs +++ b/gix-filter/tests/pipeline/convert_to_git.rs @@ -1,6 +1,6 @@ use std::{io::Read, path::Path}; -use bstr::{BStr, ByteSlice}; +use bstr::ByteSlice; use gix_filter::{eol, pipeline::CrlfRoundTripCheck}; use crate::{driver::apply::driver_with_process, pipeline::pipeline}; @@ -19,7 +19,7 @@ fn no_driver_but_filter_with_autocrlf() -> gix_testtools::Result { ) })?; - let out = pipe.convert_to_git( + let mut out = pipe.convert_to_git( "hi\r\n".as_bytes(), Path::new("any.txt"), &mut |_path, _attrs| {}, @@ -31,6 +31,9 @@ fn no_driver_but_filter_with_autocrlf() -> gix_testtools::Result { "hi\n", "the read is read into memory if there is no driver" ); + let mut buf = Vec::new(); + out.read_to_end(&mut buf)?; + assert_eq!(buf.as_bstr(), "hi\n", "we can consume the output"); Ok(()) } @@ -132,14 +135,11 @@ fn no_filter_means_reader_is_returned_unchanged() -> gix_testtools::Result { } #[allow(clippy::ptr_arg)] -fn no_call(_path: &BStr, _buf: &mut Vec) -> Result, Box> { +fn no_call(_buf: &mut Vec) -> Result, Box> { unreachable!("index function will not be called") } #[allow(clippy::ptr_arg)] -fn no_object_in_index( - _path: &BStr, - _buf: &mut Vec, -) -> Result, Box> { +fn no_object_in_index(_buf: &mut Vec) -> Result, Box> { Ok(None) } diff --git a/gix-index/src/access/mod.rs b/gix-index/src/access/mod.rs index 08cb2302011..2a3e85f1120 100644 --- a/gix-index/src/access/mod.rs +++ b/gix-index/src/access/mod.rs @@ -4,7 +4,7 @@ use std::ops::Range; use bstr::{BStr, ByteSlice, ByteVec}; use filetime::FileTime; -use crate::{entry, extension, Entry, PathStorage, State, Version}; +use crate::{entry, extension, Entry, PathStorage, PathStorageRef, State, Version}; // TODO: integrate this somehow, somewhere, depending on later usage. #[allow(dead_code)] @@ -41,7 +41,7 @@ impl State { &self.entries } /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them. 
- pub fn path_backing(&self) -> &PathStorage { + pub fn path_backing(&self) -> &PathStorageRef { &self.path_backing } @@ -58,7 +58,7 @@ impl State { /// Return mutable entries along with their path, as obtained from `backing`. pub fn entries_mut_with_paths_in<'state, 'backing>( &'state mut self, - backing: &'backing PathStorage, + backing: &'backing PathStorageRef, ) -> impl Iterator { self.entries.iter_mut().map(move |e| { let path = backing[e.path.clone()].as_bstr(); @@ -279,7 +279,7 @@ impl State { } /// Return a writable slice to entries and read-access to their path storage at the same time. - pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorage) { + pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) { (&mut self.entries, &self.path_backing) } diff --git a/gix-index/src/entry/mod.rs b/gix-index/src/entry/mod.rs index e680e08b06d..2f257ef633d 100644 --- a/gix-index/src/entry/mod.rs +++ b/gix-index/src/entry/mod.rs @@ -1,4 +1,8 @@ -/// The stage of an entry, one of 0 = base, 1 = ours, 2 = theirs +/// The stage of an entry, one of… +/// * 0 = no conflict, +/// * 1 = base, +/// * 2 = ours, +/// * 3 = theirs pub type Stage = u32; /// @@ -71,7 +75,7 @@ mod access { backing[self.path.clone()].as_bstr() } - /// Return an entry's stage. + /// Return an entry's stage. See [entry::Stage] for possible values. pub fn stage(&self) -> entry::Stage { self.flags.stage() } diff --git a/gix-index/src/entry/stat.rs b/gix-index/src/entry/stat.rs index 7bde717633f..5e60f8540be 100644 --- a/gix-index/src/entry/stat.rs +++ b/gix-index/src/entry/stat.rs @@ -92,21 +92,22 @@ impl Stat { size: fstat.len() as u32, }; #[cfg(unix)] - use std::os::unix::fs::MetadataExt; - #[cfg(unix)] - let res = Stat { - mtime: mtime.try_into().unwrap_or_default(), - ctime: ctime.try_into().unwrap_or_default(), - // truncating to 32 bits is fine here because - // that's what the linux syscalls returns - // just rust upcasts to 64 bits for some reason? - // numbers this large are impractical anyway (that's a lot of hard-drives). - dev: fstat.dev() as u32, - ino: fstat.ino() as u32, - uid: fstat.uid(), - gid: fstat.gid(), - // truncation to 32 bits is on purpose (git does the same). - size: fstat.len() as u32, + let res = { + use std::os::unix::fs::MetadataExt; + Stat { + mtime: mtime.try_into().unwrap_or_default(), + ctime: ctime.try_into().unwrap_or_default(), + // truncating to 32 bits is fine here because + // that's what the linux syscalls returns + // just rust upcasts to 64 bits for some reason? + // numbers this large are impractical anyway (that's a lot of hard-drives). + dev: fstat.dev() as u32, + ino: fstat.ino() as u32, + uid: fstat.uid(), + gid: fstat.gid(), + // truncation to 32 bits is on purpose (git does the same). 
+ size: fstat.len() as u32, + } }; Ok(res) diff --git a/gix-index/src/file/init.rs b/gix-index/src/file/init.rs index 99f4be258cf..ed13bed6e9b 100644 --- a/gix-index/src/file/init.rs +++ b/gix-index/src/file/init.rs @@ -78,7 +78,7 @@ impl File { let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64; let actual_hash = gix_features::hash::bytes( &mut file, - num_bytes_to_hash as usize, + num_bytes_to_hash, object_hash, &mut gix_features::progress::Discard, &Default::default(), diff --git a/gix-index/src/file/verify.rs b/gix-index/src/file/verify.rs index 3890acd9524..1c0dc7539b6 100644 --- a/gix-index/src/file/verify.rs +++ b/gix-index/src/file/verify.rs @@ -27,7 +27,7 @@ impl File { let should_interrupt = AtomicBool::new(false); let actual = gix_features::hash::bytes_of_file( &self.path, - num_bytes_to_hash as usize, + num_bytes_to_hash, checksum.kind(), &mut gix_features::progress::Discard, &should_interrupt, diff --git a/gix-index/src/file/write.rs b/gix-index/src/file/write.rs index 47a4cde9661..0623f6c59c3 100644 --- a/gix-index/src/file/write.rs +++ b/gix-index/src/file/write.rs @@ -22,6 +22,7 @@ impl File { mut out: impl std::io::Write, options: write::Options, ) -> std::io::Result<(Version, gix_hash::ObjectId)> { + let _span = gix_features::trace::detail!("gix_index::File::write_to()", skip_hash = options.skip_hash); let (version, hash) = if options.skip_hash { let out: &mut dyn std::io::Write = &mut out; let version = self.state.write_to(out, options)?; @@ -40,6 +41,7 @@ impl File { /// /// Note that the hash produced will be stored which is why we need to be mutable. pub fn write(&mut self, options: write::Options) -> Result<(), Error> { + let _span = gix_features::trace::detail!("gix_index::File::write()", path = ?self.path); let mut lock = std::io::BufWriter::with_capacity( 64 * 1024, gix_lock::File::acquire_to_update_resource(&self.path, gix_lock::acquire::Fail::Immediately, None)?, diff --git a/gix-index/src/lib.rs b/gix-index/src/lib.rs index 1fb1f68009b..55b332a8280 100644 --- a/gix-index/src/lib.rs +++ b/gix-index/src/lib.rs @@ -121,9 +121,10 @@ mod impls { f, "{} {}{:?} {} {}", match entry.flags.stage() { - 0 => "BASE ", - 1 => "OURS ", - 2 => "THEIRS ", + 0 => " ", + 1 => "BASE ", + 2 => "OURS ", + 3 => "THEIRS ", _ => "UNKNOWN", }, if entry.flags.is_empty() { diff --git a/gix-object/src/blob.rs b/gix-object/src/blob.rs index d0a42092c5c..57db54758f6 100644 --- a/gix-object/src/blob.rs +++ b/gix-object/src/blob.rs @@ -12,8 +12,8 @@ impl<'a> crate::WriteTo for BlobRef<'a> { Kind::Blob } - fn size(&self) -> usize { - self.data.len() + fn size(&self) -> u64 { + self.data.len() as u64 } } @@ -27,7 +27,7 @@ impl crate::WriteTo for Blob { Kind::Blob } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.to_ref().size() } } diff --git a/gix-object/src/commit/write.rs b/gix-object/src/commit/write.rs index 667d257634d..efad2b6db53 100644 --- a/gix-object/src/commit/write.rs +++ b/gix-object/src/commit/write.rs @@ -27,9 +27,9 @@ impl crate::WriteTo for Commit { Kind::Commit } - fn size(&self) -> usize { + fn size(&self) -> u64 { let hash_in_hex = self.tree.kind().len_in_hex(); - b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + (b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 + hash_in_hex + 1) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ @@ -46,7 +46,7 @@ impl crate::WriteTo for Commit { }) 
.sum::() + 1 /* nl */ - + self.message.len() + + self.message.len()) as u64 } } @@ -73,9 +73,9 @@ impl<'a> crate::WriteTo for CommitRef<'a> { Kind::Commit } - fn size(&self) -> usize { + fn size(&self) -> u64 { let hash_in_hex = self.tree().kind().len_in_hex(); - b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + (b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ @@ -92,6 +92,6 @@ impl<'a> crate::WriteTo for CommitRef<'a> { }) .sum::() + 1 /* nl */ - + self.message.len() + + self.message.len()) as u64 } } diff --git a/gix-object/src/encode.rs b/gix-object/src/encode.rs index 3ba7db0b58a..04a79194863 100644 --- a/gix-object/src/encode.rs +++ b/gix-object/src/encode.rs @@ -19,7 +19,7 @@ macro_rules! check { }; } /// Generates a loose header buffer -pub fn loose_header(kind: crate::Kind, size: usize) -> smallvec::SmallVec<[u8; 28]> { +pub fn loose_header(kind: crate::Kind, size: u64) -> smallvec::SmallVec<[u8; 28]> { let mut v = smallvec::SmallVec::new(); check!(v.write_all(kind.as_bytes())); check!(v.write_all(SPACE)); diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 56e0019fd8c..63a356450b6 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -345,7 +345,7 @@ pub mod decode { /// ([`kind`](super::Kind), `size`, `consumed bytes`). /// /// `size` is the uncompressed size of the payload in bytes. - pub fn loose_header(input: &[u8]) -> Result<(super::Kind, usize, usize), LooseHeaderDecodeError> { + pub fn loose_header(input: &[u8]) -> Result<(super::Kind, u64, usize), LooseHeaderDecodeError> { use LooseHeaderDecodeError::*; let kind_end = input.find_byte(0x20).ok_or(InvalidHeader { message: "Expected ' '", @@ -366,7 +366,7 @@ pub mod decode { /// A standalone function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`. pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId { - let header = encode::loose_header(object_kind, data.len()); + let header = encode::loose_header(object_kind, data.len() as u64); let mut hasher = gix_features::hash::hasher(hash_kind); hasher.update(&header); diff --git a/gix-object/src/object/mod.rs b/gix-object/src/object/mod.rs index bebc1cc6522..53e9b4178e0 100644 --- a/gix-object/src/object/mod.rs +++ b/gix-object/src/object/mod.rs @@ -24,7 +24,7 @@ mod write { self.kind() } - fn size(&self) -> usize { + fn size(&self) -> u64 { use crate::ObjectRef::*; match self { Tree(v) => v.size(), @@ -52,7 +52,7 @@ mod write { self.kind() } - fn size(&self) -> usize { + fn size(&self) -> u64 { use crate::Object::*; match self { Tree(v) => v.size(), @@ -185,6 +185,8 @@ pub enum LooseDecodeError { InvalidHeader(#[from] LooseHeaderDecodeError), #[error(transparent)] InvalidContent(#[from] DecodeError), + #[error("Object sized {size} does not fit into memory - this can happen on 32 bit systems")] + OutOfMemory { size: u64 }, } impl<'a> ObjectRef<'a> { @@ -193,7 +195,7 @@ impl<'a> ObjectRef<'a> { let (kind, size, offset) = loose_header(data)?; let body = &data[offset..] - .get(..size) + .get(..size.try_into().map_err(|_| LooseDecodeError::OutOfMemory { size })?) 
.ok_or(LooseHeaderDecodeError::InvalidHeader { message: "object data was shorter than its size declared in the header", })?; diff --git a/gix-object/src/tag/write.rs b/gix-object/src/tag/write.rs index cee9e587c69..dc8fd1ba336 100644 --- a/gix-object/src/tag/write.rs +++ b/gix-object/src/tag/write.rs @@ -44,8 +44,8 @@ impl crate::WriteTo for Tag { Kind::Tag } - fn size(&self) -> usize { - b"object".len() + 1 /* space */ + self.target.kind().len_in_hex() + 1 /* nl */ + fn size(&self) -> u64 { + (b"object".len() + 1 /* space */ + self.target.kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self @@ -53,7 +53,7 @@ impl crate::WriteTo for Tag { .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() - + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len()) + + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } @@ -81,8 +81,8 @@ impl<'a> crate::WriteTo for TagRef<'a> { Kind::Tag } - fn size(&self) -> usize { - b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + fn size(&self) -> u64 { + (b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self @@ -90,7 +90,7 @@ impl<'a> crate::WriteTo for TagRef<'a> { .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() - + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len()) + + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } diff --git a/gix-object/src/traits.rs b/gix-object/src/traits.rs index c0c4adee2f5..ce0463c988c 100644 --- a/gix-object/src/traits.rs +++ b/gix-object/src/traits.rs @@ -17,7 +17,7 @@ pub trait WriteTo { /// the object, as such it's possible for [`size`](Self::size) to /// return a sensible value but [`write_to`](Self::write_to) to /// fail because the object was not actually valid in some way. 
- fn size(&self) -> usize; + fn size(&self) -> u64; /// Returns a loose object header based on the object's data fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { @@ -37,7 +37,7 @@ where ::kind(self) } - fn size(&self) -> usize { + fn size(&self) -> u64 { ::size(self) } } diff --git a/gix-object/src/tree/write.rs b/gix-object/src/tree/write.rs index 91e1dc2e0ea..e1a82720f1f 100644 --- a/gix-object/src/tree/write.rs +++ b/gix-object/src/tree/write.rs @@ -57,10 +57,12 @@ impl crate::WriteTo for Tree { Kind::Tree } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.entries .iter() - .map(|Entry { mode, filename, oid }| mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) + .map(|Entry { mode, filename, oid }| { + (mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64 + }) .sum() } } @@ -100,11 +102,11 @@ impl<'a> crate::WriteTo for TreeRef<'a> { Kind::Tree } - fn size(&self) -> usize { + fn size(&self) -> u64 { self.entries .iter() .map(|EntryRef { mode, filename, oid }| { - mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len() + (mode.as_bytes().len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64 }) .sum() } diff --git a/gix-odb/src/find.rs b/gix-odb/src/find.rs index bf807e27c0d..196845c4d7c 100644 --- a/gix-odb/src/find.rs +++ b/gix-odb/src/find.rs @@ -104,11 +104,11 @@ mod header { } } - impl From<(usize, gix_object::Kind)> for Header { - fn from((object_size, kind): (usize, gix_object::Kind)) -> Self { + impl From<(u64, gix_object::Kind)> for Header { + fn from((object_size, kind): (u64, gix_object::Kind)) -> Self { Header::Loose { kind, - size: object_size as u64, + size: object_size, } } } diff --git a/gix-odb/src/sink.rs b/gix-odb/src/sink.rs index f6334a51c5b..7784901a8c9 100644 --- a/gix-odb/src/sink.rs +++ b/gix-odb/src/sink.rs @@ -1,6 +1,5 @@ use std::{ cell::RefCell, - convert::TryInto, io::{self, Write}, }; @@ -24,11 +23,10 @@ impl crate::traits::Write for Sink { fn write_stream( &self, kind: gix_object::Kind, - size: u64, + mut size: u64, from: &mut dyn io::Read, ) -> Result { - let mut size = size.try_into().expect("object size to fit into usize"); - let mut buf = [0u8; 8096]; + let mut buf = [0u8; u16::MAX as usize]; let header = gix_object::encode::loose_header(kind, size); let possibly_compress = |buf: &[u8]| -> io::Result<()> { @@ -43,11 +41,11 @@ impl crate::traits::Write for Sink { possibly_compress(&header).map_err(Box::new)?; while size != 0 { - let bytes = size.min(buf.len()); + let bytes = (size as usize).min(buf.len()); from.read_exact(&mut buf[..bytes]).map_err(Box::new)?; hasher.update(&buf[..bytes]); possibly_compress(&buf[..bytes]).map_err(Box::new)?; - size -= bytes; + size -= bytes as u64; } if let Some(compressor) = self.compressor.as_ref() { let mut c = compressor.borrow_mut(); diff --git a/gix-odb/src/store_impls/loose/find.rs b/gix-odb/src/store_impls/loose/find.rs index 91bf0ba871d..4116a045348 100644 --- a/gix-odb/src/store_impls/loose/find.rs +++ b/gix-odb/src/store_impls/loose/find.rs @@ -14,13 +14,11 @@ pub enum Error { path: PathBuf, }, #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")] - SizeMismatch { - actual: usize, - expected: usize, - path: PathBuf, - }, + SizeMismatch { actual: u64, expected: u64, path: PathBuf }, #[error(transparent)] Decode(#[from] gix_object::decode::LooseHeaderDecodeError), + #[error("Cannot store {size} in memory as it's not representable")] + OutOfMemory { size: u64 }, #[error("Could not 
{action} data at '{path}'")] Io { source: std::io::Error, @@ -137,7 +135,7 @@ impl Store { /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`. /// Returns `None` if `id` does not exist in the database. - pub fn try_header(&self, id: &gix_hash::oid) -> Result, Error> { + pub fn try_header(&self, id: &gix_hash::oid) -> Result, Error> { const BUF_SIZE: usize = 256; let mut buf = [0_u8; BUF_SIZE]; let path = hash_path(id, self.path.clone()); @@ -224,16 +222,17 @@ impl Store { let decompressed_body_bytes_sans_header = decompressed_start + header_size..decompressed_start + consumed_out; - if consumed_out != size + header_size { + if consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { - expected: size + header_size, - actual: consumed_out, + expected: size + header_size as u64, + actual: consumed_out as u64, path, }); } buf.copy_within(decompressed_body_bytes_sans_header, 0); } else { - buf.resize(bytes_read + size + header_size, 0); + let new_len = bytes_read as u64 + size + header_size as u64; + buf.resize(new_len.try_into().map_err(|_| Error::OutOfMemory { size: new_len })?, 0); { let (input, output) = buf.split_at_mut(bytes_read); let num_decompressed_bytes = zlib::stream::inflate::read( @@ -246,17 +245,21 @@ impl Store { action: "deflate", path: path.to_owned(), })?; - if num_decompressed_bytes + consumed_out != size + header_size { + if num_decompressed_bytes as u64 + consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { - expected: size + header_size, - actual: num_decompressed_bytes + consumed_out, + expected: size + header_size as u64, + actual: num_decompressed_bytes as u64 + consumed_out as u64, path, }); } }; buf.copy_within(decompressed_start + header_size.., 0); } - buf.resize(size, 0); + buf.resize( + size.try_into() + .expect("BUG: here the size is already confirmed to fit into memory"), + 0, + ); Ok(gix_object::Data { kind, data: buf }) } } diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index e537eda9264..2cac12d1814 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -1,4 +1,4 @@ -use std::{convert::TryInto, fs, io, io::Write, path::PathBuf}; +use std::{fs, io, io::Write, path::PathBuf}; use gix_features::{hash, zlib::stream::deflate}; use gix_object::WriteTo; @@ -48,7 +48,7 @@ impl crate::traits::Write for Store { /// This will cost at least 4 IO operations. 
fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { let mut to = self.dest().map_err(Box::new)?; - to.write_all(&gix_object::encode::loose_header(kind, from.len())) + to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) .map_err(|err| Error::Io { source: err, message: "write header to tempfile in", @@ -74,15 +74,12 @@ impl crate::traits::Write for Store { mut from: &mut dyn io::Read, ) -> Result { let mut to = self.dest().map_err(Box::new)?; - to.write_all(&gix_object::encode::loose_header( - kind, - size.try_into().expect("object size to fit into usize"), - )) - .map_err(|err| Error::Io { - source: err, - message: "write header to tempfile in", - path: self.path.to_owned(), - })?; + to.write_all(&gix_object::encode::loose_header(kind, size)) + .map_err(|err| Error::Io { + source: err, + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; io::copy(&mut from, &mut to) .map_err(|err| Error::Io { diff --git a/gix-odb/tests/odb/store/loose.rs b/gix-odb/tests/odb/store/loose.rs index 77a26cb8daf..c998522100d 100644 --- a/gix-odb/tests/odb/store/loose.rs +++ b/gix-odb/tests/odb/store/loose.rs @@ -420,7 +420,7 @@ cjHJZXWmV4CcRfmLsXzU8s2cR9A0DBvOxhPD1TlKC2JhBFXigjuL9U4Rbq9tdegB let id = id?; let expected = db.try_find(&id, &mut buf)?.expect("exists"); let (size, kind) = db.try_header(&id)?.expect("header exists"); - assert_eq!(size, expected.data.len()); + assert_eq!(size, expected.data.len() as u64); assert_eq!(kind, expected.kind); } Ok(()) diff --git a/gix-pack/src/data/input/entries_to_bytes.rs b/gix-pack/src/data/input/entries_to_bytes.rs index 27cd046482f..dc2ac58848a 100644 --- a/gix-pack/src/data/input/entries_to_bytes.rs +++ b/gix-pack/src/data/input/entries_to_bytes.rs @@ -97,7 +97,7 @@ where let interrupt_never = std::sync::atomic::AtomicBool::new(false); let digest = hash::bytes( &mut self.output, - num_bytes_written as usize, + num_bytes_written, self.object_hash, &mut gix_features::progress::Discard, &interrupt_never, diff --git a/gix-pack/src/verify.rs b/gix-pack/src/verify.rs index d502ada389a..fff99c75d12 100644 --- a/gix-pack/src/verify.rs +++ b/gix-pack/src/verify.rs @@ -39,7 +39,7 @@ pub fn checksum_on_disk_or_mmap( let data_len_without_trailer = data.len() - object_hash.len_in_bytes(); let actual = match gix_features::hash::bytes_of_file( data_path, - data_len_without_trailer, + data_len_without_trailer as u64, object_hash, progress, should_interrupt, diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml index 911e5860cc6..d068f349458 100644 --- a/gix-status/Cargo.toml +++ b/gix-status/Cargo.toml @@ -21,6 +21,8 @@ gix-object = { version = "^0.37.0", path = "../gix-object" } gix-path = { version = "^0.10.0", path = "../gix-path" } gix-features = { version = "^0.35.0", path = "../gix-features" } gix-pathspec = { version = "^0.3.0", path = "../gix-pathspec" } +gix-filter = { version = "^0.5.0", path = "../gix-filter" } +gix-worktree = { version = "^0.26.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } thiserror = "1.0.26" filetime = "0.2.15" diff --git a/gix-status/src/index_as_worktree/content.rs b/gix-status/src/index_as_worktree/content.rs deleted file mode 100644 index aa775821a7a..00000000000 --- a/gix-status/src/index_as_worktree/content.rs +++ /dev/null @@ -1,80 +0,0 @@ -use gix_hash::ObjectId; -use gix_index as index; -use index::Entry; - -/// Compares the content of two blobs in some way. 
-pub trait CompareBlobs { - /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. - type Output; - - /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` - /// and allow reading its bytes using `worktree_blob`. - /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. - /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. - fn compare_blobs<'a, E>( - &mut self, - entry: &'a gix_index::Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result<Option<Self::Output>, E>; -} - -/// Lazy borrowed access to blob data. -pub trait ReadDataOnce<'a, E> { - /// Returns the contents of this blob. - /// - /// This potentially performs IO and other expensive operations - /// and should only be called when necessary. - fn read_data(self) -> Result<&'a [u8], E>; -} - -/// Compares two blobs by comparing their size and oid, and only looks at the file if -/// the size matches, therefore it's very fast. -#[derive(Clone)] -pub struct FastEq; - -impl CompareBlobs for FastEq { - type Output = (); - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result<Option<Self::Output>, E> { - // make sure to account for racily smudged entries here so that they don't always keep - // showing up as modified even after their contents have changed again, to a potentially - // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. - if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { - return Ok(Some(())); - } - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(())) - } -} - -/// Compares files to blobs by *always* comparing their hashes. -/// -/// Same as [`FastEq`] but does not contain a fast path for files with mismatched sizes and -/// therefore always returns an OID that can be reused later.
-#[derive(Clone)] -pub struct HashEq; - -impl CompareBlobs for HashEq { - type Output = ObjectId; - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - _worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(file_hash)) - } -} diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs index e576cc35c44..b8ef0c7e4c5 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,51 +1,74 @@ -use std::sync::atomic::Ordering; -use std::{io, marker::PhantomData, path::Path}; +use std::slice::Chunks; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; +use std::{io, path::Path}; use bstr::BStr; use filetime::FileTime; use gix_features::parallel::{in_parallel_if, Reduce}; +use gix_filter::pipeline::convert::ToGitOutcome; +use crate::index_as_worktree::traits::read_data::Stream; +use crate::index_as_worktree::{Conflict, EntryStatus}; use crate::{ index_as_worktree::{ - content, - content::CompareBlobs, + traits, + traits::{CompareBlobs, SubmoduleStatus}, types::{Error, Options}, - Change, VisitEntry, + Change, Outcome, VisitEntry, }, - read, Pathspec, + Pathspec, SymlinkCheck, }; /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them -/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. +/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and +/// `submodule` which can take a look at submodules in detail to produce status information (BASE version if its conflicting). /// `options` are used to configure the operation. /// -/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to -/// the current time for which it will be considered fresh. +/// Note that `index` may require changes to be up-to-date with the working tree and avoid expensive computations by updating respective entries +/// with stat information from the worktree, and its timestamp is adjusted to the current time for which it will be considered fresh +/// as long as it is included which depends on `pathspec`. All this is delegated to the caller. /// -/// Note that this isn't technically quite what this function does as this also provides some additional information, -/// like whether a file has conflicts, and files that were added with `git add` are shown as a special -/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. +/// `should_interrupt` can be used to stop all processing. +/// `filter` is used to convert worktree files back to their internal git representation. For this to be correct, +/// [`Options::attributes`] must be configured as well. 
+/// +/// **It's important to note that the `index` should have its [timestamp updated](gix_index::State::set_timestamp()) with a timestamp +/// from just before making this call *if* [entries were updated](Outcome::entries_to_update)** +/// +/// ### Note +/// +/// Technically, this function does more as it also provides additional information, like whether a file has conflicts, +/// and files that were added with `git add` are shown as a special case as well. It also provides updates to entry filesystem +/// stats like `git status` would if it had to determine the hash. +/// If that happened, the index should be written back after updating the entries with these updated stats, see [Outcome::skipped]. +/// +/// Thus some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed +/// by the `collector`. #[allow(clippy::too_many_arguments)] -pub fn index_as_worktree<'index, T, Find, E>( - index: &'index mut gix_index::State, +pub fn index_as_worktree<'index, T, U, Find, E1, E2>( + index: &'index gix_index::State, worktree: &Path, - collector: &mut impl VisitEntry<'index, ContentChange = T>, + collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>, compare: impl CompareBlobs<Output = T> + Send + Clone, + submodule: impl SubmoduleStatus<Output = U, Error = E2> + Send + Clone, find: Find, progress: &mut dyn gix_features::progress::Progress, pathspec: impl Pathspec + Send + Clone, - options: Options, -) -> Result<(), Error> + filter: gix_filter::Pipeline, + should_interrupt: &AtomicBool, + mut options: Options, +) -> Result<Outcome, Error> where T: Send, - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E> + Send + Clone, + U: Send, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::BlobRef<'a>, E1> + Send + Clone, { // the order is absolutely critical here we use the old timestamp to detect racy index entries // (modified at or after the last index update) during the index update we then set those // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp.
let timestamp = index.timestamp(); - index.set_timestamp(FileTime::now()); let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( 500, // just like git index.entries().len().into(), @@ -56,93 +79,216 @@ where let range = index .prefixed_entries_range(pathspec.common_prefix()) .unwrap_or(0..index.entries().len()); - let (entries, path_backing) = index.entries_mut_and_pathbacking(); - let entries = &mut entries[range]; + + let stack = gix_worktree::Stack::from_state_and_ignore_case( + worktree, + options.fs.ignore_case, + gix_worktree::stack::State::AttributesStack(std::mem::take(&mut options.attributes)), + index, + index.path_backing(), + ); + let (entries, path_backing) = (index.entries(), index.path_backing()); + let mut num_entries = entries.len(); + let entry_index_offset = range.start; + let entries = &entries[range]; + + let _span = gix_features::trace::detail!("gix_status::index_as_worktree", + num_entries = entries.len(), + chunk_size = chunk_size, + thread_limit = ?thread_limit); + + let entries_skipped_by_common_prefix = num_entries - entries.len(); + let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update) = Default::default(); + let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default(); + + num_entries = entries.len(); progress.init(entries.len().into(), gix_features::progress::count("files")); let count = progress.counter(); + let new_state = { + let options = &options; + let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); + let (symlink_metadata_calls, entries_to_update) = (&symlink_metadata_calls, &entries_to_update); + let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); + let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); + move |_| { + ( + State { + buf: Vec::new(), + buf2: Vec::new(), + attr_stack: stack, + path_stack: SymlinkCheck::new(worktree.into()), + timestamp, + path_backing, + filter, + options, + + skipped_by_pathspec, + skipped_by_entry_flags, + symlink_metadata_calls, + entries_to_update, + racy_clean, + worktree_reads, + worktree_bytes, + odb_reads, + odb_bytes, + }, + compare, + submodule, + find, + pathspec, + ) + } + }; in_parallel_if( || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. 
- entries.chunks_mut(chunk_size), + gix_features::interrupt::Iter::new( + OffsetIter { + inner: entries.chunks(chunk_size), + offset: entry_index_offset, + }, + should_interrupt, + ), thread_limit, - { - let options = &options; - move |_| { - ( - State { - buf: Vec::new(), - odb_buf: Vec::new(), - timestamp, - path_backing, - worktree, - options, - }, - compare, - find, + new_state, + |(entry_offset, chunk_entries), (state, blobdiff, submdule, find, pathspec)| { + let all_entries = index.entries(); + let mut out = Vec::new(); + let mut idx = 0; + while let Some(entry) = chunk_entries.get(idx) { + let absolute_entry_index = entry_offset + idx; + if idx == 0 && entry.stage() != 0 { + let offset = entry_offset.checked_sub(1).and_then(|prev_idx| { + let prev_entry = &all_entries[prev_idx]; + let entry_path = entry.path_in(state.path_backing); + if prev_entry.stage() == 0 || prev_entry.path_in(state.path_backing) != entry_path { + // prev_entry (in previous chunk) does not belong to our conflict + return None; + } + Conflict::try_from_entry(all_entries, state.path_backing, absolute_entry_index, entry_path) + .map(|(_conflict, offset)| offset) + }); + if let Some(entries_to_skip_as_conflict_originates_in_previous_chunk) = offset { + // skip current entry as it's done, along with following conflict entries + idx += entries_to_skip_as_conflict_originates_in_previous_chunk + 1; + continue; + } + } + let res = state.process( + all_entries, + entry, + absolute_entry_index, pathspec, - ) + blobdiff, + submdule, + find, + &mut idx, + ); + idx += 1; + count.fetch_add(1, Ordering::Relaxed); + if let Some(res) = res { + out.push(res); + } } - }, - |entries, (state, diff, find, pathspec)| { - entries - .iter_mut() - .filter_map(|entry| { - let res = state.process(entry, diff, find, pathspec); - count.fetch_add(1, Ordering::Relaxed); - res - }) - .collect() + out }, ReduceChange { collector, - phantom: PhantomData, + entries: index.entries(), }, - ) + )?; + + Ok(Outcome { + entries_to_process: num_entries, + entries_processed: count.load(Ordering::Relaxed), + entries_skipped_by_common_prefix, + entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed), + entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed), + entries_to_update: entries_to_update.load(Ordering::Relaxed), + symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed), + racy_clean: racy_clean.load(Ordering::Relaxed), + worktree_files_read: worktree_reads.load(Ordering::Relaxed), + worktree_bytes: worktree_bytes.load(Ordering::Relaxed), + odb_objects_read: odb_reads.load(Ordering::Relaxed), + odb_bytes: odb_bytes.load(Ordering::Relaxed), + }) } struct State<'a, 'b> { buf: Vec, - odb_buf: Vec, + buf2: Vec, timestamp: FileTime, - // path_cache: fs::Cache TODO path cache - path_backing: &'b [u8], - worktree: &'a Path, + /// This is the cheap stack that only assure that we don't go through symlinks. + /// It's always used to get the path to perform an lstat on. + path_stack: SymlinkCheck, + /// This is the expensive stack that will need to check for `.gitattributes` files each time + /// it changes directory. It's only used when we know we have to read a worktree file, which in turn + /// requires attributes to drive the filter configuration. 
+ attr_stack: gix_worktree::Stack, + filter: gix_filter::Pipeline, + path_backing: &'b gix_index::PathStorageRef, options: &'a Options, + + skipped_by_pathspec: &'a AtomicUsize, + skipped_by_entry_flags: &'a AtomicUsize, + symlink_metadata_calls: &'a AtomicUsize, + entries_to_update: &'a AtomicUsize, + racy_clean: &'a AtomicUsize, + worktree_bytes: &'a AtomicU64, + worktree_reads: &'a AtomicUsize, + odb_bytes: &'a AtomicU64, + odb_reads: &'a AtomicUsize, } -type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; +type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, usize, &'index BStr, EntryStatus), Error>; impl<'index> State<'_, 'index> { - fn process( + #[allow(clippy::too_many_arguments)] + fn process( &mut self, - entry: &'index mut gix_index::Entry, + entries: &'index [gix_index::Entry], + entry: &'index gix_index::Entry, + entry_index: usize, + pathspec: &mut impl Pathspec, diff: &mut impl CompareBlobs, + submodule: &mut impl SubmoduleStatus, find: &mut Find, - pathspec: &mut impl Pathspec, - ) -> Option> + outer_entry_index: &mut usize, + ) -> Option> where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { - let conflict = match entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; if entry.flags.intersects( gix_index::entry::Flags::UPTODATE | gix_index::entry::Flags::SKIP_WORKTREE | gix_index::entry::Flags::ASSUME_VALID | gix_index::entry::Flags::FSMONITOR_VALID, ) { + self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed); return None; } let path = entry.path_in(self.path_backing); if !pathspec.is_included(path, Some(false)) { + self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed); return None; } - let status = self.compute_status(&mut *entry, path, diff, find); - Some(status.map(move |status| (&*entry, path, status, conflict))) + let status = if entry.stage() != 0 { + Ok( + Conflict::try_from_entry(entries, self.path_backing, entry_index, path).map(|(conflict, offset)| { + *outer_entry_index += offset; // let out loop skip over entries related to the conflict + EntryStatus::Conflict(conflict) + }), + ) + } else { + self.compute_status(entry, path, diff, submodule, find) + }; + match status { + Ok(None) => None, + Ok(Some(status)) => Some(Ok((entry, entry_index, path, status))), + Err(err) => Some(Err(err)), + } } /// # On how racy-git is handled here @@ -184,42 +330,51 @@ impl<'index> State<'_, 'index> { /// which is a constant. /// /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). 
- fn compute_status( + fn compute_status( &mut self, - entry: &mut gix_index::Entry, - git_path: &BStr, + entry: &gix_index::Entry, + rela_path: &BStr, diff: &mut impl CompareBlobs, + submodule: &mut impl SubmoduleStatus, find: &mut Find, - ) -> Result>, Error> + ) -> Result>, Error> where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + E1: std::error::Error + Send + Sync + 'static, + E2: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E1>, { - // TODO fs cache - let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; - let worktree_path = self.worktree.join(worktree_path); + let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) { + Ok(path) => path, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed.into())), + Err(err) => return Err(Error::Io(err)), + }; + self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); let metadata = match worktree_path.symlink_metadata() { - // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that Ok(metadata) if metadata.is_dir() => { // index entries are normally only for files/symlinks // if a file turned into a directory it was removed // the only exception here are submodules which are // part of the index despite being directories - // - // TODO: submodules: - // if entry.mode.contains(Mode::COMMIT) && - // resolve_gitlink_ref(ce->name, "HEAD", &sub)) - return Ok(Some(Change::Removed)); + if entry.mode.is_submodule() { + let status = submodule + .status(entry, rela_path) + .map_err(|err| Error::SubmoduleStatus { + rela_path: rela_path.into(), + source: Box::new(err), + })?; + return Ok(status.map(|status| Change::SubmoduleModification(status).into())); + } else { + return Ok(Some(Change::Removed.into())); + } } Ok(metadata) => metadata, - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed.into())), Err(err) => { return Err(err.into()); } }; if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { - return Ok(Some(Change::IntentToAdd)); + return Ok(Some(EntryStatus::IntentToAdd)); } let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; let executable_bit_changed = @@ -227,7 +382,7 @@ impl<'index> State<'_, 'index> { .mode .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) { - Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), + Some(gix_index::entry::mode::Change::Type { .. 
}) => return Ok(Some(Change::Type.into())), Some(gix_index::entry::mode::Change::ExecutableBit) => true, None => false, }; @@ -251,45 +406,57 @@ impl<'index> State<'_, 'index> { racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); if !racy_clean { return Ok(None); + } else { + self.racy_clean.fetch_add(1, Ordering::Relaxed); } } - let read_file = WorktreeBlob { + self.buf.clear(); + self.buf2.clear(); + let fetch_data = ReadDataImpl { buf: &mut self.buf, - path: &worktree_path, + path: worktree_path, + rela_path, entry, + file_len: metadata.len(), + filter: &mut self.filter, + attr_stack: &mut self.attr_stack, options: self.options, - }; - let read_blob = OdbBlob { - buf: &mut self.odb_buf, id: &entry.id, find, + worktree_reads: self.worktree_reads, + worktree_bytes: self.worktree_bytes, + odb_reads: self.odb_reads, + odb_bytes: self.odb_bytes, }; - let content_change = diff.compare_blobs::(entry, metadata.len() as usize, read_file, read_blob)?; + let content_change = diff.compare_blobs(entry, metadata.len(), fetch_data, &mut self.buf2)?; // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. - if content_change.is_some() && racy_clean { - entry.stat.size = 0; - } if content_change.is_some() || executable_bit_changed { - Ok(Some(Change::Modification { - executable_bit_changed, - content_change, - })) + let set_entry_stat_size_zero = content_change.is_some() && racy_clean; + Ok(Some( + Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero, + } + .into(), + )) } else { - // don't diff against this file next time since we know the file is unchanged. - entry.stat = new_stat; - Ok(None) + self.entries_to_update.fetch_add(1, Ordering::Relaxed); + Ok(Some(EntryStatus::NeedsUpdate(new_stat))) } } } struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { collector: &'a mut T, - phantom: PhantomData, + entries: &'index [gix_index::Entry], } -impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { - type Input = Vec>; +impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce + for ReduceChange<'_, 'index, C> +{ + type Input = Vec>; type FeedProduce = (); @@ -299,8 +466,9 @@ impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChang fn feed(&mut self, items: Self::Input) -> Result { for item in items { - let (entry, path, change, conflict) = item?; - self.collector.visit_entry(entry, path, change, conflict); + let (entry, entry_index, path, status) = item?; + self.collector + .visit_entry(self.entries, entry, entry_index, path, status); } Ok(()) } @@ -310,43 +478,156 @@ impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChang } } -struct WorktreeBlob<'a> { +struct ReadDataImpl<'a, Find, E> +where + E: std::error::Error + Send + Sync + 'static, + Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec) -> Result, E>, +{ buf: &'a mut Vec, path: &'a Path, + rela_path: &'a BStr, + file_len: u64, entry: &'a gix_index::Entry, + filter: &'a mut gix_filter::Pipeline, + attr_stack: &'a mut gix_worktree::Stack, options: &'a Options, + id: &'a gix_hash::oid, + find: Find, + worktree_bytes: &'a AtomicU64, + worktree_reads: &'a AtomicUsize, + odb_bytes: &'a AtomicU64, + odb_reads: &'a AtomicUsize, } -struct OdbBlob<'a, Find, E> +impl<'a, Find, E> traits::ReadData<'a> for ReadDataImpl<'a, Find, E> where E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, + 
Find: for<'b> FnMut(&gix_hash::oid, &'b mut Vec) -> Result, E>, { - buf: &'a mut Vec, - id: &'a gix_hash::oid, - find: Find, + fn read_blob(mut self) -> Result<&'a [u8], Error> { + (self.find)(self.id, self.buf) + .map(|b| { + self.odb_reads.fetch_add(1, Ordering::Relaxed); + self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed); + b.data + }) + .map_err(move |err| Error::Find(Box::new(err))) + } + + fn stream_worktree_file(mut self) -> Result, Error> { + self.buf.clear(); + // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just + // normal files with their content equal to the linked path (so can be read normally) + // + let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK; + // TODO: what to do about precompose unicode and ignore_case for symlinks + let out = if is_symlink && self.options.fs.symlink { + // conversion to bstr can never fail because symlinks are only used + // on unix (by git) so no reason to use the try version here + let symlink_path = gix_path::into_bstr(std::fs::read_link(self.path)?); + self.buf.extend_from_slice(&symlink_path); + self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed); + Stream { + inner: ToGitOutcome::Buffer(self.buf), + bytes: None, + len: None, + } + } else { + self.buf.clear(); + let platform = self.attr_stack.at_entry(self.rela_path, Some(false), &mut self.find)?; + let file = std::fs::File::open(self.path)?; + let out = self + .filter + .convert_to_git( + file, + self.path, + &mut |_path, attrs| { + platform.matching_attributes(attrs); + }, + &mut |buf| { + (self.find)(self.id, buf) + .map(|_| Some(())) + .map_err(|err| Box::new(err) as Box) + }, + ) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; + let len = match out { + ToGitOutcome::Unchanged(_) => Some(self.file_len), + ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None, + }; + Stream { + inner: out, + bytes: Some(self.worktree_bytes), + len, + } + }; + + self.worktree_reads.fetch_add(1, Ordering::Relaxed); + Ok(out) + } +} + +struct OffsetIter<'a, T> { + inner: Chunks<'a, T>, + offset: usize, } -impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { - fn read_data(self) -> Result<&'a [u8], Error> { - let res = read::data_to_buf_with_meta( - self.path, - self.buf, - self.entry.mode == gix_index::entry::Mode::SYMLINK, - &self.options.fs, - )?; - Ok(res) +impl<'a, T> Iterator for OffsetIter<'a, T> { + type Item = (usize, &'a [T]); + + fn next(&mut self) -> Option { + let block = self.inner.next()?; + let offset = self.offset; + self.offset += block.len(); + Some((offset, block)) } } -impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> -where - E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, -{ - fn read_data(mut self) -> Result<&'a [u8], Error> { - (self.find)(self.id, self.buf) - .map(|b| b.data) - .map_err(move |err| Error::Find(Box::new(err))) +impl Conflict { + /// Given `entries` and `path_backing`, both values obtained from an [index](gix_index::State), use `start_index` and enumerate + /// all conflict stages that still match `entry_path` to produce a conflict description. + /// Also return the amount of extra-entries that were part of the conflict declaration (not counting the entry at `start_index`) + /// + /// If for some reason entry at `start_index` isn't in conflicting state, `None` is returned. 
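The mapping implemented below is easiest to read as a three-bit presence mask over the conflict stages (stage 1 = common base, stage 2 = ours, stage 3 = theirs). A small sketch with a hypothetical helper, mirroring that mapping:

```rust
/// Hypothetical helper mirroring `Conflict::try_from_entry()` below: the set of stages
/// present for a path determines the kind of conflict.
fn classify(has_base: bool, has_ours: bool, has_theirs: bool) -> Option<&'static str> {
    let mask = (has_base as u8) | ((has_ours as u8) << 1) | ((has_theirs as u8) << 2);
    Some(match mask {
        0b001 => "BothDeleted",   // only the base survives: both sides deleted the entry
        0b010 => "AddedByUs",     // only our stage exists
        0b011 => "DeletedByThem", // base and ours remain, their stage is gone
        0b100 => "AddedByThem",   // only their stage exists
        0b101 => "DeletedByUs",   // base and theirs remain, our stage is gone
        0b110 => "BothAdded",     // both sides added it without a common base
        0b111 => "BothModified",  // all three stages are present
        _ => return None,         // no conflict stage at all
    })
}

fn main() {
    assert_eq!(classify(true, true, true), Some("BothModified"));
    assert_eq!(classify(true, false, true), Some("DeletedByUs"));
    assert_eq!(classify(false, false, false), None);
}
```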
+ pub fn try_from_entry( + entries: &[gix_index::Entry], + path_backing: &gix_index::PathStorageRef, + start_index: usize, + entry_path: &BStr, + ) -> Option<(Self, usize)> { + use Conflict::*; + let mut mask = None::; + + let mut count = 0_usize; + for stage in (start_index..(start_index + 3).min(entries.len())).filter_map(|idx| { + let entry = &entries[idx]; + let stage = entry.stage(); + (stage > 0 && entry.path_in(path_backing) == entry_path).then_some(stage) + }) { + // This could be `1 << (stage - 1)` but let's be specific. + *mask.get_or_insert(0) |= match stage { + 1 => 0b001, + 2 => 0b010, + 3 => 0b100, + _ => 0, + }; + count += 1; + } + + mask.map(|mask| { + ( + match mask { + 0b001 => BothDeleted, + 0b010 => AddedByUs, + 0b011 => DeletedByThem, + 0b100 => AddedByThem, + 0b101 => DeletedByUs, + 0b110 => BothAdded, + 0b111 => BothModified, + _ => unreachable!("BUG: bitshifts and typical entry layout doesn't allow for more"), + }, + count - 1, + ) + }) } } diff --git a/gix-status/src/index_as_worktree/mod.rs b/gix-status/src/index_as_worktree/mod.rs index 8294a54e8ac..412068875d0 100644 --- a/gix-status/src/index_as_worktree/mod.rs +++ b/gix-status/src/index_as_worktree/mod.rs @@ -1,11 +1,11 @@ //! Changes between an index and a worktree. /// mod types; -pub use types::{Change, Error, Options, VisitEntry}; +pub use types::{Change, Conflict, EntryStatus, Error, Options, Outcome, VisitEntry}; mod recorder; -pub use recorder::Recorder; +pub use recorder::{Record, Recorder}; -/// -pub mod content; pub(crate) mod function; +/// +pub mod traits; diff --git a/gix-status/src/index_as_worktree/recorder.rs b/gix-status/src/index_as_worktree/recorder.rs index 48beb25a313..0cf1aa6f367 100644 --- a/gix-status/src/index_as_worktree/recorder.rs +++ b/gix-status/src/index_as_worktree/recorder.rs @@ -1,27 +1,47 @@ use bstr::BStr; use gix_index as index; -use crate::index_as_worktree::{Change, VisitEntry}; +use crate::index_as_worktree::{EntryStatus, VisitEntry}; + +/// A record of a change. +/// +/// It's created either if there is a conflict or a change, or both. +#[derive(Debug)] +pub struct Record<'index, T, U> { + /// The index entry that is changed. + pub entry: &'index index::Entry, + /// The index of the `entry` relative to all entries in the input index. + pub entry_index: usize, + /// The path to the entry. + pub relative_path: &'index BStr, + /// The status information itself. + pub status: EntryStatus, +} /// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. #[derive(Debug, Default)] -pub struct Recorder<'index, T = ()> { +pub struct Recorder<'index, T = (), U = ()> { /// collected changes, index entries without conflicts or changes are excluded. 
- pub records: Vec<(&'index BStr, Option>, bool)>, + pub records: Vec>, } -impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { +impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { type ContentChange = T; + type SubmoduleStatus = U; fn visit_entry( &mut self, - _entry: &'index index::Entry, - rela_path: &'index BStr, - status: Option>, - conflict: bool, + _entries: &'index [index::Entry], + entry: &'index index::Entry, + entry_index: usize, + relative_path: &'index BStr, + status: EntryStatus, ) { - if conflict || status.is_some() { - self.records.push((rela_path, status, conflict)) - } + self.records.push(Record { + entry, + entry_index, + relative_path, + status, + }) } } diff --git a/gix-status/src/index_as_worktree/traits.rs b/gix-status/src/index_as_worktree/traits.rs new file mode 100644 index 00000000000..958675dfc5d --- /dev/null +++ b/gix-status/src/index_as_worktree/traits.rs @@ -0,0 +1,168 @@ +use crate::index_as_worktree::Error; +use bstr::BStr; +use gix_hash::ObjectId; +use gix_index as index; +use index::Entry; +use std::io::Read; +use std::sync::atomic::AtomicBool; + +/// Compares the content of two blobs in some way. +pub trait CompareBlobs { + /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. + type Output; + + /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` + /// and allow streaming its bytes using `data`. + /// If this function returns `None` the `entry` and the worktree blob are assumed to be identical. + /// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. + /// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer. + fn compare_blobs<'a, 'b>( + &mut self, + entry: &gix_index::Entry, + worktree_blob_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, + ) -> Result, Error>; +} + +/// Determine the status of a submodule, which always indicates that it changed if present. +pub trait SubmoduleStatus { + /// The status result, describing in which way the submodule changed. + type Output; + /// A custom error that may occur while computing the submodule status. + type Error: std::error::Error + Send + Sync + 'static; + + /// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected. + fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result, Self::Error>; +} + +/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files. +pub trait ReadData<'a> { + /// Returns the contents of this blob. + /// + /// This potentially performs IO and other expensive operations + /// and should only be called when necessary. + fn read_blob(self) -> Result<&'a [u8], Error>; + + /// Stream a worktree file in such a manner that its content matches what would be put into git. + fn stream_worktree_file(self) -> Result, Error>; +} + +/// +pub mod read_data { + use gix_filter::pipeline::convert::ToGitOutcome; + use std::sync::atomic::{AtomicU64, Ordering}; + + /// A stream with worktree file data. + pub struct Stream<'a> { + pub(crate) inner: ToGitOutcome<'a, std::fs::File>, + pub(crate) bytes: Option<&'a AtomicU64>, + pub(crate) len: Option, + } + + impl<'a> Stream<'a> { + /// Return the underlying byte-buffer if there is one. + /// + /// If `None`, read from this instance like a stream. 
+ /// Note that this method should only be called once to assure proper accounting of the amount of bytes read. + pub fn as_bytes(&self) -> Option<&'a [u8]> { + self.inner.as_bytes().map(|v| { + if let Some(bytes) = self.bytes { + bytes.fetch_add(v.len() as u64, Ordering::Relaxed); + } + v + }) + } + + /// Return the size of the stream in bytes if it is known in advance. + pub fn size(&self) -> Option { + self.len + } + } + + impl<'a> std::io::Read for Stream<'a> { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + if let Some(bytes) = self.bytes { + bytes.fetch_add(n as u64, Ordering::Relaxed); + } + Ok(n) + } + } +} + +/// Compares to blobs by comparing their size and oid, and only looks at the file if +/// the size matches, therefore it's very fast. +#[derive(Clone)] +pub struct FastEq; + +impl CompareBlobs for FastEq { + type Output = (); + + // TODO: make all streaming IOPs interruptible. + fn compare_blobs<'a, 'b>( + &mut self, + entry: &Entry, + worktree_file_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, + ) -> Result, Error> { + // make sure to account for racily smudged entries here so that they don't always keep + // showing up as modified even after their contents have changed again, to a potentially + // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. + if entry.stat.size as u64 != worktree_file_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { + return Ok(Some(())); + } + HashEq + .compare_blobs(entry, worktree_file_size, data, buf) + .map(|opt| opt.map(|_| ())) + } +} + +/// Compares files to blobs by *always* comparing their hashes. +/// +/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and +/// therefore always returns an OID that can be reused later. +#[derive(Clone)] +pub struct HashEq; + +impl CompareBlobs for HashEq { + type Output = ObjectId; + + fn compare_blobs<'a, 'b>( + &mut self, + entry: &Entry, + _worktree_blob_size: u64, + data: impl ReadData<'a>, + buf: &mut Vec, + ) -> Result, Error> { + let mut stream = data.stream_worktree_file()?; + match stream.as_bytes() { + Some(buffer) => { + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer); + Ok((entry.id != file_hash).then_some(file_hash)) + } + None => { + let file_hash = match stream.size() { + None => { + stream.read_to_end(buf)?; + gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf) + } + Some(len) => { + let header = gix_object::encode::loose_header(gix_object::Kind::Blob, len); + let mut hasher = gix_features::hash::hasher(entry.id.kind()); + hasher.update(&header); + gix_features::hash::bytes_with_hasher( + &mut stream, + len, + hasher, + &mut gix_features::progress::Discard, + &AtomicBool::default(), + )? + } + }; + Ok((entry.id != file_hash).then_some(file_hash)) + } + } + } +} diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 35cf45401b1..daad11f27ad 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -1,6 +1,6 @@ -use bstr::BStr; +use bstr::{BStr, BString}; -/// The error returned by [`status()`](crate::index_as_worktree()). +/// The error returned by [index_as_worktree()`](crate::index_as_worktree()). 
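For illustration of the `CompareBlobs` trait introduced in `traits.rs` above, a minimal hypothetical implementation that only compares sizes and never reads data; it is a sketch of the trait surface, not something this PR adds:

```rust
use gix_status::index_as_worktree::traits::{CompareBlobs, ReadData};
use gix_status::index_as_worktree::Error;

/// A hypothetical comparator that flags a change whenever the size recorded in the index
/// differs from the file on disk, without ever reading blob or worktree data.
#[derive(Clone)]
struct SizeOnly;

impl CompareBlobs for SizeOnly {
    type Output = ();

    fn compare_blobs<'a, 'b>(
        &mut self,
        entry: &gix_index::Entry,
        worktree_blob_size: u64,
        _data: impl ReadData<'a>,
        _buf: &mut Vec<u8>,
    ) -> Result<Option<Self::Output>, Error> {
        // `entry.stat.size` is stored as a u32 in the index; widen it for the comparison.
        Ok((u64::from(entry.stat.size) != worktree_blob_size).then_some(()))
    }
}
```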
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -12,10 +12,15 @@ pub enum Error { Io(#[from] std::io::Error), #[error("Failed to obtain blob from object database")] Find(#[source] Box), + #[error("Could not determine status for submodule at '{rela_path}'")] + SubmoduleStatus { + rela_path: BString, + source: Box, + }, } -#[derive(Clone, Debug, Default)] /// Options that control how the index status with a worktree is computed. +#[derive(Clone, Default)] pub struct Options { /// Capabilities of the file system which affect the status computation. pub fs: gix_fs::Capabilities, @@ -25,11 +30,59 @@ pub struct Options { pub thread_limit: Option, /// Options that control how stat comparisons are made when checking if a file is fresh. pub stat: gix_index::entry::stat::Options, + /// Pre-configured state to allow processing attributes. + /// + /// These are needed to potentially refresh the index with data read from the worktree, which needs to be converted back + /// to the form stored in git. + pub attributes: gix_worktree::stack::state::Attributes, +} + +/// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). +#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] +pub struct Outcome { + /// The total amount of entries that is to be processed. + pub entries_to_process: usize, + /// The amount of entries we actually processed. If this isn't the entire set, the operation was interrupted. + pub entries_processed: usize, + /// The amount of entries we didn't even traverse (and thus update with stat) due to a common prefix in pathspecs. + /// This is similar to the current working directory. + pub entries_skipped_by_common_prefix: usize, + /// The amount of entries that were skipped due to exclusion by *pathspecs*. + pub entries_skipped_by_pathspec: usize, + /// The amount of entries that were skipped as the entry flag indicated this. + pub entries_skipped_by_entry_flags: usize, + /// The amount of times we queried symlink-metadata for a file on disk. + pub symlink_metadata_calls: usize, + /// The amount of entries whose stats would need to be updated as its modification couldn't be determined without + /// an expensive calculation. + /// + /// With these updates, this calculation will be avoided next time the status runs. + /// Note that the stat updates are delegated to the caller. + pub entries_to_update: usize, + /// The amount of entries that were considered racy-clean - they will need thorough checking to see if they are truly clean, + /// i.e. didn't change. + pub racy_clean: usize, + + /// The amount of bytes read from the worktree in order to determine if an entry changed, across all files. + pub worktree_bytes: u64, + /// The amount of files read in full from the worktree (and into memory). + pub worktree_files_read: usize, + /// The amount of bytes read from the object database in order to determine if an entry changed, across all objects. + pub odb_bytes: u64, + /// The amount of objects read from the object database. + pub odb_objects_read: usize, +} + +impl Outcome { + /// The total amount of skipped entries, i.e. those that weren't processed at all. + pub fn skipped(&self) -> usize { + self.entries_skipped_by_common_prefix + self.entries_skipped_by_pathspec + self.entries_skipped_by_entry_flags + } } /// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. 
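A short sketch of how a caller might surface the new `Outcome` statistics after a run; the field accesses follow the declaration above, while the report format itself is made up:

```rust
// Illustrative reporting helper over the `Outcome` declared above.
fn report(outcome: &gix_status::index_as_worktree::Outcome) {
    eprintln!(
        "processed {}/{} entries ({} skipped), {} lstat calls, {} racy-clean, {} entries to update, \
         {} worktree bytes across {} files, {} odb bytes across {} objects",
        outcome.entries_processed,
        outcome.entries_to_process,
        outcome.skipped(),
        outcome.symlink_metadata_calls,
        outcome.racy_clean,
        outcome.entries_to_update,
        outcome.worktree_bytes,
        outcome.worktree_files_read,
        outcome.odb_bytes,
        outcome.odb_objects_read,
    );
}
```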
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Change { +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Change { /// This corresponding file does not exist in the worktree anymore. Removed, /// The type of file changed compared to the worktree, i.e. a symlink s now a file. @@ -40,30 +93,92 @@ pub enum Change { /// Indicates that one of the stat changes was an executable bit change /// which is a significant change itself. executable_bit_changed: bool, - /// The output of the [`CompareBlobs`](crate::index_as_worktree::content::CompareBlobs) run on this entry. + /// The output of the [`CompareBlobs`](crate::index_as_worktree::traits::CompareBlobs) run on this entry. /// If there is no content change and only the executable bit - /// changed than this is `None`. + /// changed then this is `None`. content_change: Option, + /// If true, the caller is expected to set [entry.stat.size = 0](gix_index::entry::Stat::size) to assure this + /// otherwise racily clean entry can still be detected as dirty next time this is called, but this time without + /// reading it from disk to hash it. It's a performance optimization and not doing so won't change the correctness + /// of the operation. + set_entry_stat_size_zero: bool, }, - /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. + /// A submodule is initialized and checked out, and there was modification to either: /// - /// This means it's not available in the object database yet or the index was created from, - /// even though now an entry exists that represents the worktree file. + /// * the `HEAD` as compared to the superproject's desired commit for `HEAD` + /// * the worktree has at least one modified file + /// * there is at least one untracked file + /// + /// The exact nature of the modification is handled by the caller which may retain information per submodule or + /// re-compute details as needed when seeing this variant. + SubmoduleModification(U), +} + +/// Information about an entry. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum EntryStatus { + /// The entry is in a conflicting state, and we didn't collect any more information about it. + Conflict(Conflict), + /// There is no conflict and a change was discovered. + Change(Change), + /// The entry didn't change, but its state caused extra work that can be avoided next time if its stats would be updated to the + /// given stat. + NeedsUpdate( + /// The new stats which represent what's currently in the working tree. If these replace the current stats in the entry, + /// next time this operation runs we can determine the actual state much faster. + gix_index::entry::Stat, + ), + /// An index entry that corresponds to an untracked worktree file marked with `git add --intent-to-add`. + /// + /// This means it's not available in the object database yet even though now an entry exists that represents the worktree file. + /// The entry represents the promise of adding a new file, no matter the actual stat or content. + /// Effectively this means nothing changed. + /// This also means the file is still present, and that no detailed change checks were performed. IntentToAdd, } -/// Observe changes by comparing an index entry to the worktree or another index. +impl From> for EntryStatus { + fn from(value: Change) -> Self { + EntryStatus::Change(value) + } +} + +/// Describes a conflicting entry as comparison between 'our' version and 'their' version of it. 
+/// +/// If one side isn't specified, it is assumed to have modified the entry. In general, there would be no conflict +/// if both parties ended up in the same state. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Conflict { + /// Both deleted a different version of the entry. + BothDeleted, + /// We added, they modified, ending up in different states. + AddedByUs, + /// They deleted the entry, we modified it. + DeletedByThem, + /// They added the entry, we modified it, ending up in different states. + AddedByThem, + /// We deleted the entry, they modified it, ending up in different states. + DeletedByUs, + /// Both added the entry in different states. + BothAdded, + /// Both modified the entry, ending up in different states. + BothModified, +} + +/// Observe the status of an entry by comparing an index entry to the worktree. pub trait VisitEntry<'index> { /// Data generated by comparing an entry with a file. type ContentChange; - /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether - /// or not it has a `conflict`. - /// If `change` is `None`, there is no change. + /// Data obtained when checking the submodule status. + type SubmoduleStatus; + /// Observe the `status` of `entry` at the repository-relative `rela_path` at `entry_index` + /// (for accessing `entry` and surrounding in the complete list of `entries`). fn visit_entry( &mut self, + entries: &'index [gix_index::Entry], entry: &'index gix_index::Entry, + entry_index: usize, rela_path: &'index BStr, - change: Option>, - conflict: bool, + status: EntryStatus, ); } diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 225668ced78..28c20145d4a 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -8,12 +8,9 @@ //! While also being able to check check if the working tree is dirty, quickly. #![deny(missing_docs, rust_2018_idioms, unsafe_code)] -/// -pub mod read; +use bstr::BStr; pub mod index_as_worktree; - -use bstr::BStr; pub use index_as_worktree::function::index_as_worktree; /// A trait to facilitate working working with pathspecs. @@ -31,3 +28,13 @@ pub trait Pathspec { /// `is_dir` is `true` if `relative_path` is a directory. fn is_included(&mut self, relative_path: &BStr, is_dir: Option) -> bool; } + +/// A stack that validates we are not going through a symlink in a way that is read-only. +/// +/// It can efficiently validate paths when these are queried in sort-order, which leads to each component +/// to only be checked once. +pub struct SymlinkCheck { + inner: gix_fs::Stack, +} + +mod stack; diff --git a/gix-status/src/read.rs b/gix-status/src/read.rs deleted file mode 100644 index fd336817525..00000000000 --- a/gix-status/src/read.rs +++ /dev/null @@ -1,63 +0,0 @@ -//! This module allows creating git blobs from worktree files. -//! -//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly -//! and attributes/config options need to be considered. - -use std::{ - fs::{read_link, File}, - io::{self, Read}, - path::Path, -}; - -use gix_object::Blob; - -// TODO: tests - -// TODO: what to do about precompose unicode and ignore_case for symlinks - -/// Create a blob from a file or symlink. -pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf(path, &mut data, capabilities)?; - Ok(Blob { data }) -} - -/// Create a blob from a file or symlink. 
-pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; - Ok(Blob { data }) -} - -/// Create blob data from a file or symlink. -pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { - data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) -} - -/// Create a blob from a file or symlink. -pub fn data_to_buf_with_meta<'a>( - path: &Path, - buf: &'a mut Vec, - is_symlink: bool, - capabilities: &gix_fs::Capabilities, -) -> io::Result<&'a [u8]> { - buf.clear(); - // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just - // normal files with their content equal to the linked path (so can be read normally) - // - if is_symlink && capabilities.symlink { - // conversion to bstr can never fail because symlinks are only used - // on unix (by git) so no reason to use the try version here - let symlink_path = gix_path::into_bstr(read_link(path)?); - buf.extend_from_slice(&symlink_path); - // TODO: there is no reason this should be a clone - // std isn't great about allowing users to avoid allocations but we could - // simply write our own wrapper around libc::readlink which reuses the - // buffer. This would require unsafe code tough (obviously) - } else { - buf.clear(); - File::open(path)?.read_to_end(buf)?; - // TODO apply filters - } - Ok(buf.as_slice()) -} diff --git a/gix-status/src/stack.rs b/gix-status/src/stack.rs new file mode 100644 index 00000000000..3f3cd8f9797 --- /dev/null +++ b/gix-status/src/stack.rs @@ -0,0 +1,59 @@ +use crate::SymlinkCheck; +use gix_fs::Stack; +use std::path::{Path, PathBuf}; + +impl SymlinkCheck { + /// Create a new stack that starts operating at `root`. + pub fn new(root: PathBuf) -> Self { + Self { + inner: gix_fs::Stack::new(root), + } + } + + /// Return a valid filesystem path located in our root by appending `relative_path`, which is guaranteed to + /// not pass through a symbolic link. That way the caller can be sure to not be misled by an attacker that + /// tries to make us reach outside of the repository. + /// + /// Note that the file pointed to by `relative_path` may still be a symbolic link, or not exist at all, + /// and that an error may also be produced if directories on the path leading to the leaf + /// component of `relative_path` are missing. + /// + /// ### Note + /// + /// On windows, no verification is performed, instead only the combined path is provided as usual. 
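As a compact usage sketch (assuming a stack built with `SymlinkCheck::new(worktree_root)`), taking the `lstat` of a repository-relative path through the verification described above could look like this:

```rust
use std::path::Path;

/// Illustrative only: lstat a repository-relative path after making sure that no
/// intermediate directory on the way to it is a symbolic link.
fn lstat_verified(
    stack: &mut gix_status::SymlinkCheck,
    rela_path: &Path,
) -> std::io::Result<std::fs::Metadata> {
    // On non-Windows platforms `verified_path()` fails with `ErrorKind::Other` if a
    // directory leading to the leaf is a symlink; the leaf itself may still be one.
    stack.verified_path(rela_path)?.symlink_metadata()
}
```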
+ pub fn verified_path(&mut self, relative_path: &Path) -> std::io::Result<&Path> { + self.inner.make_relative_path_current(relative_path, &mut Delegate)?; + Ok(self.inner.current()) + } +} + +struct Delegate; + +impl gix_fs::stack::Delegate for Delegate { + fn push_directory(&mut self, _stack: &Stack) -> std::io::Result<()> { + Ok(()) + } + + fn push(&mut self, is_last_component: bool, stack: &Stack) -> std::io::Result<()> { + #[cfg(windows)] + { + Ok(()) + } + #[cfg(not(windows))] + { + if is_last_component { + return Ok(()); + } + + if stack.current().symlink_metadata()?.is_symlink() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "Cannot step through symlink to perform an lstat", + )); + } + Ok(()) + } + } + + fn pop_directory(&mut self) {} +} diff --git a/gix-status/tests/fixtures/conflicts.sh b/gix-status/tests/fixtures/conflicts.sh new file mode 100644 index 00000000000..682a214cf99 --- /dev/null +++ b/gix-status/tests/fixtures/conflicts.sh @@ -0,0 +1,67 @@ +#!/bin/bash +set -eu -o pipefail + +(git init both-deleted && cd both-deleted + echo test > file + git add file && git commit -m file && + git branch alt && git mv file added-by-them + git commit -m "file renamed in added-by-them" && git checkout alt + git mv file added-by-us + git commit -m "file renamed in added-by-us" + git reset --hard alt + git merge main || : +) + +(git init deleted-by-us && cd deleted-by-us + git init + >file && git add file && git commit -m "initial" + echo change >> file && git commit -am "modify" + git checkout -b side HEAD^ + git rm file + git commit -m delete + git merge main || : +) + +(git init deleted-by-them && cd deleted-by-them + echo "This is some content." > file + git add file + git commit -m "Initial commit" + git checkout -b conflict + git rm file + git commit -m "Delete file in feature branch" + git checkout main + echo "Modified by main branch." >> file + git add file + git commit -m "Modified file in main branch" + git merge conflict || : +) + +(git init both-modified && cd both-modified + git init + > file && git add file && git commit -m "init" + + git checkout -b conflict + echo conflicting >> file && git commit -am "alt-change" + + git checkout main + echo other >> file && git commit -am "change" + + git merge conflict || : +) + +(git init both-added && cd both-added + git init + set -x + echo init >> deleted-by-them && git add . && git commit -m "init" + + git checkout -b second_branch + git rm deleted-by-them + git commit -m "deleted-by-them deleted on second_branch" + echo second > both-added && git add . && git commit -m second + + git checkout main + echo on_second > deleted-by-them && git commit -am "on second" + echo main > both-added && git add . 
&& git commit -m main + + git merge second_branch || : +) diff --git a/gix-status/tests/fixtures/generated-archives/.gitignore b/gix-status/tests/fixtures/generated-archives/.gitignore index ee165f3a1c5..c0de66a1edd 100644 --- a/gix-status/tests/fixtures/generated-archives/.gitignore +++ b/gix-status/tests/fixtures/generated-archives/.gitignore @@ -1,2 +1,3 @@ status_unchanged.tar.xz status_changed.tar.xz +symlink_stack.tar.xz diff --git a/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz b/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz new file mode 100644 index 00000000000..fcbf3888bc2 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/conflicts.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:855ef3e8f1cf518daab18a572ad9dc4a7c532a213d1b9f8a78f32f5346b518a2 +size 19036 diff --git a/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz new file mode 100644 index 00000000000..63a880d3709 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/status_submodule.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a83500a44dbb5ab3865281645d45f8a81d469cdc7a719c2bb93e92f2fac00e +size 16940 diff --git a/gix-status/tests/fixtures/status_submodule.sh b/gix-status/tests/fixtures/status_submodule.sh new file mode 100644 index 00000000000..a8b6634c5e0 --- /dev/null +++ b/gix-status/tests/fixtures/status_submodule.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q module1 +(cd module1 + touch this + git add . + git commit -q -m c1 + echo hello >> this + git commit -q -am c2 +) + +git init no-change +(cd no-change + git submodule add ../module1 m1 + git commit -m "add module 1" +) + +cp -R no-change deleted-dir +(cd deleted-dir + rm -Rf m1 +) + +cp -R no-change type-change +(cd type-change + rm -Rf m1 + touch m1 +) + +cp -R no-change empty-dir-no-change +(cd empty-dir-no-change + rm -Rf m1 + mkdir m1 +) + +cp -R no-change conflict +(cd conflict + (cd m1 + git checkout @~1 + ) + + git commit -am "change submodule head" + git checkout -b other @~1 + git rm -rf m1 + git commit -m "removed submodule" + + git merge main || : +) diff --git a/gix-status/tests/fixtures/symlink_stack.sh b/gix-status/tests/fixtures/symlink_stack.sh new file mode 100755 index 00000000000..479460c00a9 --- /dev/null +++ b/gix-status/tests/fixtures/symlink_stack.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +mkdir base; +(cd base + touch file + mkdir dir + touch dir/file-in-dir + (cd dir + ln -s file-in-dir filelink + mkdir subdir + ln -s subdir dirlink + ) + + ln -s file root-filelink + ln -s dir root-dirlink +) + +ln -s base symlink-base + diff --git a/gix-status/tests/stack/mod.rs b/gix-status/tests/stack/mod.rs new file mode 100644 index 00000000000..edc4d52028a --- /dev/null +++ b/gix-status/tests/stack/mod.rs @@ -0,0 +1,107 @@ +fn stack() -> gix_status::SymlinkCheck { + stack_in("base") +} + +fn stack_in(dir: &str) -> gix_status::SymlinkCheck { + gix_status::SymlinkCheck::new( + gix_testtools::scripted_fixture_read_only_standalone("symlink_stack.sh") + .expect("valid script") + .join(dir), + ) +} + +#[test] +fn paths_not_going_through_symlink_directories_are_ok_and_point_to_correct_item() -> crate::Result { + for root in ["base", "symlink-base"] { + let mut stack = stack_in(root); + for (rela_path, expectation) in [ + ("root-filelink", is_symlink as fn(&std::fs::Metadata) -> bool), + ("root-dirlink", 
is_symlinked_dir), + ("file", is_file), + ("dir/file-in-dir", is_file), + ("dir", is_dir), + ("dir/subdir", is_dir), + ("dir/filelink", is_symlink), + ("dir/dirlink", is_symlinked_dir), + ] { + assert!( + expectation(&stack.verified_path(rela_path.as_ref())?.symlink_metadata()?), + "{rela_path:?} expectation failed" + ); + } + } + Ok(()) +} + +#[test] +fn leaf_file_does_not_have_to_exist() -> crate::Result { + assert!(!stack().verified_path("dir/does-not-exist".as_ref())?.exists()); + Ok(()) +} + +#[test] +#[cfg(not(windows))] +fn intermediate_directories_have_to_exist_or_not_found_error() -> crate::Result { + assert_eq!( + stack() + .verified_path("nonexisting-dir/file".as_ref()) + .unwrap_err() + .kind(), + std::io::ErrorKind::NotFound + ); + Ok(()) +} + +#[test] +#[cfg(windows)] +fn intermediate_directories_do_not_have_exist_for_success() -> crate::Result { + assert!(stack().verified_path("nonexisting-dir/file".as_ref()).is_ok()); + Ok(()) +} + +#[test] +#[cfg_attr( + windows, + ignore = "on windows, symlinks appear to be files or dirs, is_symlink() doesn't work" +)] +fn paths_leading_through_symlinks_are_rejected() { + let mut stack = stack(); + assert_eq!( + stack + .verified_path("root-dirlink/file-in-dir".as_ref()) + .unwrap_err() + .kind(), + std::io::ErrorKind::Other, + "root-dirlink is a symlink to a directory" + ); + + assert_eq!( + stack.verified_path("dir/dirlink/nothing".as_ref()).unwrap_err().kind(), + std::io::ErrorKind::Other, + "root-dirlink is a symlink to a directory" + ); +} + +fn is_symlink(m: &std::fs::Metadata) -> bool { + if cfg!(windows) { + // On windows, symlinks can't be seen, at least not through std + m.is_file() + } else { + m.is_symlink() + } +} + +fn is_symlinked_dir(m: &std::fs::Metadata) -> bool { + if cfg!(windows) { + // On windows, symlinks can't be seen, at least not through std + m.is_dir() + } else { + m.is_symlink() + } +} +fn is_file(m: &std::fs::Metadata) -> bool { + m.is_file() +} +fn is_dir(m: &std::fs::Metadata) -> bool { + m.is_dir() +} diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 49ede98167d..49c264d9b95 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::AtomicBool; use std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, @@ -7,11 +8,13 @@ use bstr::BStr; use filetime::{set_file_mtime, FileTime}; use gix_index as index; use gix_index::Entry; +use gix_status::index_as_worktree::traits::SubmoduleStatus; +use gix_status::index_as_worktree::{Conflict, EntryStatus as WorktreeEntryStatus, Outcome, Record}; use gix_status::{ index_as_worktree, index_as_worktree::{ - content::{CompareBlobs, FastEq, ReadDataOnce}, - Change, Options, Recorder, + traits::{CompareBlobs, FastEq, ReadData}, + Change as WorktreeChange, Options, Recorder, }, }; @@ -28,26 +31,82 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { use_stdev: false, }; -fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { +type Change = WorktreeChange<(), ()>; +type EntryStatus = WorktreeEntryStatus<(), ()>; +type Expectation<'a> = (&'a BStr, usize, EntryStatus); + +fn fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { fixture_filtered(name, &[], expected_status) } -fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) { - let worktree = fixture_path(name); +fn fixture_with_index( + name: &str, + prepare_index: impl FnMut(&mut 
gix_index::State), + expected_status: &[Expectation<'_>], +) -> Outcome { + fixture_filtered_detailed(name, "", &[], expected_status, prepare_index, false) +} + +fn submodule_fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { + fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, false) +} + +fn conflict_fixture(name: &str, expected_status: &[Expectation<'_>]) -> Outcome { + fixture_filtered_detailed("conflicts", name, &[], expected_status, |_| {}, false) +} + +fn submodule_fixture_status(name: &str, expected_status: &[Expectation<'_>], submodule_dirty: bool) -> Outcome { + fixture_filtered_detailed("status_submodule", name, &[], expected_status, |_| {}, submodule_dirty) +} + +fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[Expectation<'_>]) -> Outcome { + fixture_filtered_detailed(name, "", pathspecs, expected_status, |_| {}, false) +} + +fn fixture_filtered_detailed( + name: &str, + subdir: &str, + pathspecs: &[&str], + expected_status: &[Expectation<'_>], + mut prepare_index: impl FnMut(&mut gix_index::State), + submodule_dirty: bool, +) -> Outcome { + // This can easily happen in some fixtures, which can cause flakyness. It's time-dependent after all. + fn ignore_racyclean(mut out: Outcome) -> Outcome { + out.racy_clean = 0; + out + } + + fn ignore_updated(mut out: Outcome) -> Outcome { + out.entries_to_update = 0; + out + } + + fn ignore_worktree_stats(mut out: Outcome) -> Outcome { + out.worktree_bytes = 0; + out.worktree_files_read = 0; + out + } + + let worktree = fixture_path(name).join(subdir); let git_dir = worktree.join(".git"); let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); + prepare_index(&mut index); let mut recorder = Recorder::default(); let search = gix_pathspec::Search::from_specs(to_pathspecs(pathspecs), None, std::path::Path::new("")) .expect("valid specs can be normalized"); - index_as_worktree( - &mut index, + let outcome = index_as_worktree( + &index, &worktree, &mut recorder, FastEq, + SubmoduleStatusMock { dirty: submodule_dirty }, |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), &mut gix_features::progress::Discard, Pathspec(search), + Default::default(), + &AtomicBool::default(), Options { fs: gix_fs::Capabilities::probe(&git_dir), stat: TEST_OPTIONS, @@ -55,8 +114,54 @@ fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, O }, ) .unwrap(); - recorder.records.sort_unstable_by_key(|(name, _, _)| *name); - assert_eq!(recorder.records, expected_status) + recorder.records.sort_unstable_by_key(|r| r.relative_path); + assert_eq!(records_to_tuple(recorder.records), expected_status); + ignore_racyclean(ignore_updated(ignore_worktree_stats(outcome))) +} + +/// Note that we also reset certain information to assure there is no flakyness - everything regarding race-detection otherwise can cause failures. 
+fn records_to_tuple<'index>(records: impl IntoIterator>) -> Vec> { + records + .into_iter() + .filter_map(|r| deracify_status(r.status).map(|status| (r.relative_path, r.entry_index, status))) + .collect() +} + +fn deracify_status(status: EntryStatus) -> Option { + Some(match status { + EntryStatus::Conflict(c) => EntryStatus::Conflict(c), + EntryStatus::Change(c) => match c { + Change::Removed => Change::Removed, + Change::Type => Change::Type, + Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero: _, + } => Change::Modification { + executable_bit_changed, + content_change, + set_entry_stat_size_zero: false, + }, + Change::SubmoduleModification(c) => Change::SubmoduleModification(c), + } + .into(), + EntryStatus::NeedsUpdate(_) => return None, + EntryStatus::IntentToAdd => EntryStatus::IntentToAdd, + }) +} + +#[derive(Clone)] +struct SubmoduleStatusMock { + dirty: bool, +} + +impl SubmoduleStatus for SubmoduleStatusMock { + type Output = (); + type Error = std::convert::Infallible; + + fn status(&mut self, _entry: &Entry, _rela_path: &BStr) -> Result, Self::Error> { + Ok(self.dirty.then_some(())) + } } fn to_pathspecs(input: &[&str]) -> Vec { @@ -66,48 +171,228 @@ fn to_pathspecs(input: &[&str]) -> Vec { .collect() } +fn status_removed() -> EntryStatus { + Change::Removed.into() +} + #[test] fn removed() { - fixture( + let out = fixture( "status_removed", &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), - (BStr::new(b"empty"), Some(Change::Removed), false), - (BStr::new(b"executable"), Some(Change::Removed), false), + (BStr::new(b"dir/content"), 0, status_removed()), + (BStr::new(b"dir/sub-dir/symlink"), 1, status_removed()), + (BStr::new(b"empty"), 2, status_removed()), + (BStr::new(b"executable"), 3, status_removed()), ], ); + assert_eq!( + out, + Outcome { + entries_to_process: 4, + entries_processed: 4, + symlink_metadata_calls: 4, + ..Default::default() + } + ); - fixture_filtered( + let out = fixture_filtered( "status_removed", &["dir"], &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), + (BStr::new(b"dir/content"), 0, status_removed()), + (BStr::new(b"dir/sub-dir/symlink"), 1, status_removed()), ], ); + assert_eq!( + out, + Outcome { + entries_to_process: 2, + entries_processed: 2, + entries_skipped_by_common_prefix: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); +} + +#[test] +fn subomdule_nochange() { + assert_eq!( + submodule_fixture("no-change", &[]), + Outcome { + entries_to_process: 2, + entries_processed: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); +} + +#[test] +fn subomdule_deleted_dir() { + assert_eq!( + submodule_fixture("deleted-dir", &[(BStr::new(b"m1"), 1, status_removed())]), + Outcome { + entries_to_process: 2, + entries_processed: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); +} + +#[test] +fn subomdule_typechange() { + assert_eq!( + submodule_fixture("type-change", &[(BStr::new(b"m1"), 1, Change::Type.into())]), + Outcome { + entries_to_process: 2, + entries_processed: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ) +} + +#[test] +fn subomdule_empty_dir_no_change() { + assert_eq!( + submodule_fixture("empty-dir-no-change", &[]), + Outcome { + entries_to_process: 2, + entries_processed: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); +} + +#[test] +fn 
subomdule_empty_dir_no_change_is_passed_to_submodule_handler() { + assert_eq!( + submodule_fixture_status( + "empty-dir-no-change", + &[(BStr::new(b"m1"), 1, Change::SubmoduleModification(()).into())], + true, + ), + Outcome { + entries_to_process: 2, + entries_processed: 2, + symlink_metadata_calls: 2, + ..Default::default() + } + ); } #[test] fn intent_to_add() { - fixture( - "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), false)], + assert_eq!( + fixture( + "status_intent_to_add", + &[(BStr::new(b"content"), 0, EntryStatus::IntentToAdd)], + ), + Outcome { + entries_to_process: 1, + entries_processed: 1, + symlink_metadata_calls: 1, + ..Default::default() + } ); } #[test] fn conflict() { - fixture( - "status_conflict", - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - true, - )], + assert_eq!( + fixture( + "status_conflict", + &[(BStr::new(b"content"), 0, EntryStatus::Conflict(Conflict::BothModified))], + ), + Outcome { + entries_to_process: 3, + entries_processed: 1, + ..Default::default() + }, + "2 entries were just related to the conflict, which we don't count as processed then" + ); +} + +#[test] +fn conflict_both_deleted_and_added_by_them_and_added_by_us() { + use Conflict::*; + assert_eq!( + conflict_fixture( + "both-deleted", + &[ + (BStr::new(b"added-by-them"), 0, EntryStatus::Conflict(AddedByThem)), + (BStr::new(b"added-by-us"), 1, EntryStatus::Conflict(AddedByUs)), + (BStr::new(b"file"), 2, EntryStatus::Conflict(BothDeleted)), + ], + ), + Outcome { + entries_to_process: 3, + entries_processed: 3, + ..Default::default() + }, + ); +} + +#[test] +fn conflict_both_added_and_deleted_by_them() { + use Conflict::*; + assert_eq!( + conflict_fixture( + "both-added", + &[ + (BStr::new(b"both-added"), 0, EntryStatus::Conflict(BothAdded)), + (BStr::new(b"deleted-by-them"), 2, EntryStatus::Conflict(DeletedByThem)), + ], + ), + Outcome { + entries_to_process: 4, + entries_processed: 2, + ..Default::default() + }, + ); +} + +#[test] +fn conflict_detailed_single() { + use Conflict::*; + for (name, expected, entry_index, entries_to_process, entries_processed) in [ + ("deleted-by-them", DeletedByThem, 0, 2, 1), + ("deleted-by-us", DeletedByUs, 0, 2, 1), + ("both-modified", BothModified, 0, 3, 1), + ] { + assert_eq!( + conflict_fixture( + name, + &[(BStr::new(b"file"), entry_index, EntryStatus::Conflict(expected))], + ), + Outcome { + entries_to_process, + entries_processed, + ..Default::default() + }, + "{name}" + ); + } +} + +#[test] +fn submodule_conflict() { + assert_eq!( + submodule_fixture( + "conflict", + &[(BStr::new(b"m1"), 1, EntryStatus::Conflict(Conflict::DeletedByUs))] + ), + Outcome { + entries_to_process: 3, + entries_processed: 2, + symlink_metadata_calls: 1, + ..Default::default() + }, + "1 metadata call for .gitmodules, conflicting entries are not queried for status anymore." 
); } @@ -117,41 +402,168 @@ fn unchanged() { } #[test] -#[cfg_attr( - windows, - ignore = "needs work, on windows plenty of additional files are considered modified for some reason" -)] +fn refresh() { + let expected_outcome = Outcome { + entries_to_process: 5, + entries_processed: 5, + symlink_metadata_calls: 5, + ..Default::default() + }; + assert_eq!( + fixture_with_index( + "status_changed", + |index| { index.entries_mut().iter_mut().for_each(|e| e.stat = Default::default()) }, + #[cfg(not(windows))] + &[ + ( + BStr::new(b"dir/content"), + 0, + Change::Modification { + executable_bit_changed: true, + content_change: None, + set_entry_stat_size_zero: false + } + .into(), + ), + ( + BStr::new(b"dir/content2"), + 1, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), + ), + (BStr::new(b"empty"), 3, Change::Type.into()), + ( + BStr::new(b"executable"), + 4, + Change::Modification { + executable_bit_changed: true, + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), + ), + ], + #[cfg(windows)] + &[ + ( + BStr::new("dir/content2"), + 1, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), + ), + ( + BStr::new("empty"), + 3, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), + ), + ( + BStr::new("executable"), + 4, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false + } + .into(), + ) + ], + ), + expected_outcome, + ); +} + +#[test] fn modified() { - fixture( + let expected_outcome = Outcome { + entries_to_process: 5, + entries_processed: 5, + symlink_metadata_calls: 5, + ..Default::default() + }; + let actual_outcome = fixture( "status_changed", + #[cfg(not(windows))] &[ ( BStr::new(b"dir/content"), - Some(Change::Modification { + 0, + Change::Modification { executable_bit_changed: true, content_change: None, - }), - false, + set_entry_stat_size_zero: false, + } + .into(), ), ( BStr::new(b"dir/content2"), - Some(Change::Modification { + 1, + Change::Modification { executable_bit_changed: false, content_change: Some(()), - }), - false, + set_entry_stat_size_zero: false, + } + .into(), ), - (BStr::new(b"empty"), Some(Change::Type), false), + (BStr::new(b"empty"), 3, Change::Type.into()), ( BStr::new(b"executable"), - Some(Change::Modification { + 4, + Change::Modification { executable_bit_changed: true, content_change: Some(()), - }), - false, + set_entry_stat_size_zero: false, + } + .into(), + ), + ], + #[cfg(windows)] + &[ + ( + BStr::new("dir/content2"), + 1, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false, + } + .into(), + ), + ( + BStr::new("empty"), + 3, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false, + } + .into(), + ), + ( + BStr::new("executable"), + 4, + Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + set_entry_stat_size_zero: false, + } + .into(), ), ], ); + assert_eq!(actual_outcome, expected_outcome,); } #[test] @@ -172,16 +584,15 @@ fn racy_git() { impl CompareBlobs for CountCalls { type Output = (); - fn compare_blobs<'a, E>( + fn compare_blobs<'a, 'b>( &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - 
entry_blob: impl ReadDataOnce<'a, E>,
-    ) -> Result<Option<Self::Output>, E> {
+        entry: &Entry,
+        worktree_file_size: u64,
+        data: impl ReadData<'a>,
+        buf: &mut Vec<u8>,
+    ) -> Result<Option<Self::Output>, gix_status::index_as_worktree::Error> {
         self.0.fetch_add(1, Ordering::Relaxed);
-        self.1
-            .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob)
+        self.1.compare_blobs(entry, worktree_file_size, data, buf)
     }
 }
@@ -198,14 +609,17 @@ fn racy_git() {

     let count = Arc::new(AtomicUsize::new(0));
     let counter = CountCalls(count.clone(), FastEq);
-    index_as_worktree(
-        &mut index,
+    let out = index_as_worktree(
+        &index,
         worktree,
         &mut recorder,
         counter.clone(),
+        SubmoduleStatusMock { dirty: false },
         |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")),
         &mut gix_features::progress::Discard,
         Pathspec::default(),
+        Default::default(),
+        &AtomicBool::default(),
         Options {
             fs,
             stat: TEST_OPTIONS,
@@ -213,22 +627,38 @@
         },
     )
     .unwrap();
+    assert_eq!(
+        out,
+        Outcome {
+            entries_to_process: 1,
+            entries_processed: 1,
+            symlink_metadata_calls: 1,
+            ..Default::default()
+        }
+    );
     assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed");
-    assert_eq!(recorder.records, &[], "the testcase triggers racy git");
+    assert_eq!(
+        records_to_tuple(recorder.records),
+        &[],
+        "the testcase triggers racy git"
+    );

     // Now we also backdate the index timestamp to match the artificially created
     // mtime above this is now a realistic realworld race-condition which should trigger racy git
     // and cause proper output.
     index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0));
     let mut recorder = Recorder::default();
-    index_as_worktree(
-        &mut index,
+    let out = index_as_worktree(
+        &index,
         worktree,
         &mut recorder,
         counter,
+        SubmoduleStatusMock { dirty: false },
         |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")),
         &mut gix_features::progress::Discard,
         Pathspec::default(),
+        Default::default(),
+        &AtomicBool::default(),
         Options {
             fs,
             stat: TEST_OPTIONS,
@@ -236,20 +666,34 @@
         },
     )
     .unwrap();
+    assert_eq!(
+        out,
+        Outcome {
+            entries_to_process: 1,
+            entries_processed: 1,
+            symlink_metadata_calls: 1,
+            racy_clean: 1,
+            worktree_bytes: 3,
+            worktree_files_read: 1,
+            ..Default::default()
+        }
+    );
     assert_eq!(
         count.load(Ordering::Relaxed),
         1,
         "no we needed to access the blob content"
     );
     assert_eq!(
-        recorder.records,
+        records_to_tuple(recorder.records),
         &[(
             BStr::new(b"content"),
-            Some(Change::Modification {
+            0,
+            Change::Modification {
                 executable_bit_changed: false,
                 content_change: Some(()),
-            }),
-            false
+                set_entry_stat_size_zero: false
+            }
+            .into(),
         )],
         "racy change is correctly detected"
     );
diff --git a/gix-status/tests/worktree.rs b/gix-status/tests/worktree.rs
index 05bcf94ca2b..8508f38a9c4 100644
--- a/gix-status/tests/worktree.rs
+++ b/gix-status/tests/worktree.rs
@@ -1,2 +1,4 @@
+pub use gix_testtools::Result;
+mod stack;
 mod status;
 use status::*;
diff --git a/gix-worktree-state/src/checkout/function.rs b/gix-worktree-state/src/checkout/function.rs
index 4e3f454ccdd..8b0ea923b55 100644
--- a/gix-worktree-state/src/checkout/function.rs
+++ b/gix-worktree-state/src/checkout/function.rs
@@ -54,11 +54,6 @@ where
     let num_files = files.counter();
     let num_bytes = bytes.counter();
     let dir = dir.into();
-    let case = if options.fs.ignore_case {
-        gix_glob::pattern::Case::Fold
-    } else {
-        gix_glob::pattern::Case::Sensitive
-    };
     let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit(
         100,
         index.entries().len().into(),
@@ -66,12 +61,16 @@
         None,
     );

-    let state = stack::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes));
-    let attribute_files = state.id_mappings_from_index(index, paths, case);
     let mut ctx = chunk::Context {
         buf: Vec::new(),
         options: (&options).into(),
-        path_cache: Stack::new(dir, state, case, Vec::with_capacity(512), attribute_files),
+        path_cache: Stack::from_state_and_ignore_case(
+            dir,
+            options.fs.ignore_case,
+            stack::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)),
+            index,
+            paths,
+        ),
         filters: options.filters,
         find,
     };
diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs
index 32d1d7c0e3b..b8e2e89746b 100644
--- a/gix-worktree/src/lib.rs
+++ b/gix-worktree/src/lib.rs
@@ -10,6 +10,7 @@
 #![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
 #![deny(missing_docs, rust_2018_idioms, unsafe_code)]
 use bstr::BString;
+pub use gix_glob;

 /// A cache for efficiently executing operations on directories and files which are encountered in sorted order.
 /// That way, these operations can be re-used for subsequent invocations in the same directory.
diff --git a/gix-worktree/src/stack/delegate.rs b/gix-worktree/src/stack/delegate.rs
index 28d8ecf348d..8e48057dea7 100644
--- a/gix-worktree/src/stack/delegate.rs
+++ b/gix-worktree/src/stack/delegate.rs
@@ -56,7 +56,7 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> {
         };
         match &mut self.state {
             #[cfg(feature = "attributes")]
-            State::CreateDirectoryAndAttributesStack { attributes, .. } => {
+            State::CreateDirectoryAndAttributesStack { attributes, .. } | State::AttributesStack(attributes) => {
                 attributes.push_directory(
                     stack.root(),
                     stack.current(),
@@ -89,16 +89,6 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> {
                     &mut self.statistics.ignore,
                 )?
             }
-            #[cfg(feature = "attributes")]
-            State::AttributesStack(attributes) => attributes.push_directory(
-                stack.root(),
-                stack.current(),
-                rela_dir,
-                self.buf,
-                self.id_mappings,
-                &mut self.find,
-                &mut self.statistics.attributes,
-            )?,
             State::IgnoreStack(ignore) => ignore.push_directory(
                 stack.root(),
                 stack.current(),
@@ -139,7 +129,7 @@ impl<'a, 'find> gix_fs::stack::Delegate for StackDelegate<'a, 'find> {
         self.statistics.delegate.pop_directory += 1;
         match &mut self.state {
             #[cfg(feature = "attributes")]
-            State::CreateDirectoryAndAttributesStack { attributes, .. } => {
+            State::CreateDirectoryAndAttributesStack { attributes, .. } | State::AttributesStack(attributes) => {
                 attributes.pop_directory();
             }
             #[cfg(feature = "attributes")]
@@ -147,10 +137,6 @@
                 attributes.pop_directory();
                 ignore.pop_directory();
             }
-            #[cfg(feature = "attributes")]
-            State::AttributesStack(attributes) => {
-                attributes.pop_directory();
-            }
             State::IgnoreStack(ignore) => {
                 ignore.pop_directory();
             }
diff --git a/gix-worktree/src/stack/mod.rs b/gix-worktree/src/stack/mod.rs
index c103201996f..228467f5bee 100644
--- a/gix-worktree/src/stack/mod.rs
+++ b/gix-worktree/src/stack/mod.rs
@@ -77,6 +77,27 @@ impl Stack {
             statistics: Statistics::default(),
         }
     }
+
+    /// Create a new stack that takes into consideration the `ignore_case` result of a filesystem probe in `root`. It takes a configured
+    /// `state` to control what it can do, while initializing attribute or ignore files that are to be queried from the ODB using
+    /// `index` and `path_backing`.
+    ///
+    /// This is the easiest way to correctly setup a stack.
+    pub fn from_state_and_ignore_case(
+        root: impl Into<PathBuf>,
+        ignore_case: bool,
+        state: State,
+        index: &gix_index::State,
+        path_backing: &gix_index::PathStorageRef,
+    ) -> Self {
+        let case = if ignore_case {
+            gix_glob::pattern::Case::Fold
+        } else {
+            gix_glob::pattern::Case::Sensitive
+        };
+        let attribute_files = state.id_mappings_from_index(index, path_backing, case);
+        Stack::new(root, state, case, Vec::with_capacity(512), attribute_files)
+    }
 }

 /// Entry points for attribute query
diff --git a/gix-worktree/src/stack/state/mod.rs b/gix-worktree/src/stack/state/mod.rs
index 0b371425acd..30e3c609f1b 100644
--- a/gix-worktree/src/stack/state/mod.rs
+++ b/gix-worktree/src/stack/state/mod.rs
@@ -71,11 +71,6 @@ impl State {
     pub fn for_add(attributes: Attributes, ignore: Ignore) -> Self {
         State::AttributesAndIgnoreStack { attributes, ignore }
     }
-
-    /// Configure a state for status retrieval, which needs access to ignore files only.
-    pub fn for_status(ignore: Ignore) -> Self {
-        State::IgnoreStack(ignore)
-    }
 }

 /// Utilities
diff --git a/gix-worktree/tests/fixtures/generated-archives/.gitignore b/gix-worktree/tests/fixtures/generated-archives/.gitignore
index 6f631797de0..b39f9769fde 100644
--- a/gix-worktree/tests/fixtures/generated-archives/.gitignore
+++ b/gix-worktree/tests/fixtures/generated-archives/.gitignore
@@ -1,2 +1,3 @@
 make_ignore_and_attributes_setup.tar.xz
 make_attributes_baseline.tar.xz
+symlink_stack.tar.xz
diff --git a/gix-worktree/tests/fixtures/symlink_stack.sh b/gix-worktree/tests/fixtures/symlink_stack.sh
new file mode 100644
index 00000000000..0bddba20d8d
--- /dev/null
+++ b/gix-worktree/tests/fixtures/symlink_stack.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -eu -o pipefail
+
+git init base;
+(cd base
+  touch file
+  mkdir dir
+  touch dir/file-in-dir
+  (cd dir
+    ln -s file-in-dir filelink
+    mkdir subdir
+    ln -s subdir dirlink
+  )
+
+  ln -s file root-filelink
+  ln -s dir root-dirlink
+
+cat <<EOF >.gitattributes
+/file file-attr
+/dir/file-in-dir dir-file-attr
+EOF
+  git add . && git commit -m "init"
+)
+
+ln -s base symlink-base
+
diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs
index 352bc97124b..5f4fbe72f11 100644
--- a/gix/src/config/cache/access.rs
+++ b/gix/src/config/cache/access.rs
@@ -158,13 +158,7 @@ impl Cache {
     pub(crate) fn stat_options(&self) -> Result {
         use crate::config::tree::gitoxide;
         Ok(gix_index::entry::stat::Options {
-            trust_ctime: boolean(
-                self,
-                "core.trustCTime",
-                &Core::TRUST_C_TIME,
-                // For now, on MacOS it's known to not be trust-worthy at least with the Rust STDlib, being 2s off
-                !cfg!(target_os = "macos"),
-            )?,
+            trust_ctime: boolean(self, "core.trustCTime", &Core::TRUST_C_TIME, true)?,
             use_nsec: boolean(self, "gitoxide.core.useNsec", &gitoxide::Core::USE_NSEC, false)?,
             use_stdev: boolean(self, "gitoxide.core.useStdev", &gitoxide::Core::USE_STDEV, false)?,
             check_stat: self
diff --git a/gix/src/config/tree/sections/core.rs b/gix/src/config/tree/sections/core.rs
index ab3e2bab93f..2ec5c279ea3 100644
--- a/gix/src/config/tree/sections/core.rs
+++ b/gix/src/config/tree/sections/core.rs
@@ -45,8 +45,7 @@ impl Core {
     /// The `core.symlinks` key.
     pub const SYMLINKS: keys::Boolean = keys::Boolean::new_boolean("symlinks", &config::Tree::CORE);
     /// The `core.trustCTime` key.
-    pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE)
-        .with_deviation("Currently the default is false, instead of true, as it seems to be 2s off in tests");
+    pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE);
     /// The `core.worktree` key.
     pub const WORKTREE: keys::Any = keys::Any::new("worktree", &config::Tree::CORE)
         .with_environment_override("GIT_WORK_TREE")
diff --git a/gix/src/filter.rs b/gix/src/filter.rs
index 935c91108ae..b106840a79a 100644
--- a/gix/src/filter.rs
+++ b/gix/src/filter.rs
@@ -150,8 +150,8 @@ impl<'repo> Pipeline<'repo> {
             &mut |_, attrs| {
                 entry.matching_attributes(attrs);
             },
-            &mut |rela_path, buf| -> Result<_, gix_odb::find::Error> {
-                let entry = match index.entry_by_path(rela_path) {
+            &mut |buf| -> Result<_, gix_odb::find::Error> {
+                let entry = match index.entry_by_path(gix_path::into_bstr(rela_path).as_ref()) {
                     None => return Ok(None),
                     Some(entry) => entry,
                 };
diff --git a/gix/src/repository/index.rs b/gix/src/repository/index.rs
index a21b138a569..59666fc5f10 100644
--- a/gix/src/repository/index.rs
+++ b/gix/src/repository/index.rs
@@ -55,6 +55,16 @@ impl crate::Repository {
         })
     }

+    /// Return the shared worktree index if present, or return a new empty one which has an association to the place where the index would be.
+    pub fn index_or_empty(&self) -> Result {
+        Ok(self.try_index()?.unwrap_or_else(|| {
+            worktree::Index::new(gix_fs::FileSnapshot::new(gix_index::File::from_state(
+                gix_index::State::new(self.object_hash()),
+                self.index_path(),
+            )))
+        }))
+    }
+
     /// Return a shared worktree index which is updated automatically if the in-memory snapshot has become stale as the underlying file
     /// on disk has changed, or `None` if no such file exists.
     ///
diff --git a/gix/tests/repository/worktree.rs b/gix/tests/repository/worktree.rs
index ddeba514274..a3cc89b250f 100644
--- a/gix/tests/repository/worktree.rs
+++ b/gix/tests/repository/worktree.rs
@@ -111,6 +111,7 @@ mod with_core_worktree_config {
     }

     #[test]
+    #[cfg(feature = "index")]
     fn bare_relative() -> crate::Result {
         let repo = repo("bare-relative-worktree");

@@ -123,6 +124,8 @@ mod with_core_worktree_config {
             repo.work_dir().is_none(),
             "we simply don't load core.worktree in bare repos either to match this behaviour"
         );
+        assert!(repo.try_index()?.is_none());
+        assert!(repo.index_or_empty()?.entries().is_empty());

         Ok(())
     }
diff --git a/justfile b/justfile
index 0ff0d59771a..39b8d3fad58 100755
--- a/justfile
+++ b/justfile
@@ -67,7 +67,10 @@ check:
     cargo check --features verbose-object-parsing-errors
     cd gix-attributes && cargo check --features serde
     cd gix-glob && cargo check --features serde
-    cd gix-worktree && cargo check --features serde
+    cd gix-worktree; \
+        set -ex; \
+        cargo check --features serde; \
+        cargo check --no-default-features;
     cd gix-actor && cargo check --features serde
     cd gix-date && cargo check --features serde
     cargo check -p gix-tempfile --features signals
diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs
index 04a4fa36bcd..a7c8d8a69bc 100644
--- a/src/plumbing/main.rs
+++ b/src/plumbing/main.rs
@@ -133,7 +133,12 @@ pub fn main() -> Result<()> {
     })?;

     match cmd {
-        Subcommands::Status(crate::plumbing::options::status::Platform { submodules, pathspec }) => prepare_and_run(
+        Subcommands::Status(crate::plumbing::options::status::Platform {
+            statistics,
+            submodules,
+            no_write,
+            pathspec,
+        }) => prepare_and_run(
            "status",
            trace,
            auto_verbose,
@@ -150,7 +155,9 @@ pub fn main() -> Result<()> {
                    progress,
                    core::repository::status::Options {
                        format,
-                        thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though!
+                        statistics,
+                        thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! MacOS also scales if reading a lot of files for refresh index
+                        allow_write: !no_write,
                        submodules: match submodules {
                            Submodules::All => core::repository::status::Submodules::All,
                            Submodules::RefChange => core::repository::status::Submodules::RefChange,
diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs
index 300f1810e61..64c67e43e5a 100644
--- a/src/plumbing/options/mod.rs
+++ b/src/plumbing/options/mod.rs
@@ -205,6 +205,12 @@ pub mod status {
         /// Define how to display submodule status.
         #[clap(long, default_value = "all")]
         pub submodules: Submodules,
+        /// Print additional statistics to help understanding performance.
+        #[clap(long, short = 's')]
+        pub statistics: bool,
+        /// Don't write back a changed index, which forces this operation to always be idempotent.
+        #[clap(long)]
+        pub no_write: bool,
         /// The git path specifications to list attributes for, or unset to read from stdin one per line.
         #[clap(value_parser = CheckPathSpec)]
         pub pathspec: Vec<std::ffi::OsString>,
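
For orientation, here is a minimal sketch of how a caller might use the new `Stack::from_state_and_ignore_case` constructor added to `gix-worktree` above, instead of deriving `gix_glob::pattern::Case` and the id-mappings by hand as the removed `gix-worktree-state` code did. This is illustrative only and not part of the patch: the function name `checkout_stack`, the hard-coded `false` for `overwrite_existing`, and the use of `gix_index::State::path_backing()` to obtain the path backing are assumptions made for the example.

    // Sketch only, not part of this patch. `checkout_stack` and the hard-coded `false`
    // for `overwrite_existing` are placeholders; `path_backing()` is assumed to be the
    // accessor yielding `&gix_index::PathStorageRef` for the given index.
    use std::path::Path;

    use gix_worktree::{stack, Stack};

    fn checkout_stack(
        worktree_root: &Path,
        ignore_case: bool, // typically the result of a filesystem probe
        attributes: stack::state::Attributes,
        index: &gix_index::State,
    ) -> Stack {
        Stack::from_state_and_ignore_case(
            worktree_root,
            ignore_case,
            stack::State::for_checkout(false /* overwrite_existing */, attributes),
            index,
            index.path_backing(),
        )
    }

Compared to the removed code path, the `Case` derivation and the `id_mappings_from_index()` call now live inside the constructor, so a caller cannot accidentally pair a case-folding stack with id-mappings that were built case-sensitively.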