From 91b65495f14cd58c8b178f359354749b39dc4008 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Wed, 8 May 2024 08:49:02 +0200
Subject: [PATCH 1/9] fix MSRV check on Windows

Turns out that PowerShell is used by default there, and variable
substitutions work differently. What's worse is that it doesn't seem to
fail if something doesn't work as one would expect.
---
 .github/workflows/msrv.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/msrv.yml b/.github/workflows/msrv.yml
index 19e6fb382a6..4d2e98cbfd7 100644
--- a/.github/workflows/msrv.yml
+++ b/.github/workflows/msrv.yml
@@ -25,6 +25,6 @@ jobs:
       - uses: actions/checkout@v4
       - uses: extractions/setup-just@v2
       - run: |
-          rustup toolchain install $rust_version --profile minimal --no-self-update
-          rustup default $rust_version
+          rustup toolchain install ${{ env.rust_version }} --profile minimal --no-self-update
+          rustup default ${{ env.rust_version }}
       - run: just ci-check-msrv

From 5197b5abd988002ffbb40f34bbe000ce5dcaffcf Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 9 May 2024 16:26:39 +0200
Subject: [PATCH 2/9] improve docs to be more approachable from `git2`

---
 gix/src/status/index_worktree.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gix/src/status/index_worktree.rs b/gix/src/status/index_worktree.rs
index 43d517a07a2..6b47e62f81f 100644
--- a/gix/src/status/index_worktree.rs
+++ b/gix/src/status/index_worktree.rs
@@ -611,6 +611,7 @@ pub mod iter {
     ///
     /// * `patterns`
     ///     - Optional patterns to use to limit the paths to look at. If empty, all paths are considered.
+    #[doc(alias = "diff_index_to_workdir", alias = "git2")]
    pub fn into_index_worktree_iter(
        self,
        patterns: impl IntoIterator<Item = BString>,

From 3448fd9cc0edc93d7a5b511fd4ec0a8e84b87e51 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sat, 11 May 2024 10:45:27 +0200
Subject: [PATCH 3/9] fix: assure writing invalid dates doesn't panic (#1367).

---
 gix-date/src/time/write.rs   |  22 +++--
 gix-date/tests/time/mod.rs   | 162 +++++++++++++++++++++++------------
 gix-date/tests/time/parse.rs |  21 +++++
 3 files changed, 146 insertions(+), 59 deletions(-)

diff --git a/gix-date/src/time/write.rs b/gix-date/src/time/write.rs
index efc02cc1e3a..5489dd3871d 100644
--- a/gix-date/src/time/write.rs
+++ b/gix-date/src/time/write.rs
@@ -5,6 +5,10 @@ use crate::{time::Sign, Time};
 /// Serialization with standard `git` format
 impl Time {
     /// Serialize this instance into memory, similar to what [`write_to()`][Self::write_to()] would do with arbitrary `Write` implementations.
+    ///
+    /// # Panics
+    ///
+    /// If the underlying call fails as this instance can't be represented, typically due to an invalid offset.
     pub fn to_bstring(&self) -> BString {
         let mut buf = Vec::with_capacity(64);
         self.write_to(&mut buf).expect("write to memory cannot fail");
@@ -13,6 +17,18 @@ impl Time {
     /// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags.
     pub fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
+        const SECONDS_PER_HOUR: u32 = 60 * 60;
+        let offset = self.offset.unsigned_abs();
+        let hours = offset / SECONDS_PER_HOUR;
+        let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;
+
+        if hours > 99 {
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "Cannot represent offsets larger than +-9900",
+            ));
+        }
+
         let mut itoa = itoa::Buffer::new();
         out.write_all(itoa.format(self.seconds).as_bytes())?;
         out.write_all(b" ")?;
@@ -23,12 +39,6 @@ impl Time {
 
         const ZERO: &[u8; 1] = b"0";
 
-        const SECONDS_PER_HOUR: u32 = 60 * 60;
-        let offset = self.offset.unsigned_abs();
-        let hours = offset / SECONDS_PER_HOUR;
-        assert!(hours < 25, "offset is more than a day: {hours}");
-        let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;
-
         if hours < 10 {
             out.write_all(ZERO)?;
         }
diff --git a/gix-date/tests/time/mod.rs b/gix-date/tests/time/mod.rs
index 0fb4befd058..82f1f37a81e 100644
--- a/gix-date/tests/time/mod.rs
+++ b/gix-date/tests/time/mod.rs
@@ -1,5 +1,4 @@
-use bstr::ByteSlice;
-use gix_date::{time::Sign, SecondsSinceUnixEpoch, Time};
+use gix_date::Time;
 
 mod baseline;
 mod format;
@@ -32,57 +31,114 @@ fn is_set() {
         .is_set());
 }
 
-#[test]
-fn write_to() -> Result<(), Box<dyn std::error::Error>> {
-    for (time, expected) in [
-        (
-            Time {
-                seconds: SecondsSinceUnixEpoch::MAX,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "9223372036854775807 -0000",
-        ),
-        (
-            Time {
-                seconds: SecondsSinceUnixEpoch::MIN,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "-9223372036854775808 -0000",
-        ),
-        (
-            Time {
-                seconds: 500,
-                offset: 9000,
-                sign: Sign::Plus,
-            },
-            "500 +0230",
-        ),
-        (
-            Time {
-                seconds: 189009009,
-                offset: -36000,
-                sign: Sign::Minus,
-            },
-            "189009009 -1000",
-        ),
-        (
-            Time {
-                seconds: 0,
-                offset: 0,
-                sign: Sign::Minus,
-            },
-            "0 -0000",
-        ),
-    ] {
-        let mut output = Vec::new();
-        time.write_to(&mut output)?;
-        assert_eq!(output.as_bstr(), expected);
-        assert_eq!(time.size(), output.len());
+mod write_to {
+    use bstr::ByteSlice;
+    use gix_date::time::Sign;
+    use gix_date::{SecondsSinceUnixEpoch, Time};
+
+    #[test]
+    fn invalid() {
+        let time = Time {
+            seconds: 0,
+            offset: (100 * 60 * 60) + 30 * 60,
+            sign: Sign::Plus,
+        };
+        let err = time.write_to(&mut Vec::new()).unwrap_err();
+        assert_eq!(err.to_string(), "Cannot represent offsets larger than +-9900");
+    }
 
-        let actual = gix_date::parse(&output.as_bstr().to_string(), None).expect("round-trippable");
-        assert_eq!(time, actual);
+    #[test]
+    fn valid_roundtrips() -> Result<(), Box<dyn std::error::Error>> {
+        for (time, expected) in [
+            (
+                Time {
+                    seconds: SecondsSinceUnixEpoch::MAX,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "9223372036854775807 -0000",
+            ),
+            (
+                Time {
+                    seconds: SecondsSinceUnixEpoch::MIN,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "-9223372036854775808 -0000",
+            ),
+            (
+                Time {
+                    seconds: 500,
+                    offset: 9000,
+                    sign: Sign::Plus,
+                },
+                "500 +0230",
+            ),
+            (
+                Time {
+                    seconds: 189009009,
+                    offset: -36000,
+                    sign: Sign::Minus,
+                },
+                "189009009 -1000",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: 0,
+                    sign: Sign::Minus,
+                },
+                "0 -0000",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: -24 * 60 * 60,
+                    sign: Sign::Minus,
+                },
+                "0 -2400",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: 24 * 60 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +2400",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (25 * 60 * 60) + 30 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +2530",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (-25 * 60 * 60) - 30 * 60,
+                    sign: Sign::Minus,
+                },
+                "0 -2530",
+            ),
+            (
+                Time {
+                    seconds: 0,
+                    offset: (99 * 60 * 60) + 59 * 60,
+                    sign: Sign::Plus,
+                },
+                "0 +9959",
+            ),
+        ] {
+            let mut output = Vec::new();
+            time.write_to(&mut output)?;
+            assert_eq!(output.as_bstr(), expected);
+            assert_eq!(time.size(), output.len());
+
+            let actual = gix_date::parse(&output.as_bstr().to_string(), None).expect("round-trippable");
+            assert_eq!(time, actual);
+        }
+        Ok(())
     }
-    Ok(())
 }
diff --git a/gix-date/tests/time/parse.rs b/gix-date/tests/time/parse.rs
index 5b23fe1077a..8b96cb16ed2 100644
--- a/gix-date/tests/time/parse.rs
+++ b/gix-date/tests/time/parse.rs
@@ -91,6 +91,7 @@ fn bad_raw() {
         "123456 +060",
         "123456 -060",
         "123456 +06000",
+        "123456 +10030",
         "123456 06000",
         "123456 0600",
         "123456 +0600 extra",
@@ -101,6 +102,26 @@ fn bad_raw() {
     }
 }
 
+#[test]
+fn double_negation_in_offset() {
+    let actual = gix_date::parse("1288373970 --700", None).unwrap();
+    assert_eq!(
+        actual,
+        gix_date::Time {
+            seconds: 1288373970,
+            offset: 25200,
+            sign: Sign::Minus,
+        },
+        "double-negation stays negative, and is parseable."
+    );
+
+    assert_eq!(
+        actual.to_bstring(),
+        "1288373970 -0700",
+        "serialization corrects the issue"
+    );
+}
+
 #[test]
 fn git_default() {
     assert_eq!(

From 7b3dc92d04e5240a7bdbcdaa4019ac93cec6eaa3 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sat, 11 May 2024 15:26:58 +0200
Subject: [PATCH 4/9] fix: use `tempfile` permissions support to set the
 correct mode on unix.

Previously it would make an additional syscall to change permissions,
which is slower than necessary.
---
 gix-odb/Cargo.toml                     | 16 ++++----------
 gix-odb/src/store_impls/loose/write.rs | 28 +++++++++-----------------
 2 files changed, 13 insertions(+), 31 deletions(-)

diff --git a/gix-odb/Cargo.toml b/gix-odb/Cargo.toml
index f58e5775119..d7a15d873ca 100644
--- a/gix-odb/Cargo.toml
+++ b/gix-odb/Cargo.toml
@@ -15,10 +15,10 @@ doctest = false
 
 [features]
 ## Data structures implement `serde::Serialize` and `serde::Deserialize`.
-serde= ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]
+serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]
 
 [dependencies]
-gix-features = { version = "^0.38.1", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32" ] }
+gix-features = { version = "^0.38.1", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] }
 gix-hash = { version = "^0.14.2", path = "../gix-hash" }
 gix-date = { version = "^0.8.5", path = "../gix-date" }
 gix-path = { version = "^0.10.7", path = "../gix-path" }
@@ -26,22 +26,14 @@ gix-quote = { version = "^0.4.12", path = "../gix-quote" }
 gix-object = { version = "^0.42.0", path = "../gix-object" }
 gix-pack = { version = "^0.50.0", path = "../gix-pack", default-features = false }
 gix-fs = { version = "^0.10.2", path = "../gix-fs" }
-serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]}
+serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
 
-tempfile = "3.1.0"
+tempfile = "3.10.0"
 thiserror = "1.0.26"
 parking_lot = { version = "0.12.0" }
 arc-swap = "1.5.0"
 document-features = { version = "0.2.0", optional = true }
 
-#[dev-dependencies]
-#gix-testtools = { path = "../tests/tools"}
-#gix-actor = { path = "../gix-actor" }
-#pretty_assertions = "1.0.0"
-#filetime = "0.2.15"
-#maplit = "1.0.2"
-#crossbeam-channel = "0.5.6"
-
 [package.metadata.docs.rs]
 features = ["document-features", "serde"]
diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs
index 2cac12d1814..a70351a1574 100644
--- a/gix-odb/src/store_impls/loose/write.rs
+++ b/gix-odb/src/store_impls/loose/write.rs
@@ -107,8 +107,16 @@ impl Store {
 impl Store {
     fn dest(&self) -> Result<hash::Write<deflate::Write<NamedTempFile>>, Error> {
+        #[cfg_attr(not(unix), allow(unused_mut))]
+        let mut builder = tempfile::Builder::new();
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            let perms = std::fs::Permissions::from_mode(0o444);
+            builder.permissions(perms);
+        }
         Ok(hash::Write::new(
-            deflate::Write::new(NamedTempFile::new_in(&self.path).map_err(|err| Error::Io {
+            deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| Error::Io {
                 source: err,
                 message: "create named temp file in",
                 path: self.path.to_owned(),
@@ -144,24 +152,6 @@ impl Store {
                 return Ok(id);
             }
         }
-        #[cfg(unix)]
-        if let Ok(mut perm) = object_path.metadata().map(|m| m.permissions()) {
-            use std::os::unix::fs::PermissionsExt;
-            /// For now we assume the default with standard umask. This can be more sophisticated,
-            /// but we have the bare minimum.
-            fn comp_mode(_mode: u32) -> u32 {
-                0o444
-            }
-            let new_mode = comp_mode(perm.mode());
-            if (perm.mode() ^ new_mode) & !0o170000 != 0 {
-                perm.set_mode(new_mode);
-                std::fs::set_permissions(&object_path, perm).map_err(|err| Error::Io {
-                    source: err,
-                    message: "Failed to set permission bits",
-                    path: object_path.clone(),
-                })?;
-            }
-        }
         res.map_err(|err| Error::Persist {
             source: err,
             target: object_path,

From addf446f052ff74edcdb083f2b2968b313daa940 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sat, 11 May 2024 15:40:41 +0200
Subject: [PATCH 5/9] fix: more robustness in the face of a trampling-herd of
 threads loading a single index.

The motivating example is here:
https://github.com/praetorian-inc/noseyparker/issues/179

Previously, it was possible for a trampling herd of threads to
consolidate the disk state.
Most of them would be 'needs-init' threads which could notice that the
initialization already happened, and just use that. But a thread might
be late for the party and somehow manage to not get any newly loaded
index, and thus tries to consolidate with what's on disk again. Then it
would again determine no change, and return nothing, causing the caller
to abort and not find objects it should find because it wouldn't see
the index that it should have seen.

The reason the thread got into this mess is that the 'is-load-ongoing'
flagging was racy itself, so it would not wait for ongoing loads and
just conclude nothing happened. An extra delay (by yielding) now
assures it either sees the loading state and waits for it, or sees the
newly loaded indices.

Note that this issue can be reproduced with:

```
'./target/release/gix -r repo-with-one-pack -t10 --trace odb stats --extra-header-lookup'
```
---
 gitoxide-core/src/repository/odb.rs           |  76 ++++++++++--
 gix-odb/src/store_impls/dynamic/load_index.rs | 111 ++++++++++--------
 gix-odb/src/store_impls/dynamic/types.rs      |   4 +-
 src/plumbing/main.rs                          |   8 +-
 src/plumbing/options/mod.rs                   |   6 +-
 5 files changed, 142 insertions(+), 63 deletions(-)

diff --git a/gitoxide-core/src/repository/odb.rs b/gitoxide-core/src/repository/odb.rs
index 85c35618db2..e8cb1f7a0b1 100644
--- a/gitoxide-core/src/repository/odb.rs
+++ b/gitoxide-core/src/repository/odb.rs
@@ -1,4 +1,5 @@
 use std::io;
+use std::sync::atomic::Ordering;
 
 use anyhow::bail;
 
@@ -50,6 +51,8 @@ pub mod statistics {
     pub struct Options {
         pub format: OutputFormat,
         pub thread_limit: Option<usize>,
+        /// A debug-flag that triggers looking up the headers of all objects again, but without indices preloaded
+        pub extra_header_lookup: bool,
     }
 }
 
@@ -59,7 +62,11 @@ pub fn statistics(
     mut progress: impl gix::Progress,
     out: impl io::Write,
     mut err: impl io::Write,
-    statistics::Options { format, thread_limit }: statistics::Options,
+    statistics::Options {
+        format,
+        thread_limit,
+        extra_header_lookup,
+    }: statistics::Options,
 ) -> anyhow::Result<()> {
     use bytesize::ByteSize;
     use gix::odb::{find, HeaderExt};
@@ -76,6 +83,10 @@ pub fn statistics(
     #[cfg_attr(feature = "serde", derive(serde::Serialize))]
     #[derive(Default)]
     struct Statistics {
+        /// All objects that were used to produce these statistics.
+        /// Only `Some` if we are doing an extra round of header queries on a repository without loaded indices.
+        #[cfg_attr(feature = "serde", serde(skip_serializing))]
+        ids: Option<Vec<gix::ObjectId>>,
         total_objects: usize,
         loose_objects: usize,
         packed_objects: usize,
@@ -135,14 +146,17 @@ pub fn statistics(
     }
 
     impl gix::parallel::Reduce for Reduce {
-        type Input = Result<Vec<gix::odb::find::Header>, anyhow::Error>;
+        type Input = Result<Vec<(gix::ObjectId, gix::odb::find::Header)>, anyhow::Error>;
         type FeedProduce = ();
         type Output = Statistics;
         type Error = anyhow::Error;
 
         fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
-            for item in items? {
+            for (id, item) in items? {
                 self.stats.consume(item);
+                if let Some(ids) = self.stats.ids.as_mut() {
+                    ids.push(id);
+                }
             }
             Ok(())
         }
@@ -154,9 +168,9 @@ pub fn statistics(
     }
 
     let cancelled = || anyhow::anyhow!("Cancelled by user");
-    let object_ids = repo.objects.store_ref().iter()?.filter_map(Result::ok);
+    let object_ids = repo.objects.iter()?.filter_map(Result::ok);
     let chunk_size = 1_000;
-    let stats = if gix::parallel::num_threads(thread_limit) > 1 {
+    let mut stats = if gix::parallel::num_threads(thread_limit) > 1 {
         gix::parallel::in_parallel(
             gix::interrupt::Iter::new(
                 gix::features::iter::Chunks {
@@ -166,19 +180,30 @@ pub fn statistics(
                 cancelled,
             ),
             thread_limit,
-            move |_| (repo.objects.clone().into_inner(), counter),
+            {
+                let objects = repo.objects.clone();
+                move |_| (objects.clone().into_inner(), counter)
+            },
             |ids, (handle, counter)| {
                 let ids = ids?;
-                counter.fetch_add(ids.len(), std::sync::atomic::Ordering::Relaxed);
+                counter.fetch_add(ids.len(), Ordering::Relaxed);
                 let out = ids
                     .into_iter()
-                    .map(|id| handle.header(id))
+                    .map(|id| handle.header(id).map(|hdr| (id, hdr)))
                     .collect::<Result<Vec<_>, _>>()?;
                 Ok(out)
             },
-            Reduce::default(),
+            Reduce {
+                stats: Statistics {
+                    ids: extra_header_lookup.then(Vec::new),
+                    ..Default::default()
+                },
+            },
         )?
     } else {
+        if extra_header_lookup {
+            bail!("extra-header-lookup is only meaningful in threaded mode");
+        }
         let mut stats = Statistics::default();
 
         for (count, id) in object_ids.enumerate() {
@@ -193,6 +218,39 @@ pub fn statistics(
 
     progress.show_throughput(start);
 
+    if let Some(mut ids) = stats.ids.take() {
+        // Critical to re-open the repo to assure we don't have any ODB state and start fresh.
+        let start = std::time::Instant::now();
+        let repo = gix::open_opts(repo.git_dir(), repo.open_options().to_owned())?;
+        progress.set_name("re-counting".into());
+        progress.init(Some(ids.len()), gix::progress::count("objects"));
+        let counter = progress.counter();
+        counter.store(0, Ordering::Relaxed);
+        let errors = gix::parallel::in_parallel_with_slice(
+            &mut ids,
+            thread_limit,
+            {
+                let objects = repo.objects.clone();
+                move |_| (objects.clone().into_inner(), counter, false)
+            },
+            |id, (odb, counter, has_error), _threads_left, _stop_everything| -> anyhow::Result<()> {
+                counter.fetch_add(1, Ordering::Relaxed);
+                if let Err(_err) = odb.header(id) {
+                    *has_error = true;
+                    gix::trace::error!(err = ?_err, "Object that is known to be present wasn't found");
+                }
+                Ok(())
+            },
+            || Some(std::time::Duration::from_millis(100)),
+            |(_, _, has_error)| has_error,
+        )?;
+
+        progress.show_throughput(start);
+        if errors.contains(&true) {
+            bail!("At least one object couldn't be looked up even though it must exist");
+        }
+    }
+
     #[cfg(feature = "serde")]
     {
         serde_json::to_writer_pretty(out, &stats)?;
diff --git a/gix-odb/src/store_impls/dynamic/load_index.rs b/gix-odb/src/store_impls/dynamic/load_index.rs
index a255f6c4b58..18afbc86958 100644
--- a/gix-odb/src/store_impls/dynamic/load_index.rs
+++ b/gix-odb/src/store_impls/dynamic/load_index.rs
@@ -4,7 +4,7 @@ use std::{
     ops::Deref,
     path::{Path, PathBuf},
     sync::{
-        atomic::{AtomicU16, AtomicUsize, Ordering},
+        atomic::{AtomicU16, Ordering},
         Arc,
     },
     time::SystemTime,
@@ -86,7 +86,7 @@ impl super::Store {
             Ok(Some(self.collect_snapshot()))
         } else {
             // always compare to the latest state
-            // Nothing changed in the mean time, try to load another index…
+            // Nothing changed in the meantime, try to load another index…
             if self.load_next_index(index) {
                 Ok(Some(self.collect_snapshot()))
             } else {
@@ -119,7 +119,7 @@ impl super::Store {
             let slot = &self.files[index.slot_indices[slot_map_index]];
             let _lock = slot.write.lock();
             if slot.generation.load(Ordering::SeqCst) > index.generation {
-                // There is a disk consolidation in progress which just overwrote a slot that cold be disposed with some other
+                // There is a disk consolidation in progress which just overwrote a slot that could be disposed with some other
                 // index, one we didn't intend to load.
                 // Continue with the next slot index in the hope there is something else we can do…
                 continue 'retry_with_next_slot_index;
@@ -128,14 +128,18 @@ impl super::Store {
                 let bundle_mut = Arc::make_mut(&mut bundle);
                 if let Some(files) = bundle_mut.as_mut() {
                     // these are always expected to be set, unless somebody raced us. We handle this later by retrying.
-                    let _loaded_count = IncOnDrop(&index.loaded_indices);
-                    match files.load_index(self.object_hash) {
+                    let res = {
+                        let res = files.load_index(self.object_hash);
+                        slot.files.store(bundle);
+                        index.loaded_indices.fetch_add(1, Ordering::SeqCst);
+                        res
+                    };
+                    match res {
                         Ok(_) => {
-                            slot.files.store(bundle);
                             break 'retry_with_next_slot_index;
                         }
-                        Err(_) => {
-                            slot.files.store(bundle);
+                        Err(_err) => {
+                            gix_features::trace::error!(err=?_err, "Failed to load index file - some objects may seem to not exist");
                             continue 'retry_with_next_slot_index;
                         }
                     }
@@ -145,9 +149,14 @@ impl super::Store {
                 // There can be contention as many threads start working at the same time and take all the
                 // slots to load indices for. Some threads might just be left-over and have to wait for something
                 // to change.
-                let num_load_operations = index.num_indices_currently_being_loaded.deref();
                 // TODO: potentially hot loop - could this be a condition variable?
-                while num_load_operations.load(Ordering::Relaxed) != 0 {
+                // This is a timing-based fix for the case that the `num_indices_being_loaded` isn't yet incremented,
+                // and we might break out here without actually waiting for the loading operation. Then we'd fail to
+                // observe a change and the underlying handler would not have all the indices it needs at its disposal.
+                // Yielding means we will definitely lose enough time to observe the ongoing operation,
+                // or its effects.
+                std::thread::yield_now();
+                while index.num_indices_currently_being_loaded.load(Ordering::SeqCst) != 0 {
                     std::thread::yield_now()
                 }
                 break 'retry_with_next_slot_index;
@@ -197,7 +206,7 @@ impl super::Store {
 
         // We might not be able to detect by pointer if the state changed, as this itself is racy. So we keep track of double-initialization
         // using a flag, which means that if `needs_init` was true we saw the index uninitialized once, but now that we are here it's
-        // initialized meaning that somebody was faster and we couldn't detect it by comparisons to the index.
+        // initialized meaning that somebody was faster, and we couldn't detect it by comparisons to the index.
         // If so, make sure we collect the snapshot instead of returning None in case nothing actually changed, which is likely with a
         // race like this.
         if !was_uninitialized && needs_init {
@@ -397,18 +406,19 @@ impl super::Store {
                     // generation stays the same, as it's the same value still but scheduled for eventual removal.
                 }
             } else {
+                // set the generation before we actually change the value, otherwise readers of old generations could observe the new one.
+                // We rather want them to turn around here and update their index, which, by that time, might actually already be available.
+                // If not, they would fail unable to load a pack or index they need, but that's preferred over returning wrong objects.
+                // Safety: can't race as we hold the lock, have to set the generation beforehand to help avoid others to observe the value.
+                slot.generation.store(generation, Ordering::SeqCst);
                 *files_mut = None;
             };
             slot.files.store(files);
-            if !needs_stable_indices {
-                // Not racy due to lock, generation must be set after unsetting the slot value AND storing it.
-                slot.generation.store(generation, Ordering::SeqCst);
-            }
         }
 
         let new_index = self.index.load();
         Ok(if index.state_id() == new_index.state_id() {
-            // there was no change, and nothing was loaded in the meantime, reflect that in the return value to not get into loops
+            // there was no change, and nothing was loaded in the meantime, reflect that in the return value to not get into loops.
             None
         } else {
             if load_new_index {
@@ -619,34 +629,44 @@ impl super::Store {
     }
 
     pub(crate) fn collect_snapshot(&self) -> Snapshot {
+        // We don't observe changes-on-disk in our 'wait-for-load' loop.
+        // That loop is meant to help assure the marker (which includes the amount of loaded indices) matches
+        // the actual amount of indices we collect.
         let index = self.index.load();
-        let indices = if index.is_initialized() {
-            index
-                .slot_indices
-                .iter()
-                .map(|idx| (*idx, &self.files[*idx]))
-                .filter_map(|(id, file)| {
-                    let lookup = match (**file.files.load()).as_ref()? {
-                        types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single {
-                            index: bundle.index.loaded()?.clone(),
-                            data: bundle.data.loaded().cloned(),
-                        },
-                        types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi {
-                            index: multi.multi_index.loaded()?.clone(),
-                            data: multi.data.iter().map(|f| f.loaded().cloned()).collect(),
-                        },
-                    };
-                    handle::IndexLookup { file: lookup, id }.into()
-                })
-                .collect()
-        } else {
-            Vec::new()
-        };
+        loop {
+            if index.num_indices_currently_being_loaded.deref().load(Ordering::SeqCst) != 0 {
+                std::thread::yield_now();
+                continue;
+            }
+            let marker = index.marker();
+            let indices = if index.is_initialized() {
+                index
+                    .slot_indices
+                    .iter()
+                    .map(|idx| (*idx, &self.files[*idx]))
+                    .filter_map(|(id, file)| {
+                        let lookup = match (**file.files.load()).as_ref()? {
+                            types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single {
+                                index: bundle.index.loaded()?.clone(),
+                                data: bundle.data.loaded().cloned(),
+                            },
+                            types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi {
+                                index: multi.multi_index.loaded()?.clone(),
+                                data: multi.data.iter().map(|f| f.loaded().cloned()).collect(),
+                            },
+                        };
+                        handle::IndexLookup { file: lookup, id }.into()
+                    })
+                    .collect()
+            } else {
+                Vec::new()
+            };
 
-        Snapshot {
-            indices,
-            loose_dbs: Arc::clone(&index.loose_dbs),
-            marker: index.marker(),
+            return Snapshot {
+                indices,
+                loose_dbs: Arc::clone(&index.loose_dbs),
+                marker,
+            };
         }
     }
 }
@@ -669,13 +689,6 @@ impl<'a> Drop for IncOnNewAndDecOnDrop<'a> {
     }
 }
 
-struct IncOnDrop<'a>(&'a AtomicUsize);
-impl<'a> Drop for IncOnDrop<'a> {
-    fn drop(&mut self) {
-        self.0.fetch_add(1, Ordering::SeqCst);
-    }
-}
-
 pub(crate) enum Either {
     IndexPath(PathBuf),
     MultiIndexFile(Arc<gix_pack::multi_index::File>),
diff --git a/gix-odb/src/store_impls/dynamic/types.rs b/gix-odb/src/store_impls/dynamic/types.rs
index 473c587bbea..35257ee7ee8 100644
--- a/gix-odb/src/store_impls/dynamic/types.rs
+++ b/gix-odb/src/store_impls/dynamic/types.rs
@@ -18,7 +18,7 @@ pub(crate) type AtomicGeneration = AtomicU32;
 
 /// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea
 /// of whether stored `PackId`s are still usable.
-#[derive(Default, Copy, Clone)]
+#[derive(Default, Copy, Clone, Debug)]
 pub struct SlotIndexMarker {
     /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible.
     /// This value changes once the internal representation is compacted, something that may happen only if there is no handle
@@ -262,7 +262,7 @@ impl IndexAndPacks {
         }
     }
 
-    /// If we are garbage, put ourselves into the loaded state. Otherwise put ourselves back to unloaded.
+    /// If we are garbage, put ourselves into the loaded state. Otherwise, put ourselves back to unloaded.
     pub(crate) fn put_back(&mut self) {
         match self {
             IndexAndPacks::Index(bundle) => {
diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs
index 0f5a146d0ed..49b2c50fe9e 100644
--- a/src/plumbing/main.rs
+++ b/src/plumbing/main.rs
@@ -1153,7 +1153,7 @@ pub fn main() -> Result<()> {
                 ),
             },
             Subcommands::Odb(cmd) => match cmd {
-                odb::Subcommands::Stats => prepare_and_run(
+                odb::Subcommands::Stats { extra_header_lookup } => prepare_and_run(
                     "odb-stats",
                     trace,
                     auto_verbose,
@@ -1166,7 +1166,11 @@ pub fn main() -> Result<()> {
                             progress,
                             out,
                             err,
-                            core::repository::odb::statistics::Options { format, thread_limit },
+                            core::repository::odb::statistics::Options {
+                                format,
+                                thread_limit,
+                                extra_header_lookup,
+                            },
                         )
                     },
                 ),
diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs
index b5fa3649a68..10d0312bc7b 100644
--- a/src/plumbing/options/mod.rs
+++ b/src/plumbing/options/mod.rs
@@ -586,7 +586,11 @@ pub mod odb {
         Info,
         /// Count and obtain information on all, possibly duplicate, objects in the database.
         #[clap(visible_alias = "statistics")]
-        Stats,
+        Stats {
+            /// Lookup headers again, but without preloading indices.
+            #[clap(long)]
+            extra_header_lookup: bool,
+        },
     }
 }

From b32a847e10b742834169fac56c284645c1ed28f3 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sun, 12 May 2024 16:24:23 +0200
Subject: [PATCH 6/9] fix!: don't panic when unknown entry types are
 encountered.

Related to https://github.com/helix-editor/helix/issues/10660 which
runs into object types that are unknown.
I have looked into this and [couldn't find evidence of a new pack-entry
type](https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/packfile.c#L1303-L1358)
in the Git codebase. It also looks like Git [will never write packs
that aren't
V2](https://github.com/git/git/blob/0f3415f1f8478b05e64db11eb8aaa2915e48fef6/pack-write.c#L352)
- initially I thought it might write V3 based on some other criteria.

For now, the thesis is that one would have to be able to mark bad
objects to be able to handle this more gracefully, but all we can do is
fail.
---
 gix-pack/src/bundle/find.rs                  | 12 ++++----
 gix-pack/src/cache/delta/traverse/mod.rs     |  2 ++
 gix-pack/src/cache/delta/traverse/resolve.rs |  4 +--
 gix-pack/src/data/entry/decode.rs            | 29 +++++++++++++-------
 gix-pack/src/data/entry/mod.rs               |  4 ++-
 gix-pack/src/data/file/decode/entry.rs       | 12 ++------
 gix-pack/src/data/file/decode/header.rs      |  2 +-
 gix-pack/src/data/file/decode/mod.rs         |  2 ++
 gix-pack/src/data/output/entry/mod.rs        |  8 +++++-
 gix-pack/src/index/traverse/error.rs         |  2 ++
 gix-pack/src/index/traverse/mod.rs           |  9 +++---
 gix-pack/src/multi_index/verify.rs           |  1 +
 12 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/gix-pack/src/bundle/find.rs b/gix-pack/src/bundle/find.rs
index 98e28333d48..4a7fd23129d 100644
--- a/gix-pack/src/bundle/find.rs
+++ b/gix-pack/src/bundle/find.rs
@@ -37,7 +37,7 @@ impl crate::Bundle {
         cache: &mut dyn crate::cache::DecodeEntry,
     ) -> Result<(gix_object::Data<'a>, crate::data::entry::Location), crate::data::decode::Error> {
         let ofs = self.index.pack_offset_at_index(idx);
-        let pack_entry = self.pack.entry(ofs);
+        let pack_entry = self.pack.entry(ofs)?;
         let header_size = pack_entry.header_size();
         self.pack
             .decode_entry(
@@ -45,11 +45,11 @@ impl crate::Bundle {
                 out,
                 inflate,
                 &|id, _out| {
-                    self.index.lookup(id).map(|idx| {
-                        crate::data::decode::entry::ResolvedBase::InPack(
-                            self.pack.entry(self.index.pack_offset_at_index(idx)),
-                        )
-                    })
+                    let idx = self.index.lookup(id)?;
+                    self.pack
+                        .entry(self.index.pack_offset_at_index(idx))
+                        .ok()
+                        .map(crate::data::decode::entry::ResolvedBase::InPack)
                 },
                 cache,
             )
diff --git a/gix-pack/src/cache/delta/traverse/mod.rs b/gix-pack/src/cache/delta/traverse/mod.rs
index ea0c9c93be9..290f389bbe4 100644
--- a/gix-pack/src/cache/delta/traverse/mod.rs
+++ b/gix-pack/src/cache/delta/traverse/mod.rs
@@ -26,6 +26,8 @@ pub enum Error {
     },
     #[error("The resolver failed to obtain the pack entry bytes for the entry at {pack_offset}")]
     ResolveFailed { pack_offset: u64 },
+    #[error(transparent)]
+    EntryType(#[from] crate::data::entry::decode::Error),
     #[error("One of the object inspectors failed")]
     Inspect(#[from] Box<dyn std::error::Error + Send + Sync>),
     #[error("Interrupted")]
diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs
index 92bff6b03e8..56d7432e776 100644
--- a/gix-pack/src/cache/delta/traverse/resolve.rs
+++ b/gix-pack/src/cache/delta/traverse/resolve.rs
@@ -128,7 +128,7 @@ where
     let bytes = resolve(slice.clone(), resolve_data).ok_or(Error::ResolveFailed {
         pack_offset: slice.start,
     })?;
-    let entry = data::Entry::from_bytes(bytes, slice.start, hash_len);
+    let entry = data::Entry::from_bytes(bytes, slice.start, hash_len)?;
     let compressed = &bytes[entry.header_size()..];
     let decompressed_len = entry.decompressed_size as usize;
     decompress_all_at_once_with(&mut inflate, compressed, decompressed_len, out)?;
@@ -304,7 +304,7 @@ where
     let bytes = resolve(slice.clone(), resolve_data).ok_or(Error::ResolveFailed {
         pack_offset: slice.start,
     })?;
-    let entry = data::Entry::from_bytes(bytes, slice.start, hash_len);
+    let entry = data::Entry::from_bytes(bytes, slice.start, hash_len)?;
     let compressed = &bytes[entry.header_size()..];
     let decompressed_len = entry.decompressed_size as usize;
     decompress_all_at_once_with(&mut inflate, compressed, decompressed_len, out)?;
diff --git a/gix-pack/src/data/entry/decode.rs b/gix-pack/src/data/entry/decode.rs
index 37b8560ac03..6b38be0f33d 100644
--- a/gix-pack/src/data/entry/decode.rs
+++ b/gix-pack/src/data/entry/decode.rs
@@ -5,6 +5,14 @@ use gix_features::decode::{leb64, leb64_from_read};
 use super::{BLOB, COMMIT, OFS_DELTA, REF_DELTA, TAG, TREE};
 use crate::data;
 
+/// The error returned by [data::Entry::from_bytes()].
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+#[error("Object type {type_id} is unsupported")]
+pub struct Error {
+    pub type_id: u8,
+}
+
 /// Decoding
 impl data::Entry {
     /// Decode an entry from the given entry data `d`, providing the `pack_offset` to allow tracking the start of the entry data section.
@@ -12,7 +20,7 @@ impl data::Entry {
     /// # Panics
     ///
     /// If we cannot understand the header, garbage data is likely to trigger this.
-    pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> data::Entry {
+    pub fn from_bytes(d: &[u8], pack_offset: data::Offset, hash_len: usize) -> Result<data::Entry, Error> {
         let (type_id, size, mut consumed) = parse_header_info(d);
 
         use crate::data::entry::Header::*;
@@ -36,21 +44,17 @@ impl data::Entry {
             TREE => Tree,
             COMMIT => Commit,
             TAG => Tag,
-            _ => panic!("We currently don't support any V3 features or extensions"),
+            other => return Err(Error { type_id: other }),
         };
-        data::Entry {
+        Ok(data::Entry {
             header: object,
             decompressed_size: size,
             data_offset: pack_offset + consumed as u64,
-        }
+        })
     }
 
     /// Instantiate an `Entry` from the reader `r`, providing the `pack_offset` to allow tracking the start of the entry data section.
-    pub fn from_read(
-        r: &mut dyn io::Read,
-        pack_offset: data::Offset,
-        hash_len: usize,
-    ) -> Result<data::Entry, io::Error> {
+    pub fn from_read(r: &mut dyn io::Read, pack_offset: data::Offset, hash_len: usize) -> io::Result<data::Entry> {
         let (type_id, size, mut consumed) = streaming_parse_header_info(r)?;
 
         use crate::data::entry::Header::*;
@@ -78,7 +82,12 @@ impl data::Entry {
             TREE => Tree,
             COMMIT => Commit,
             TAG => Tag,
-            _ => panic!("We currently don't support any V3 features or extensions"),
+            other => {
+                return Err(io::Error::new(
+                    io::ErrorKind::Other,
+                    format!("Object type {other} is unsupported"),
+                ))
+            }
         };
         Ok(data::Entry {
             header: object,
diff --git a/gix-pack/src/data/entry/mod.rs b/gix-pack/src/data/entry/mod.rs
index 6225cbf0115..13788663b19 100644
--- a/gix-pack/src/data/entry/mod.rs
+++ b/gix-pack/src/data/entry/mod.rs
@@ -45,7 +45,9 @@ impl Entry {
     }
 }
 
-mod decode;
+///
+#[allow(clippy::empty_docs)]
+pub mod decode;
 
 mod header;
 pub use header::Header;
diff --git a/gix-pack/src/data/file/decode/entry.rs b/gix-pack/src/data/file/decode/entry.rs
index dc306baf33a..5613d70ee74 100644
--- a/gix-pack/src/data/file/decode/entry.rs
+++ b/gix-pack/src/data/file/decode/entry.rs
@@ -100,18 +100,10 @@ impl File {
             .map_err(Into::into)
     }
 
-    fn assure_v2(&self) {
-        assert!(
-            matches!(self.version, crate::data::Version::V2),
-            "Only V2 is implemented"
-        );
-    }
-
     /// Obtain the [`Entry`][crate::data::Entry] at the given `offset` into the pack.
     ///
     /// The `offset` is typically obtained from the pack index file.
-    pub fn entry(&self, offset: data::Offset) -> data::Entry {
-        self.assure_v2();
+    pub fn entry(&self, offset: data::Offset) -> Result<data::Entry, data::entry::decode::Error> {
         let pack_offset: usize = offset.try_into().expect("offset representable by machine");
         assert!(pack_offset <= self.data.len(), "offset out of bounds");
 
@@ -246,7 +238,7 @@ impl File {
             });
             use crate::data::entry::Header;
             cursor = match cursor.header {
-                Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance)),
+                Header::OfsDelta { base_distance } => self.entry(cursor.base_pack_offset(base_distance))?,
                 Header::RefDelta { base_id } => match resolve(base_id.as_ref(), out) {
                     Some(ResolvedBase::InPack(entry)) => entry,
                     Some(ResolvedBase::OutOfPack { end, kind }) => {
diff --git a/gix-pack/src/data/file/decode/header.rs b/gix-pack/src/data/file/decode/header.rs
index fb23f482d0d..84568352a59 100644
--- a/gix-pack/src/data/file/decode/header.rs
+++ b/gix-pack/src/data/file/decode/header.rs
@@ -66,7 +66,7 @@ impl File {
                     if first_delta_decompressed_size.is_none() {
                         first_delta_decompressed_size = Some(self.decode_delta_object_size(inflate, &entry)?);
                     }
-                    entry = self.entry(entry.base_pack_offset(base_distance))
+                    entry = self.entry(entry.base_pack_offset(base_distance))?
                 }
                 RefDelta { base_id } => {
                     num_deltas += 1;
diff --git a/gix-pack/src/data/file/decode/mod.rs b/gix-pack/src/data/file/decode/mod.rs
index 3db3eec51ad..5746dc8e000 100644
--- a/gix-pack/src/data/file/decode/mod.rs
+++ b/gix-pack/src/data/file/decode/mod.rs
@@ -17,6 +17,8 @@ pub enum Error {
     ZlibInflate(#[from] gix_features::zlib::inflate::Error),
     #[error("A delta chain could not be followed as the ref base with id {0} could not be found")]
     DeltaBaseUnresolved(gix_hash::ObjectId),
+    #[error(transparent)]
+    EntryType(#[from] crate::data::entry::decode::Error),
     #[error("Entry too large to fit in memory")]
     OutOfMemory,
 }
diff --git a/gix-pack/src/data/output/entry/mod.rs b/gix-pack/src/data/output/entry/mod.rs
index ed79a404d5a..188ecf51174 100644
--- a/gix-pack/src/data/output/entry/mod.rs
+++ b/gix-pack/src/data/output/entry/mod.rs
@@ -38,6 +38,8 @@ pub enum Kind {
 pub enum Error {
     #[error("{0}")]
     ZlibDeflate(#[from] std::io::Error),
+    #[error(transparent)]
+    EntryType(#[from] crate::data::entry::decode::Error),
 }
 
 impl output::Entry {
@@ -74,7 +76,11 @@ impl output::Entry {
         };
 
         let pack_offset_must_be_zero = 0;
-        let pack_entry = data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len());
+        let pack_entry = match data::Entry::from_bytes(&entry.data, pack_offset_must_be_zero, count.id.as_slice().len())
+        {
+            Ok(e) => e,
+            Err(err) => return Some(Err(err.into())),
+        };
 
         use crate::data::entry::Header::*;
         match pack_entry.header {
diff --git a/gix-pack/src/index/traverse/error.rs b/gix-pack/src/index/traverse/error.rs
index 2310c3babca..f6cc7506548 100644
--- a/gix-pack/src/index/traverse/error.rs
+++ b/gix-pack/src/index/traverse/error.rs
@@ -12,6 +12,8 @@ pub enum Error {
     Tree(#[from] crate::cache::delta::from_offsets::Error),
     #[error("The tree traversal failed")]
     TreeTraversal(#[from] crate::cache::delta::traverse::Error),
+    #[error(transparent)]
+    EntryType(#[from] crate::data::entry::decode::Error),
     #[error("Object {id} at offset {offset} could not be decoded")]
     PackDecode {
         id: gix_hash::ObjectId,
diff --git a/gix-pack/src/index/traverse/mod.rs b/gix-pack/src/index/traverse/mod.rs
index 29abeac9701..e6731554a0a 100644
--- a/gix-pack/src/index/traverse/mod.rs
+++ b/gix-pack/src/index/traverse/mod.rs
@@ -161,7 +161,7 @@ impl index::File {
         C: crate::cache::DecodeEntry,
         E: std::error::Error + Send + Sync + 'static,
     {
-        let pack_entry = pack.entry(index_entry.pack_offset);
+        let pack_entry = pack.entry(index_entry.pack_offset)?;
         let pack_entry_data_offset = pack_entry.data_offset;
         let entry_stats = pack
             .decode_entry(
@@ -169,9 +169,10 @@ impl index::File {
                 buf,
                 inflate,
                 &|id, _| {
-                    self.lookup(id).map(|index| {
-                        crate::data::decode::entry::ResolvedBase::InPack(pack.entry(self.pack_offset_at_index(index)))
-                    })
+                    let index = self.lookup(id)?;
+                    pack.entry(self.pack_offset_at_index(index))
+                        .ok()
+                        .map(crate::data::decode::entry::ResolvedBase::InPack)
                 },
                 cache,
             )
diff --git a/gix-pack/src/multi_index/verify.rs b/gix-pack/src/multi_index/verify.rs
index 1e38ae7d9cf..2f645e6c881 100644
--- a/gix-pack/src/multi_index/verify.rs
+++ b/gix-pack/src/multi_index/verify.rs
@@ -286,6 +286,7 @@ impl File {
                         TreeTraversal(err) => TreeTraversal(err),
                         PackDecode { id, offset, source } => PackDecode { id, offset, source },
                         PackMismatch { expected, actual } => PackMismatch { expected, actual },
+                        EntryType(err) => EntryType(err),
                         PackObjectMismatch {
                             expected,
                             actual,

From 7a3c583d51629697804c9f6d122b505b0afcefc8 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sun, 12 May 2024 16:58:20 +0200
Subject: [PATCH 7/9] adapt to changes in `gix-pack`

---
 gix-config/tests/mem.rs                   |  2 +-
 gix-macros/Cargo.toml                     |  2 +-
 gix-odb/src/store_impls/dynamic/find.rs   | 28 +++++++++++++----------
 gix-odb/src/store_impls/dynamic/header.rs | 18 ++++++++-------
 gix-pack/tests/pack/data/file.rs          |  6 ++---
 gix-pack/tests/pack/index.rs              |  4 ++--
 gix-pack/tests/pack/iter.rs               |  2 +-
 7 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/gix-config/tests/mem.rs b/gix-config/tests/mem.rs
index d8ceb761697..0ae0a1b829a 100644
--- a/gix-config/tests/mem.rs
+++ b/gix-config/tests/mem.rs
@@ -4,7 +4,7 @@ use std::alloc;
 use std::time::Instant;
 
 #[global_allocator]
-static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::max_value());
+static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::MAX);
 
 #[test]
 fn usage() {
diff --git a/gix-macros/Cargo.toml b/gix-macros/Cargo.toml
index 8662eaf2af9..e86591704cf 100644
--- a/gix-macros/Cargo.toml
+++ b/gix-macros/Cargo.toml
@@ -14,7 +14,7 @@ include = ["src/**/*", "LICENSE-*"]
 rust-version = "1.65"
 
 [lib]
-proc_macro = true
+proc-macro = true
 
 [dependencies]
 syn = { version = "2.0", features = ["full", "fold"] }
diff --git a/gix-odb/src/store_impls/dynamic/find.rs b/gix-odb/src/store_impls/dynamic/find.rs
index b420dc6ec73..13608b7c876 100644
--- a/gix-odb/src/store_impls/dynamic/find.rs
+++ b/gix-odb/src/store_impls/dynamic/find.rs
@@ -19,6 +19,8 @@ pub(crate) mod error {
         LoadIndex(#[from] crate::store::load_index::Error),
         #[error(transparent)]
         LoadPack(#[from] std::io::Error),
+        #[error(transparent)]
+        EntryType(#[from] gix_pack::data::entry::decode::Error),
         #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)]
         DeltaBaseRecursionLimit {
             /// the maximum recursion depth we encountered.
@@ -144,19 +146,21 @@ where
                 }
             },
         };
-        let entry = pack.entry(pack_offset);
+        let entry = pack.entry(pack_offset)?;
         let header_size = entry.header_size();
-        let res = match pack.decode_entry(
+        let res = pack.decode_entry(
             entry,
             buffer,
             inflate,
             &|id, _out| {
-                index_file.pack_offset_by_id(id).map(|pack_offset| {
-                    gix_pack::data::decode::entry::ResolvedBase::InPack(pack.entry(pack_offset))
-                })
+                let pack_offset = index_file.pack_offset_by_id(id)?;
+                pack.entry(pack_offset)
+                    .ok()
+                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
             },
             pack_cache,
-        ) {
+        );
+        let res = match res {
             Ok(r) => Ok((
                 gix_object::Data {
                     kind: r.kind,
@@ -230,7 +234,7 @@ where
                 let pack = possibly_pack
                     .as_ref()
                     .expect("pack to still be available like just now");
-                let entry = pack.entry(pack_offset);
+                let entry = pack.entry(pack_offset)?;
                 let header_size = entry.header_size();
                 pack.decode_entry(
                     entry,
@@ -239,10 +243,10 @@ where
                     &|id, out| {
                         index_file
                             .pack_offset_by_id(id)
-                            .map(|pack_offset| {
-                                gix_pack::data::decode::entry::ResolvedBase::InPack(
-                                    pack.entry(pack_offset),
-                                )
+                            .and_then(|pack_offset| {
+                                pack.entry(pack_offset)
+                                    .ok()
+                                    .map(gix_pack::data::decode::entry::ResolvedBase::InPack)
                             })
                             .or_else(|| {
                                 (id == base_id).then(|| {
@@ -398,7 +402,7 @@ where
                 }
             },
         };
-        let entry = pack.entry(pack_offset);
+        let entry = pack.entry(pack_offset).ok()?;
         buf.resize(entry.decompressed_size.try_into().expect("representable size"), 0);
         assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match");
diff --git a/gix-odb/src/store_impls/dynamic/header.rs b/gix-odb/src/store_impls/dynamic/header.rs
index 1e4370a91ee..2ebcc73efb4 100644
--- a/gix-odb/src/store_impls/dynamic/header.rs
+++ b/gix-odb/src/store_impls/dynamic/header.rs
@@ -72,10 +72,12 @@ where
                 }
             },
         };
-        let entry = pack.entry(pack_offset);
+        let entry = pack.entry(pack_offset)?;
         let res = match pack.decode_header(entry, inflate, &|id| {
-            index_file.pack_offset_by_id(id).map(|pack_offset| {
-                gix_pack::data::decode::header::ResolvedBase::InPack(pack.entry(pack_offset))
+            index_file.pack_offset_by_id(id).and_then(|pack_offset| {
+                pack.entry(pack_offset)
+                    .ok()
+                    .map(gix_pack::data::decode::header::ResolvedBase::InPack)
             })
         }) {
             Ok(header) => Ok(header.into()),
@@ -129,14 +131,14 @@ where
                 let pack = possibly_pack
                     .as_ref()
                     .expect("pack to still be available like just now");
-                let entry = pack.entry(pack_offset);
+                let entry = pack.entry(pack_offset)?;
                 pack.decode_header(entry, inflate, &|id| {
                     index_file
                         .pack_offset_by_id(id)
-                        .map(|pack_offset| {
-                            gix_pack::data::decode::header::ResolvedBase::InPack(
-                                pack.entry(pack_offset),
-                            )
+                        .and_then(|pack_offset| {
+                            pack.entry(pack_offset)
+                                .ok()
+                                .map(gix_pack::data::decode::header::ResolvedBase::InPack)
                         })
                         .or_else(|| {
                             (id == base_id).then(|| {
diff --git a/gix-pack/tests/pack/data/file.rs b/gix-pack/tests/pack/data/file.rs
index 6a279fa84c0..a4967570e75 100644
--- a/gix-pack/tests/pack/data/file.rs
+++ b/gix-pack/tests/pack/data/file.rs
@@ -103,7 +103,7 @@ mod decode_entry {
         }
 
         let p = pack_at(SMALL_PACK);
-        let entry = p.entry(offset);
+        let entry = p.entry(offset).expect("valid object type");
         let mut buf = Vec::new();
         p.decode_entry(
             entry,
@@ -159,7 +159,7 @@ mod resolve_header {
         }
 
         let p = pack_at(SMALL_PACK);
-        let entry = p.entry(offset);
+        let entry = p.entry(offset).expect("valid object type");
         p.decode_header(entry, &mut Default::default(), &resolve_with_panic)
             .expect("valid offset provides valid entry")
     }
@@ -208,7 +208,7 @@ mod decompress_entry {
     fn decompress_entry_at_offset(offset: u64) -> Vec<u8> {
         let p = pack_at(SMALL_PACK);
-        let entry = p.entry(offset);
+        let entry = p.entry(offset).expect("valid object type");
 
         let size = entry.decompressed_size as usize;
         let mut buf = vec![0; size];
diff --git a/gix-pack/tests/pack/index.rs b/gix-pack/tests/pack/index.rs
index ddc90cc87c0..e554f50611d 100644
--- a/gix-pack/tests/pack/index.rs
+++ b/gix-pack/tests/pack/index.rs
@@ -398,7 +398,7 @@ fn pack_lookup() -> Result<(), Box<dyn std::error::Error>> {
     let sorted_offsets = idx.sorted_offsets();
     assert_eq!(num_objects, sorted_offsets.len());
     for idx_entry in idx.iter() {
-        let pack_entry = pack.entry(idx_entry.pack_offset);
+        let pack_entry = pack.entry(idx_entry.pack_offset)?;
         assert_ne!(pack_entry.data_offset, idx_entry.pack_offset);
         assert!(sorted_offsets.binary_search(&idx_entry.pack_offset).is_ok());
     }
@@ -410,7 +410,7 @@ fn pack_lookup() -> Result<(), Box<dyn std::error::Error>> {
         );
 
         let mut buf = vec![0u8; entry.decompressed_size as usize];
-        let pack_entry = pack.entry(offset_from_index);
+        let pack_entry = pack.entry(offset_from_index)?;
         assert_eq!(
             pack_entry.pack_offset(),
             entry.pack_offset,
diff --git a/gix-pack/tests/pack/iter.rs b/gix-pack/tests/pack/iter.rs
index e814e2c4b6e..c9dd7d445c6 100644
--- a/gix-pack/tests/pack/iter.rs
+++ b/gix-pack/tests/pack/iter.rs
@@ -37,7 +37,7 @@ mod new_from_header {
                 let mut buf = Vec::<u8>::new();
                 entry.header.write_to(entry.decompressed_size, &mut buf)?;
                 let new_entry =
-                    pack::data::Entry::from_bytes(&buf, entry.pack_offset, gix_hash::Kind::Sha1.len_in_bytes());
+                    pack::data::Entry::from_bytes(&buf, entry.pack_offset, gix_hash::Kind::Sha1.len_in_bytes())?;
 
                 assert_eq!(
                     new_entry.header_size(),

From 31d02a89ee15a0df463a5b5c34864497f8adfae1 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 13 May 2024 09:39:53 +0200
Subject: [PATCH 8/9] fix: default to creating file-symlinks if the target is
 dangling on Windows (#1354)

This behaviour is the same as in Git.
---
 gix-fs/src/symlink.rs                               |  12 +++-
 .../make_dangling_symlink.tar.xz                    | Bin 0 -> 9812 bytes
 .../tests/fixtures/make_dangling_symlink.sh         |  11 ++++
 gix-worktree-state/tests/state/checkout.rs          |  61 ++++++++++++------
 4 files changed, 65 insertions(+), 19 deletions(-)
 create mode 100644 gix-worktree-state/tests/fixtures/generated-archives/make_dangling_symlink.tar.xz
 create mode 100644 gix-worktree-state/tests/fixtures/make_dangling_symlink.sh

diff --git a/gix-fs/src/symlink.rs b/gix-fs/src/symlink.rs
index 55798daa3e9..c29d030d0f8 100644
--- a/gix-fs/src/symlink.rs
+++ b/gix-fs/src/symlink.rs
@@ -31,10 +31,20 @@ pub fn remove(path: &Path) -> io::Result<()> {
 
 #[cfg(windows)]
 /// Create a new symlink at `link` which points to `original`.
+///
+/// Note that if a symlink target (the `original`) isn't present on disk, it's assumed to be a
+/// file, creating a dangling file symlink. This is similar to a dangling symlink on Unix,
+/// which doesn't have to care about the target type though.
 pub fn create(original: &Path, link: &Path) -> io::Result<()> {
     use std::os::windows::fs::{symlink_dir, symlink_file};
     // TODO: figure out if links to links count as files or whatever they point at
-    if std::fs::metadata(link.parent().expect("dir for link").join(original))?.is_dir() {
+    let orig_abs = link.parent().expect("dir for link").join(original);
+    let is_dir = match std::fs::metadata(orig_abs) {
+        Ok(m) => m.is_dir(),
+        Err(err) if err.kind() == io::ErrorKind::NotFound => false,
+        Err(err) => return Err(err),
+    };
+    if is_dir {
         symlink_dir(original, link)
     } else {
         symlink_file(original, link)
diff --git a/gix-worktree-state/tests/fixtures/generated-archives/make_dangling_symlink.tar.xz b/gix-worktree-state/tests/fixtures/generated-archives/make_dangling_symlink.tar.xz
new file mode 100644
index 0000000000000000000000000000000000000000..4d737dbd1f1b020e2e25deebc900ae9785a04d61
GIT binary patch
literal 9812
[9812 bytes of base85-encoded binary data omitted]
z4BUAngYJ87mK{ytNX`-H!Nd-^7ojBMCQf~=Z>Uy8IQDkyprMrQgQh$zek+%x2XAEc zVv7*e>gFBHLA2T~fe61LA@c%Rs;*oywehC0K69~y2?@P#8inpuz7aMgCDaMqseZI# ze%@%6{4`a^=zoMgGWItsVk*!~Vk~}xV`~VC;}J3PUB1ecQmc*H3UR`EU#yixUzWA~ zm^#+{SflG)`z7gXt2;}BA1;)zjp0;(bBX+c}EO&17qZp=`ykq z1w@DOaCdeG>9#7w-0ld3J;YZvWu{MABD+WVys_&(yfC0bp9iSaojzZ5B5&OSIHEh} zG9G7#KlON&0ZXKNk@xaP3<2!FnFZ0vp3X;o$a74zMwMT^)F~rER}-^PSfcb*rWU-O zH>W8kgC4DB6TWot%#LS2%!pwzF2K2yED7Lrj}TfjseD;`Gz&y+%sj?}|Q?z+?! z>OyGgE2t@=tA22ax1{e-p8Qn<1=~W)^vTds$ey(oF57nFE0-QT z=m8Y8MupqzEgXb&H!K&mf7eP>h3+Prc36kC6bnOoN&r4oT z4G@+YnguIy<);H2dIf~H+^fOM&ADliq|_iJ#={eK9Idf6+p#6e1#fQid}J`98_*|g zn2?69IDW|`4rSgaJ7hduaKN}(hsbjEuVVi<^?GdmJ;8{X6z*m@d79x5XOZ*X%FVc0 zB7R~U?%WI*HZ7WRZ1m4AZ^+}UKegpb@Ikyhfrfhy8u4;bt+(yktX1)I(AOCmj8aP2 zv zko*M|ygRt=ji&Xxo4i6J_9fLRCS)oAZ!;(>ycP~SiGb9-S`8+78yb6{BX0pq;e3*1 zQ_ANDVa6;#?|!P{z-ZG^x|}KKZbOZG}%Xqw_*Ue2?z}pGNKKBwue={@rfPUzxn^#<1)Ua zK{tW&WGSk0RFpxl;+RdAqvFoiDsEu()&g^)Y!S6Cb=xdPIbKQq6ZXp611>Sr_)#T~ zCN-Tu)Bd9?{@3&6VOd8|NHME$geQHu2;mVcrdn*A0p~aHEdo#@Y7o7))BSY>cK?&w zAhOP#(ybknk#d4sja9#v%42>C3VLU;vWK^cy!Z~;=vsaVF}GH4}^L$ zao;8X!X_s;OU+(zDdZn^0M(IuAk+Ai0OLxFfJZUjhcwS4C~De@7U;GOGr9i=GeX`3 z%kirvxWnunwnfqbejo>?xauX_-(DJVW+=owaiNGFbFa?jnRXkQ-aLRl!6$%cf}Y7C z_1$=?r944x07`|=@(1Y^RiRF{nKcL&09c{}f1m(e<;DymBbuC+O0U<;PUuz<&5V)> zwJ4-6NK}|v*E++L3U%f2uNV~tSU7oWwdZK3x4v&gHgRPmX>NVM=+wa=_y(Ca@&$^! z56*nZ>@_FJ8STb0f{)21V`Z8imca`V1(>Ao$>xjZR)|F$HC3wZsMP1-vKVo*S=dUJf-zxK==h0-RzDkZ=CP^ym%X zJ_vp#rm8(_wvB1XkQ>XuzOy80fe|LXNMhD0q4d*?fH9;h&LC9U%z^U4)Oe8J6}7z2 uh~?B-3n`5N0002LB`fy6Uc-z40j*4czykms5D%=e#Ao{g000001X)@(gUWpX literal 0 HcmV?d00001 diff --git a/gix-worktree-state/tests/fixtures/make_dangling_symlink.sh b/gix-worktree-state/tests/fixtures/make_dangling_symlink.sh new file mode 100644 index 00000000000..eb2b6a8edf6 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/make_dangling_symlink.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +target_oid=$(echo -n "non-existing-target" | git hash-object -w --stdin) + git update-index --index-info <<-EOF +120000 $target_oid dangling +EOF + +git commit -m "dangling symlink in index" diff --git a/gix-worktree-state/tests/state/checkout.rs b/gix-worktree-state/tests/state/checkout.rs index 27a1df1dd48..c38a81877af 100644 --- a/gix-worktree-state/tests/state/checkout.rs +++ b/gix-worktree-state/tests/state/checkout.rs @@ -42,6 +42,11 @@ fn driver_exe() -> String { exe } +fn assure_is_empty(dir: impl AsRef) -> std::io::Result<()> { + assert_eq!(std::fs::read_dir(dir)?.count(), 0); + Ok(()) +} + #[test] fn submodules_are_instantiated_as_directories() -> crate::Result { let mut opts = opts_from_probe(); @@ -57,11 +62,6 @@ fn submodules_are_instantiated_as_directories() -> crate::Result { Ok(()) } -fn assure_is_empty(dir: impl AsRef) -> std::io::Result<()> { - assert_eq!(std::fs::read_dir(dir)?.count(), 0); - Ok(()) -} - #[test] fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { let mut opts = opts_from_probe(); @@ -125,7 +125,7 @@ fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { if cfg!(windows) { "A-dir\\a" } else { "A-dir/a" }, "A-file", "FAKE-DIR", - if cfg!(windows) { "fake-file" } else { "FAKE-FILE" } + "FAKE-FILE" ]), ); assert!(outcome.collisions.is_empty()); @@ -257,6 +257,30 @@ fn symlinks_become_files_if_disabled() -> crate::Result { Ok(()) } +#[test] +fn dangling_symlinks_can_be_created() -> crate::Result { + let opts = opts_from_probe(); + if !opts.fs.symlink { + eprintln!("Skipping dangling symlink test on filesystem that 
+        return Ok(());
+    }
+
+    let (_source_tree, destination, _index, outcome) =
+        checkout_index_in_tmp_dir(opts.clone(), "make_dangling_symlink")?;
+    let worktree_files = dir_structure(&destination);
+    let worktree_files_stripped = stripped_prefix(&destination, &worktree_files);
+
+    assert_eq!(worktree_files_stripped, paths(["dangling"]));
+    let symlink_path = &worktree_files[0];
+    assert!(symlink_path
+        .symlink_metadata()
+        .expect("dangling symlink is on disk")
+        .is_symlink());
+    assert_eq!(std::fs::read_link(symlink_path)?, Path::new("non-existing-target"));
+    assert!(outcome.collisions.is_empty());
+    Ok(())
+}
+
 #[test]
 fn allow_or_disallow_symlinks() -> crate::Result {
     let mut opts = opts_from_probe();
@@ -303,12 +327,7 @@ fn keep_going_collects_results() {
             .iter()
             .map(|r| r.path.to_path_lossy().into_owned())
             .collect::<Vec<_>>(),
-        paths(if cfg!(unix) {
-            [".gitattributes", "dir/content"]
-        } else {
-            // not actually a symlink anymore, even though symlinks are supported but git think differently.
-            ["dir/content", "dir/sub-dir/symlink"]
-        })
+        paths([".gitattributes", "dir/content"])
     );
 }
 
@@ -322,11 +341,15 @@ fn keep_going_collects_results() {
     } else {
         assert_eq!(
             stripped_prefix(&destination, &dir_structure(&destination)),
-            paths(if cfg!(unix) {
-                Box::new(["dir/sub-dir/symlink", "empty", "executable"].into_iter()) as Box<dyn Iterator<Item = &str>>
-            } else {
-                Box::new(["empty", "executable"].into_iter())
-            }),
+            paths([
+                if cfg!(unix) {
+                    "dir/sub-dir/symlink"
+                } else {
+                    "dir\\sub-dir\\symlink"
+                },
+                "empty",
+                "executable",
+            ]),
             "some files could not be created"
         );
     }
@@ -550,8 +573,10 @@ fn probe_gitoxide_dir() -> crate::Result<gix_fs::Capabilities> {
 }
 
 fn opts_from_probe() -> gix_worktree_state::checkout::Options {
+    static CAPABILITIES: Lazy<gix_fs::Capabilities> = Lazy::new(|| probe_gitoxide_dir().unwrap());
+
     gix_worktree_state::checkout::Options {
-        fs: probe_gitoxide_dir().unwrap(),
+        fs: *CAPABILITIES,
         destination_is_initially_empty: true,
         thread_limit: gix_features::parallel::num_threads(None).into(),
         ..Default::default()

From 7a3c583d51629697804c9f6d122b505b0afcefc8 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 13 May 2024 13:23:32 +0200
Subject: [PATCH 9/9] improve the symlink probing by simplifying it

Fewer IO operations, and less that can go wrong.
---
 gix-fs/src/capabilities.rs       | 13 ++-----------
 gix-fs/src/symlink.rs            |  2 ++
 gix-fs/tests/capabilities/mod.rs |  6 +++++-
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/gix-fs/src/capabilities.rs b/gix-fs/src/capabilities.rs
index 4c0d2f8d95e..3a384a26dca 100644
--- a/gix-fs/src/capabilities.rs
+++ b/gix-fs/src/capabilities.rs
@@ -101,22 +101,13 @@ impl Capabilities {
     }
 
     fn probe_symlink(root: &Path) -> std::io::Result<bool> {
-        let src_path = root.join("__link_src_file");
-        std::fs::OpenOptions::new()
-            .create_new(true)
-            .write(true)
-            .open(&src_path)?;
         let link_path = root.join("__file_link");
-        if crate::symlink::create(&src_path, &link_path).is_err() {
-            std::fs::remove_file(&src_path)?;
+        if crate::symlink::create("dangling".as_ref(), &link_path).is_err() {
             return Ok(false);
         }
 
         let res = std::fs::symlink_metadata(&link_path).map(|m| m.file_type().is_symlink());
-
-        let cleanup = crate::symlink::remove(&link_path).or_else(|_| std::fs::remove_file(&link_path));
-        std::fs::remove_file(&src_path).and(cleanup)?;
-
+        crate::symlink::remove(&link_path).or_else(|_| std::fs::remove_file(&link_path))?;
         res
     }
 }
diff --git a/gix-fs/src/symlink.rs b/gix-fs/src/symlink.rs
index c29d030d0f8..d37980f4674 100644
--- a/gix-fs/src/symlink.rs
+++ b/gix-fs/src/symlink.rs
@@ -2,6 +2,8 @@ use std::{io, io::ErrorKind::AlreadyExists, path::Path};
 
 #[cfg(not(windows))]
 /// Create a new symlink at `link` which points to `original`.
+///
+/// Note that `original` doesn't have to exist.
 pub fn create(original: &Path, link: &Path) -> io::Result<()> {
     std::os::unix::fs::symlink(original, link)
 }
diff --git a/gix-fs/tests/capabilities/mod.rs b/gix-fs/tests/capabilities/mod.rs
index edff3b022f8..42e5255d423 100644
--- a/gix-fs/tests/capabilities/mod.rs
+++ b/gix-fs/tests/capabilities/mod.rs
@@ -2,7 +2,7 @@
 fn probe() {
     let dir = tempfile::tempdir().unwrap();
     std::fs::File::create(dir.path().join("config")).unwrap();
-    gix_fs::Capabilities::probe(dir.path());
+    let caps = gix_fs::Capabilities::probe(dir.path());
 
     let entries: Vec<_> = std::fs::read_dir(dir.path())
         .unwrap()
@@ -15,4 +15,8 @@ fn probe() {
         0,
         "there should be no left-over files after probing, found {entries:?}"
     );
+    if cfg!(unix) {
+        assert!(caps.symlink, "Unix should always be able to create symlinks");
+        assert!(caps.executable_bit, "Unix should always honor executable bits");
+    }
 }
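
For readers following PATCH 3/9 above: the `±HHMM` encoding grants the hour and minute fields two decimal digits each, which is why `+9959` is the largest offset `write_to` can emit and why anything at or above 100 hours must become an error rather than the previous `assert!` panic. The sketch below demonstrates that rule in isolation; `format_offset` is a hypothetical helper for illustration only (plain `std`, with a bare `i32` offset in seconds instead of the real `gix_date::Time` with its separate `Sign` field and `itoa`-based writer):

```rust
// Hypothetical helper mirroring the two-digits-per-field rule of PATCH 3/9;
// not part of the gix-date API.
fn format_offset(offset_in_seconds: i32) -> Result<String, String> {
    let sign = if offset_in_seconds < 0 { '-' } else { '+' };
    let offset = offset_in_seconds.unsigned_abs();
    let hours = offset / 3600;
    let minutes = (offset % 3600) / 60;
    if hours > 99 {
        // Same behaviour as the patched `Time::write_to`: error out instead of panicking.
        return Err("Cannot represent offsets larger than +-9900".into());
    }
    // Zero-pad both fields to two digits, e.g. 2h30m east of UTC -> "+0230".
    Ok(format!("{sign}{hours:02}{minutes:02}"))
}

fn main() {
    assert_eq!(format_offset(9000).unwrap(), "+0230"); // 2h30m, as in the test-suite
    assert_eq!(format_offset(-36000).unwrap(), "-1000"); // 10 hours west
    assert_eq!(format_offset(99 * 3600 + 59 * 60).unwrap(), "+9959"); // the new maximum
    assert!(format_offset(100 * 3600 + 30 * 60).is_err()); // used to trip the `assert!`
}
```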