From a36fa039b03e8e3773995d4df7bbc5ef33e3368d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 7 Feb 2022 22:55:41 +0800 Subject: [PATCH] feat: git-hash::Prefix::from_id() (#298) A way to obtain a prefix of an object id, with all non-prefix bytes set to zero. --- Cargo.lock | 1 + Cargo.toml | 2 +- git-hash/Cargo.toml | 3 + git-hash/src/lib.rs | 2 +- git-hash/src/owned.rs | 40 +++++ git-hash/tests/oid/mod.rs | 33 ++++ git-pack/src/multi_index/access.rs | 2 +- git-pack/tests/pack/multi_index.rs | 176 ---------------------- git-pack/tests/pack/multi_index/mod.rs | 74 +++++++++ git-pack/tests/pack/multi_index/verify.rs | 52 +++++++ git-pack/tests/pack/multi_index/write.rs | 66 ++++++++ 11 files changed, 272 insertions(+), 179 deletions(-) delete mode 100644 git-pack/tests/pack/multi_index.rs create mode 100644 git-pack/tests/pack/multi_index/mod.rs create mode 100644 git-pack/tests/pack/multi_index/verify.rs create mode 100644 git-pack/tests/pack/multi_index/write.rs diff --git a/Cargo.lock b/Cargo.lock index e3d81c21e99..92320f2478b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1244,6 +1244,7 @@ dependencies = [ name = "git-hash" version = "0.9.2" dependencies = [ + "git-testtools", "hex", "quick-error", "serde", diff --git a/Cargo.toml b/Cargo.toml index 829080eac2e..637b9121e3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,7 +100,7 @@ document-features = { version = "0.1.0", optional = true } [profile.dev.package] git-object = { opt-level = 3 } git-ref = { opt-level = 3 } -git-pack = { opt-level = 3 } +#git-pack = { opt-level = 3 } git-hash = { opt-level = 3 } git-actor = { opt-level = 3 } git-config = { opt-level = 3 } diff --git a/git-hash/Cargo.toml b/git-hash/Cargo.toml index 9895719c0f8..d7d4dec9996 100644 --- a/git-hash/Cargo.toml +++ b/git-hash/Cargo.toml @@ -21,5 +21,8 @@ quick-error = "2.0.0" hex = "0.4.2" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } +[dev-dependencies] +git-testtools = { path 
= "../tests/tools"} + [package.metadata.docs.rs] all-features = true diff --git a/git-hash/src/lib.rs b/git-hash/src/lib.rs index c4f5b2495a2..19822979534 100644 --- a/git-hash/src/lib.rs +++ b/git-hash/src/lib.rs @@ -11,7 +11,7 @@ use std::{convert::TryFrom, str::FromStr}; pub use borrowed::oid; mod owned; -pub use owned::ObjectId; +pub use owned::{ObjectId, Prefix}; #[allow(missing_docs)] pub mod decode { diff --git a/git-hash/src/owned.rs b/git-hash/src/owned.rs index 34d57e30fd4..a65fa49a834 100644 --- a/git-hash/src/owned.rs +++ b/git-hash/src/owned.rs @@ -2,6 +2,46 @@ use std::{borrow::Borrow, convert::TryInto, fmt, ops::Deref}; use crate::{borrowed::oid, Kind, SIZE_OF_SHA1_DIGEST}; +/// A partial owned hash possibly identifying an object uniquely, +/// whose non-prefix bytes are zeroed. +#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct Prefix { + inner: ObjectId, + prefix_len: usize, +} + +impl Prefix { + /// Create a prefix from the first `hex_len` hex characters of `id`, zeroing all remaining bits. Panics if `hex_len` exceeds the hex length of the hash kind of `id`. + pub fn from_id(id: impl AsRef, hex_len: usize) -> Self { + let id = id.as_ref(); + assert!( + hex_len <= id.kind().len_in_hex(), + "hex_len must not be larger than the maximum hex len of the input id" + ); + let prefix = match id.kind() { + Kind::Sha1 => { + let mut b = [0u8; 20]; + let copy_len = (hex_len + 1) / 2; + b[..copy_len].copy_from_slice(&id.as_bytes()[..copy_len]); + if hex_len % 2 == 1 { + b[hex_len / 2] &= 0xf0; + } + ObjectId::Sha1(b) + } + }; + Prefix { + inner: prefix, + prefix_len: hex_len, + } + } + + /// Return the prefix as an id in which all bits past the prefix are zero. + pub fn prefix(&self) -> &oid { + &self.inner + } +} + /// An owned hash identifying objects, most commonly Sha1 #[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] diff --git a/git-hash/tests/oid/mod.rs b/git-hash/tests/oid/mod.rs index 4284961c7db..8db2748df48 100644 --- a/git-hash/tests/oid/mod.rs +++
b/git-hash/tests/oid/mod.rs @@ -1,3 +1,36 @@ +mod prefix { + mod from_id { + use git_hash::{Kind, ObjectId}; + use git_testtools::hex_to_id; + + #[test] + fn various_valid_inputs() { + let oid_hex = "abcdefabcdefabcdefabcdefabcdefabcdefabcd"; + let oid = hex_to_id(oid_hex); + + assert_eq!(git_hash::Prefix::from_id(oid, 0).prefix(), ObjectId::null(oid.kind())); + + for prefix_len in 1..oid.kind().len_in_hex() { + let mut expected = String::from(&oid_hex[..prefix_len]); + let num_of_zeros = oid.kind().len_in_hex() - prefix_len; + expected.extend(std::iter::repeat('0').take(num_of_zeros)); + assert_eq!( + git_hash::Prefix::from_id(oid, prefix_len).prefix().to_hex().to_string(), + expected, + "{}", + prefix_len + ); + } + } + #[test] + #[should_panic] + fn panics_if_hex_len_is_longer_than_oid_len_in_hex() { + let kind = Kind::Sha1; + git_hash::Prefix::from_id(ObjectId::null(kind), kind.len_in_hex() + 1); + } + } +} + mod short_hex { #[test] fn display_entire_range_sha1() { diff --git a/git-pack/src/multi_index/access.rs b/git-pack/src/multi_index/access.rs index e95c48a6691..9ca0205de27 100644 --- a/git-pack/src/multi_index/access.rs +++ b/git-pack/src/multi_index/access.rs @@ -67,7 +67,7 @@ impl File { } /// TODO - pub fn lookup_abbrev(&self, _id: impl AsRef, _hex_len: usize) -> Option { + pub fn lookup_prefix(&self, _id: impl AsRef, _hex_len: usize) -> Option { todo!() } diff --git a/git-pack/tests/pack/multi_index.rs b/git-pack/tests/pack/multi_index.rs deleted file mode 100644 index ada4f344afe..00000000000 --- a/git-pack/tests/pack/multi_index.rs +++ /dev/null @@ -1,176 +0,0 @@ -use std::path::PathBuf; - -use git_pack::multi_index::File; -use git_testtools::hex_to_id; - -fn multi_index() -> (File, PathBuf) { - let path = git_testtools::scripted_fixture_repo_read_only("make_pack_gen_repo_multi_index.sh") - .expect("test fixture exists") - .join(".git/objects/pack/multi-pack-index"); - let file = git_pack::multi_index::File::at(&path).unwrap(); - (file, path) -} - 
-#[test] -fn access() { - let (file, path) = multi_index(); - - assert_eq!(file.version(), git_pack::multi_index::Version::V1); - assert_eq!(file.path(), path); - assert_eq!(file.num_indices(), 1); - assert_eq!(file.object_hash(), git_hash::Kind::Sha1); - assert_eq!(file.num_objects(), 868); - assert_eq!(file.checksum(), hex_to_id("39a3804d0a84de609e4fcb49e66dc1297c75ca11")); - // assert_eq!() - assert_eq!( - file.index_names(), - vec![PathBuf::from("pack-542ad1d1c7c762ea4e36907570ff9e4b5b7dde1b.idx")] - ); - - for (idx, expected_pack_offset, expected_oid) in &[ - (0u32, 25267u64, hex_to_id("000f574443efab4ddbeee3621e49124eb3f8b6d0")), - (140, 30421, hex_to_id("2935a65b1d69fb33c93dabc4cdf65a6f4d30ce4c")), - (867, 24540, hex_to_id("ffea360a6a54c1185eeae4f3cfefc927cf7a35a9")), - ] { - let actual_oid = file.oid_at_index(*idx); - assert_eq!(actual_oid, *expected_oid); - assert_eq!(file.lookup(actual_oid), Some(*idx)); - let (pack_id, pack_offset) = file.pack_id_and_pack_offset_at_index(*idx); - assert_eq!(pack_id, 0, "we only have one pack here"); - assert_eq!(pack_offset, *expected_pack_offset); - } - - let mut count = 0; - for (idx, entry) in file.iter().enumerate() { - assert_eq!(entry.oid, file.oid_at_index(idx as u32)); - let (pack_index, pack_offset) = file.pack_id_and_pack_offset_at_index(idx as u32); - assert_eq!(pack_index, entry.pack_index); - assert_eq!(pack_offset, entry.pack_offset); - count += 1; - } - assert_eq!(count, file.num_objects()); -} - -mod verify { - use std::sync::atomic::AtomicBool; - - use common_macros::b_tree_map; - use git_features::progress; - - use crate::pack::multi_index::multi_index; - - #[test] - fn checksum() -> crate::Result { - let (file, _) = multi_index(); - assert_eq!( - file.verify_checksum(progress::Discard, &AtomicBool::new(false))?, - file.checksum() - ); - Ok(()) - } - - #[test] - fn integrity() { - let (file, _) = multi_index(); - let outcome = file - .verify_integrity(progress::Discard, &AtomicBool::new(false), 
Default::default()) - .unwrap(); - assert_eq!(outcome.actual_index_checksum, file.checksum()); - assert_eq!( - outcome.pack_traverse_statistics, - vec![git_pack::index::traverse::Statistics { - average: git_pack::data::decode_entry::Outcome { - kind: git_object::Kind::Tree, - num_deltas: 1, - decompressed_size: 47, - compressed_size: 46, - object_size: 152 - }, - objects_per_chain_length: b_tree_map! { - 0 => 326, - 1 => 106, - 2 => 326, - 3 => 108, - 4 => 2, - }, - total_compressed_entries_size: 40628, - total_decompressed_entries_size: 40919, - total_object_size: 131993, - pack_size: 42856, - num_commits: 16, - num_trees: 40, - num_tags: 1, - num_blobs: 811 - }] - ); - } -} - -mod write { - use std::{path::PathBuf, sync::atomic::AtomicBool}; - - use git_features::progress; - use git_testtools::{fixture_path, hex_to_id}; - - #[test] - fn from_paths() -> crate::Result { - let dir = tempfile::TempDir::new()?; - let input_indices = std::fs::read_dir(fixture_path("objects/pack"))? - .filter_map(|r| { - r.ok() - .map(|e| e.path()) - .filter(|p| p.extension().and_then(|e| e.to_str()).unwrap_or("") == "idx") - }) - .collect::>(); - assert_eq!(input_indices.len(), 3); - let output_path = dir.path().join("multi-pack-index"); - let mut out = std::fs::OpenOptions::new() - .write(true) - .create_new(true) - .open(&output_path)?; - let outcome = git_pack::multi_index::File::write_from_index_paths( - input_indices.clone(), - &mut out, - progress::Discard, - &AtomicBool::new(false), - git_pack::multi_index::write::Options { - object_hash: git_hash::Kind::Sha1, - }, - )?; - - assert_eq!( - outcome.multi_index_checksum, - hex_to_id("d34d327039a3554f8a644b29e07b903fa71ef269") - ); - - let file = git_pack::multi_index::File::at(output_path)?; - assert_eq!(file.num_indices(), 3); - assert_eq!( - file.index_names(), - vec![ - PathBuf::from("pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx"), - PathBuf::from("pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx"), - 
PathBuf::from("pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx"), - ] - ); - assert_eq!(file.num_objects(), 139); - assert_eq!(file.checksum(), outcome.multi_index_checksum); - - for index in &input_indices { - std::fs::copy(index, dir.path().join(index.file_name().expect("present")))?; - let pack = index.with_extension("pack"); - std::fs::copy(&pack, dir.path().join(pack.file_name().expect("present")))?; - } - - assert_eq!( - file.verify_integrity(progress::Discard, &AtomicBool::new(false), Default::default())? - .actual_index_checksum, - outcome.multi_index_checksum - ); - - let outcome = file.verify_integrity_fast(progress::Discard, &AtomicBool::new(false))?; - - assert_eq!(outcome.0, file.checksum()); - Ok(()) - } -} diff --git a/git-pack/tests/pack/multi_index/mod.rs b/git-pack/tests/pack/multi_index/mod.rs new file mode 100644 index 00000000000..9c12969beed --- /dev/null +++ b/git-pack/tests/pack/multi_index/mod.rs @@ -0,0 +1,74 @@ +use std::path::PathBuf; + +use git_pack::multi_index::File; + +fn multi_index() -> (File, PathBuf) { + let path = git_testtools::scripted_fixture_repo_read_only("make_pack_gen_repo_multi_index.sh") + .expect("test fixture exists") + .join(".git/objects/pack/multi-pack-index"); + let file = git_pack::multi_index::File::at(&path).unwrap(); + (file, path) +} + +mod access { + use super::multi_index; + use git_testtools::hex_to_id; + use std::path::PathBuf; + + #[test] + fn lookup_abbrev() { + let (file, _path) = multi_index(); + + for (idx, entry) in file.iter().enumerate() { + let hex_len = (idx % file.object_hash().len_in_hex()).min(7); + let hex_oid = entry.oid.to_hex_with_len(hex_len).to_string(); + assert_eq!(hex_oid.len(), hex_len); + let _oid_prefix = git_hash::Prefix::from_id(&entry.oid, hex_len); + // file.lookup_prefix(oid_prefix, hex_len).expect("non-ambiguous") + } + } + + #[test] + fn general() { + let (file, path) = multi_index(); + + assert_eq!(file.version(), git_pack::multi_index::Version::V1); + 
assert_eq!(file.path(), path); + assert_eq!(file.num_indices(), 1); + assert_eq!(file.object_hash(), git_hash::Kind::Sha1); + assert_eq!(file.num_objects(), 868); + assert_eq!(file.checksum(), hex_to_id("39a3804d0a84de609e4fcb49e66dc1297c75ca11")); + // assert_eq!() + assert_eq!( + file.index_names(), + vec![PathBuf::from("pack-542ad1d1c7c762ea4e36907570ff9e4b5b7dde1b.idx")] + ); + + for (idx, expected_pack_offset, expected_oid) in &[ + (0u32, 25267u64, hex_to_id("000f574443efab4ddbeee3621e49124eb3f8b6d0")), + (140, 30421, hex_to_id("2935a65b1d69fb33c93dabc4cdf65a6f4d30ce4c")), + (867, 24540, hex_to_id("ffea360a6a54c1185eeae4f3cfefc927cf7a35a9")), + ] { + let actual_oid = file.oid_at_index(*idx); + assert_eq!(actual_oid, *expected_oid); + assert_eq!(file.lookup(actual_oid), Some(*idx)); + let (pack_id, pack_offset) = file.pack_id_and_pack_offset_at_index(*idx); + assert_eq!(pack_id, 0, "we only have one pack here"); + assert_eq!(pack_offset, *expected_pack_offset); + } + + let mut count = 0; + for (idx, entry) in file.iter().enumerate() { + assert_eq!(entry.oid, file.oid_at_index(idx as u32)); + let (pack_index, pack_offset) = file.pack_id_and_pack_offset_at_index(idx as u32); + assert_eq!(pack_index, entry.pack_index); + assert_eq!(pack_offset, entry.pack_offset); + count += 1; + } + assert_eq!(count, file.num_objects()); + } +} + +mod verify; + +mod write; diff --git a/git-pack/tests/pack/multi_index/verify.rs b/git-pack/tests/pack/multi_index/verify.rs new file mode 100644 index 00000000000..a2259f1bd98 --- /dev/null +++ b/git-pack/tests/pack/multi_index/verify.rs @@ -0,0 +1,52 @@ +use std::sync::atomic::AtomicBool; + +use common_macros::b_tree_map; +use git_features::progress; + +use crate::pack::multi_index::multi_index; + +#[test] +fn checksum() -> crate::Result { + let (file, _) = multi_index(); + assert_eq!( + file.verify_checksum(progress::Discard, &AtomicBool::new(false))?, + file.checksum() + ); + Ok(()) +} + +#[test] +fn integrity() { + let (file, _) = 
multi_index(); + let outcome = file + .verify_integrity(progress::Discard, &AtomicBool::new(false), Default::default()) + .unwrap(); + assert_eq!(outcome.actual_index_checksum, file.checksum()); + assert_eq!( + outcome.pack_traverse_statistics, + vec![git_pack::index::traverse::Statistics { + average: git_pack::data::decode_entry::Outcome { + kind: git_object::Kind::Tree, + num_deltas: 1, + decompressed_size: 47, + compressed_size: 46, + object_size: 152 + }, + objects_per_chain_length: b_tree_map! { + 0 => 326, + 1 => 106, + 2 => 326, + 3 => 108, + 4 => 2, + }, + total_compressed_entries_size: 40628, + total_decompressed_entries_size: 40919, + total_object_size: 131993, + pack_size: 42856, + num_commits: 16, + num_trees: 40, + num_tags: 1, + num_blobs: 811 + }] + ); +} diff --git a/git-pack/tests/pack/multi_index/write.rs b/git-pack/tests/pack/multi_index/write.rs new file mode 100644 index 00000000000..4f3affd1377 --- /dev/null +++ b/git-pack/tests/pack/multi_index/write.rs @@ -0,0 +1,66 @@ +use std::{path::PathBuf, sync::atomic::AtomicBool}; + +use git_features::progress; +use git_testtools::{fixture_path, hex_to_id}; + +#[test] +fn from_paths() -> crate::Result { + let dir = tempfile::TempDir::new()?; + let input_indices = std::fs::read_dir(fixture_path("objects/pack"))? 
+ .filter_map(|r| { + r.ok() + .map(|e| e.path()) + .filter(|p| p.extension().and_then(|e| e.to_str()).unwrap_or("") == "idx") + }) + .collect::>(); + assert_eq!(input_indices.len(), 3); + let output_path = dir.path().join("multi-pack-index"); + let mut out = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&output_path)?; + let outcome = git_pack::multi_index::File::write_from_index_paths( + input_indices.clone(), + &mut out, + progress::Discard, + &AtomicBool::new(false), + git_pack::multi_index::write::Options { + object_hash: git_hash::Kind::Sha1, + }, + )?; + + assert_eq!( + outcome.multi_index_checksum, + hex_to_id("d34d327039a3554f8a644b29e07b903fa71ef269") + ); + + let file = git_pack::multi_index::File::at(output_path)?; + assert_eq!(file.num_indices(), 3); + assert_eq!( + file.index_names(), + vec![ + PathBuf::from("pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx"), + PathBuf::from("pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx"), + PathBuf::from("pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx"), + ] + ); + assert_eq!(file.num_objects(), 139); + assert_eq!(file.checksum(), outcome.multi_index_checksum); + + for index in &input_indices { + std::fs::copy(index, dir.path().join(index.file_name().expect("present")))?; + let pack = index.with_extension("pack"); + std::fs::copy(&pack, dir.path().join(pack.file_name().expect("present")))?; + } + + assert_eq!( + file.verify_integrity(progress::Discard, &AtomicBool::new(false), Default::default())? + .actual_index_checksum, + outcome.multi_index_checksum + ); + + let outcome = file.verify_integrity_fast(progress::Discard, &AtomicBool::new(false))?; + + assert_eq!(outcome.0, file.checksum()); + Ok(()) +}