Skip to content

Commit

Permalink
Merge branch 'fixes'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Aug 27, 2023
2 parents a71b6de + 0d6d4ec commit 4bfd1cc
Show file tree
Hide file tree
Showing 40 changed files with 421 additions and 141 deletions.
2 changes: 1 addition & 1 deletion gitoxide-core/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub struct Options {
pub mod information;

fn parse_file(index_path: impl AsRef<Path>, object_hash: gix::hash::Kind) -> anyhow::Result<gix::index::File> {
gix::index::File::at(index_path.as_ref(), object_hash, Default::default()).map_err(Into::into)
gix::index::File::at(index_path.as_ref(), object_hash, false, Default::default()).map_err(Into::into)
}

pub mod checkout_exclusive {
Expand Down
27 changes: 19 additions & 8 deletions gitoxide-core/src/repository/index/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
use std::{ffi::OsString, path::PathBuf};

use anyhow::bail;
use gix::prelude::FindExt;

pub fn from_tree(
repo: gix::Repository,
mut spec: OsString,
index_path: Option<PathBuf>,
force: bool,
repo: gix::Repository,
skip_hash: bool,
) -> anyhow::Result<()> {
spec.push("^{tree}");
let spec = gix::path::os_str_into_bstr(&spec)?;
let tree = repo.rev_parse_single(spec)?;
let index = gix::index::State::from_tree(&tree, |oid, buf| repo.objects.find_tree_iter(oid, buf).ok())?;
let options = gix::index::write::Options::default();

let mut index = repo.index_from_tree(&tree)?;
let options = gix::index::write::Options {
skip_hash,
..Default::default()
};

match index_path {
Some(index_path) => {
Expand All @@ -23,11 +27,10 @@ pub fn from_tree(
index_path.display()
);
}
let mut index = gix::index::File::from_state(index, index_path);
index.set_path(index_path);
index.write(options)?;
}
None => {
let index = gix::index::File::from_state(index, std::path::PathBuf::new());
let mut out = Vec::with_capacity(512 * 1024);
index.write_to(&mut out, options)?;
}
Expand All @@ -36,7 +39,12 @@ pub fn from_tree(
Ok(())
}

pub fn from_list(entries_file: PathBuf, index_path: Option<PathBuf>, force: bool) -> anyhow::Result<()> {
pub fn from_list(
entries_file: PathBuf,
index_path: Option<PathBuf>,
force: bool,
skip_hash: bool,
) -> anyhow::Result<()> {
use std::io::BufRead;
let object_hash = gix::hash::Kind::Sha1;

Expand All @@ -57,7 +65,10 @@ pub fn from_list(entries_file: PathBuf, index_path: Option<PathBuf>, force: bool
}
index.sort_entries();

let options = gix::index::write::Options::default();
let options = gix::index::write::Options {
skip_hash,
..Default::default()
};
match index_path {
Some(index_path) => {
if index_path.is_file() && !force {
Expand Down
15 changes: 1 addition & 14 deletions gitoxide-core/src/repository/remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,18 +334,5 @@ pub(crate) fn by_name_or_url<'repo>(
repo: &'repo gix::Repository,
name_or_url: Option<&str>,
) -> anyhow::Result<gix::Remote<'repo>> {
use anyhow::Context;
Ok(match name_or_url {
Some(name) => {
if name.contains('/') || name.contains('.') {
repo.remote_at(gix::url::parse(name.into())?)?
} else {
repo.find_remote(name)?
}
}
None => repo
.head()?
.into_remote(gix::remote::Direction::Fetch)
.context("Cannot find a remote for unborn branch")??,
})
repo.find_fetch_remote(name_or_url.map(Into::into)).map_err(Into::into)
}
2 changes: 1 addition & 1 deletion gix-index/src/access/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ mod tests {
let file = PathBuf::from("tests")
.join("fixtures")
.join(Path::new("loose_index").join("conflicting-file.git-index"));
let file = crate::File::at(file, gix_hash::Kind::Sha1, Default::default()).expect("valid file");
let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file");
assert_eq!(
file.entries().len(),
3,
Expand Down
11 changes: 6 additions & 5 deletions gix-index/src/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub struct Options {

impl State {
/// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through, assuming `object_hash`
/// to be used through the file.
/// to be used through the file. Also return the stored hash over all bytes in `data` or `None` if none was written due to `index.skipHash`.
pub fn from_bytes(
data: &[u8],
timestamp: FileTime,
Expand All @@ -64,7 +64,7 @@ impl State {
min_extension_block_in_bytes_for_threading,
expected_checksum,
}: Options,
) -> Result<(Self, gix_hash::ObjectId), Error> {
) -> Result<(Self, Option<gix_hash::ObjectId>), Error> {
let _span = gix_features::trace::detail!("gix_index::State::from_bytes()");
let (version, num_entries, post_header_data) = header::decode(data, object_hash)?;
let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash);
Expand Down Expand Up @@ -214,10 +214,11 @@ impl State {
}

let checksum = gix_hash::ObjectId::from(data);
if let Some(expected_checksum) = expected_checksum {
if checksum != expected_checksum {
let checksum = (!checksum.is_null()).then_some(checksum);
if let Some((expected_checksum, actual_checksum)) = expected_checksum.zip(checksum) {
if actual_checksum != expected_checksum {
return Err(Error::ChecksumMismatch {
actual_checksum: checksum,
actual_checksum,
expected_checksum,
});
}
Expand Down
2 changes: 2 additions & 0 deletions gix-index/src/extension/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ impl Link {
self,
split_index: &mut crate::File,
object_hash: gix_hash::Kind,
skip_hash: bool,
options: crate::decode::Options,
) -> Result<(), crate::file::init::Error> {
let shared_index_path = split_index
Expand All @@ -82,6 +83,7 @@ impl Link {
let mut shared_index = crate::File::at(
&shared_index_path,
object_hash,
skip_hash,
crate::decode::Options {
expected_checksum: self.shared_index_checksum.into(),
..options
Expand Down
60 changes: 48 additions & 12 deletions gix-index/src/file/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@ pub use error::Error;
/// Initialization
impl File {
/// Try to open the index file at `path` with `options`, assuming `object_hash` is used throughout the file, or create a new
/// index that merely exists in memory and is empty.
/// index that merely exists in memory and is empty. `skip_hash` will increase the performance by a factor of 2, at the cost of
/// possibly not detecting corruption.
///
/// Note that the `path` will not be written if it doesn't exist.
pub fn at_or_default(
path: impl Into<PathBuf>,
object_hash: gix_hash::Kind,
skip_hash: bool,
options: decode::Options,
) -> Result<Self, Error> {
let path = path.into();
Ok(match Self::at(&path, object_hash, options) {
Ok(match Self::at(&path, object_hash, skip_hash, options) {
Ok(f) => f,
Err(Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => {
File::from_state(State::new(object_hash), path)
Expand All @@ -44,26 +46,60 @@ impl File {
})
}

/// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file.
pub fn at(path: impl Into<PathBuf>, object_hash: gix_hash::Kind, options: decode::Options) -> Result<Self, Error> {
/// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file. If `skip_hash` is `true`,
/// we will not get or compare the checksum of the index at all, which generally increases performance of this method by a factor
/// of 2 or more.
///
/// Note that the verification of the file hash depends on `options`, and even then it's performed after the file was read and not
/// before it is read. That way, invalid files would see a more descriptive error message as we try to parse them.
pub fn at(
path: impl Into<PathBuf>,
object_hash: gix_hash::Kind,
skip_hash: bool,
options: decode::Options,
) -> Result<Self, Error> {
let _span = gix_features::trace::detail!("gix_index::File::at()");
let path = path.into();
let (data, mtime) = {
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
let file = std::fs::File::open(&path)?;
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
#[allow(unsafe_code)]
let data = unsafe { Mmap::map(&file)? };

if !skip_hash {
// Note that even though it's trivial to offload this into a thread, which is worth it for all but the smallest
// index files, we choose more safety here just like git does and don't even try to decode the index if the hashes
// don't match.
// Thanks to `skip_hash`, we can get performance and it's under caller control, at the cost of some safety.
let expected = gix_hash::ObjectId::from(&data[data.len() - object_hash.len_in_bytes()..]);
if !expected.is_null() {
let _span = gix_features::trace::detail!("gix::open_index::hash_index", path = ?path);
let meta = file.metadata()?;
let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64;
let actual_hash = gix_features::hash::bytes(
&file,
num_bytes_to_hash as usize,
object_hash,
&mut gix_features::progress::Discard,
&Default::default(),
)?;

if actual_hash != expected {
return Err(Error::Decode(decode::Error::ChecksumMismatch {
actual_checksum: actual_hash,
expected_checksum: expected,
}));
}
}
}

(data, filetime::FileTime::from_last_modification_time(&file.metadata()?))
};

let (state, checksum) = State::from_bytes(&data, mtime, object_hash, options)?;
let mut file = File {
state,
path,
checksum: Some(checksum),
};
let mut file = File { state, path, checksum };
if let Some(mut link) = file.link.take() {
link.dissolve_into(&mut file, object_hash, options)?;
link.dissolve_into(&mut file, object_hash, skip_hash, options)?;
}

Ok(file)
Expand All @@ -72,7 +108,7 @@ impl File {
/// Consume `state` and pretend it was read from `path`, setting our checksum to `null`.
///
/// `File` instances created like that should be written to disk to set the correct checksum via `[File::write()]`.
pub fn from_state(state: crate::State, path: impl Into<PathBuf>) -> Self {
pub fn from_state(state: State, path: impl Into<PathBuf>) -> Self {
File {
state,
path: path.into(),
Expand Down
33 changes: 17 additions & 16 deletions gix-index/src/file/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ mod error {
actual: gix_hash::ObjectId,
expected: gix_hash::ObjectId,
},
#[error("Checksum of in-memory index wasn't computed yet")]
NoChecksum,
}
}
pub use error::Error;
Expand All @@ -24,19 +22,22 @@ impl File {
/// Verify the integrity of the index to assure its consistency.
pub fn verify_integrity(&self) -> Result<(), Error> {
let _span = gix_features::trace::coarse!("gix_index::File::verify_integrity()");
let checksum = self.checksum.ok_or(Error::NoChecksum)?;
let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64;
let should_interrupt = AtomicBool::new(false);
let actual = gix_features::hash::bytes_of_file(
&self.path,
num_bytes_to_hash as usize,
checksum.kind(),
&mut gix_features::progress::Discard,
&should_interrupt,
)?;
(actual == checksum).then_some(()).ok_or(Error::ChecksumMismatch {
actual,
expected: checksum,
})
if let Some(checksum) = self.checksum {
let num_bytes_to_hash = self.path.metadata()?.len() - checksum.as_bytes().len() as u64;
let should_interrupt = AtomicBool::new(false);
let actual = gix_features::hash::bytes_of_file(
&self.path,
num_bytes_to_hash as usize,
checksum.kind(),
&mut gix_features::progress::Discard,
&should_interrupt,
)?;
(actual == checksum).then_some(()).ok_or(Error::ChecksumMismatch {
actual,
expected: checksum,
})
} else {
Ok(())
}
}
}
27 changes: 16 additions & 11 deletions gix-index/src/file/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,28 @@ impl File {
mut out: impl std::io::Write,
options: write::Options,
) -> std::io::Result<(Version, gix_hash::ObjectId)> {
let mut hasher = hash::Write::new(&mut out, self.state.object_hash);
let version = self.state.write_to(&mut hasher, options)?;

let hash = hasher.hash.digest();
out.write_all(&hash)?;
Ok((version, gix_hash::ObjectId::from(hash)))
let (version, hash) = if options.skip_hash {
let out: &mut dyn std::io::Write = &mut out;
let version = self.state.write_to(out, options)?;
(version, self.state.object_hash.null())
} else {
let mut hasher = hash::Write::new(&mut out, self.state.object_hash);
let out: &mut dyn std::io::Write = &mut hasher;
let version = self.state.write_to(out, options)?;
(version, gix_hash::ObjectId::from(hasher.hash.digest()))
};
out.write_all(hash.as_slice())?;
Ok((version, hash))
}

/// Write ourselves to the path we were read from after acquiring a lock, using `options`.
///
/// Note that the hash produced will be stored which is why we need to be mutable.
pub fn write(&mut self, options: write::Options) -> Result<(), Error> {
let mut lock = std::io::BufWriter::new(gix_lock::File::acquire_to_update_resource(
&self.path,
gix_lock::acquire::Fail::Immediately,
None,
)?);
let mut lock = std::io::BufWriter::with_capacity(
64 * 1024,
gix_lock::File::acquire_to_update_resource(&self.path, gix_lock::acquire::Fail::Immediately, None)?,
);
let (version, digest) = self.write_to(&mut lock, options)?;
match lock.into_inner() {
Ok(lock) => lock.commit()?,
Expand Down
17 changes: 15 additions & 2 deletions gix-index/src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,26 @@ impl Extensions {
/// Note that default options write either index V2 or V3 depending on the content of the entries.
#[derive(Debug, Default, Clone, Copy)]
pub struct Options {
/// Configures which extensions to write
/// Configures which extensions to write.
pub extensions: Extensions,
/// Set the trailing hash of the produced index to all zeroes to save some time.
///
/// This value is typically controlled by `index.skipHash` and is respected when the index is written
/// via [`File::write()`](crate::File::write()) and [`File::write_to()`](crate::File::write_to()).
/// Note that
pub skip_hash: bool,
}

impl State {
/// Serialize this instance to `out` with [`options`][Options].
pub fn write_to(&self, out: impl std::io::Write, Options { extensions }: Options) -> std::io::Result<Version> {
pub fn write_to(
&self,
out: impl std::io::Write,
Options {
extensions,
skip_hash: _,
}: Options,
) -> std::io::Result<Version> {
let _span = gix_features::trace::detail!("gix_index::State::write()");
let version = self.detect_required_version();

Expand Down
Binary file not shown.
4 changes: 3 additions & 1 deletion gix-index/tests/index/file/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod at_or_new {
gix_index::File::at_or_default(
Generated("v4_more_files_IEOT").to_path(),
gix_hash::Kind::Sha1,
false,
Default::default(),
)
.expect("file exists and can be opened");
Expand All @@ -16,6 +17,7 @@ mod at_or_new {
let index = gix_index::File::at_or_default(
"__definitely no file that exists ever__",
gix_hash::Kind::Sha1,
false,
Default::default(),
)
.expect("file is defaulting to a new one");
Expand Down Expand Up @@ -46,7 +48,7 @@ mod from_state {
let new_index_path = tmp.path().join(fixture.to_name());
assert!(!new_index_path.exists());

let index = gix_index::File::at(fixture.to_path(), gix_hash::Kind::Sha1, Default::default())?;
let index = gix_index::File::at(fixture.to_path(), gix_hash::Kind::Sha1, false, Default::default())?;
let mut index = gix_index::File::from_state(index.into(), new_index_path.clone());
assert!(index.checksum().is_none());
assert_eq!(index.path(), new_index_path);
Expand Down
Loading

0 comments on commit 4bfd1cc

Please sign in to comment.