Skip to content

Commit

Permalink
Merge branch 'status'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed May 14, 2024
2 parents c4e0a82 + 7a3c583 commit 04ef31e
Show file tree
Hide file tree
Showing 37 changed files with 464 additions and 249 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/msrv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ jobs:
- uses: actions/checkout@v4
- uses: extractions/setup-just@v2
- run: |
rustup toolchain install $rust_version --profile minimal --no-self-update
rustup default $rust_version
rustup toolchain install ${{ env.rust_version }} --profile minimal --no-self-update
rustup default ${{ env.rust_version }}
- run: just ci-check-msrv
76 changes: 67 additions & 9 deletions gitoxide-core/src/repository/odb.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::io;
use std::sync::atomic::Ordering;

use anyhow::bail;

Expand Down Expand Up @@ -50,6 +51,8 @@ pub mod statistics {
pub struct Options {
pub format: OutputFormat,
pub thread_limit: Option<usize>,
/// A debug-flag that triggers looking up the headers of all objects again, but without indices preloaded
pub extra_header_lookup: bool,
}
}

Expand All @@ -59,7 +62,11 @@ pub fn statistics(
mut progress: impl gix::Progress,
out: impl io::Write,
mut err: impl io::Write,
statistics::Options { format, thread_limit }: statistics::Options,
statistics::Options {
format,
thread_limit,
extra_header_lookup,
}: statistics::Options,
) -> anyhow::Result<()> {
use bytesize::ByteSize;
use gix::odb::{find, HeaderExt};
Expand All @@ -76,6 +83,10 @@ pub fn statistics(
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Default)]
struct Statistics {
/// All objects that were used to produce these statistics.
/// Only `Some` if we are doing an extra round of header queries on a repository without loaded indices.
#[cfg_attr(feature = "serde", serde(skip_serializing))]
ids: Option<Vec<gix::ObjectId>>,
total_objects: usize,
loose_objects: usize,
packed_objects: usize,
Expand Down Expand Up @@ -135,14 +146,17 @@ pub fn statistics(
}

impl gix::parallel::Reduce for Reduce {
type Input = Result<Vec<gix::odb::find::Header>, anyhow::Error>;
type Input = Result<Vec<(gix::ObjectId, gix::odb::find::Header)>, anyhow::Error>;
type FeedProduce = ();
type Output = Statistics;
type Error = anyhow::Error;

fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
for item in items? {
for (id, item) in items? {
self.stats.consume(item);
if let Some(ids) = self.stats.ids.as_mut() {
ids.push(id);
}
}
Ok(())
}
Expand All @@ -154,9 +168,9 @@ pub fn statistics(
}

let cancelled = || anyhow::anyhow!("Cancelled by user");
let object_ids = repo.objects.store_ref().iter()?.filter_map(Result::ok);
let object_ids = repo.objects.iter()?.filter_map(Result::ok);
let chunk_size = 1_000;
let stats = if gix::parallel::num_threads(thread_limit) > 1 {
let mut stats = if gix::parallel::num_threads(thread_limit) > 1 {
gix::parallel::in_parallel(
gix::interrupt::Iter::new(
gix::features::iter::Chunks {
Expand All @@ -166,19 +180,30 @@ pub fn statistics(
cancelled,
),
thread_limit,
move |_| (repo.objects.clone().into_inner(), counter),
{
let objects = repo.objects.clone();
move |_| (objects.clone().into_inner(), counter)
},
|ids, (handle, counter)| {
let ids = ids?;
counter.fetch_add(ids.len(), std::sync::atomic::Ordering::Relaxed);
counter.fetch_add(ids.len(), Ordering::Relaxed);
let out = ids
.into_iter()
.map(|id| handle.header(id))
.map(|id| handle.header(id).map(|hdr| (id, hdr)))
.collect::<Result<Vec<_>, _>>()?;
Ok(out)
},
Reduce::default(),
Reduce {
stats: Statistics {
ids: extra_header_lookup.then(Vec::new),
..Default::default()
},
},
)?
} else {
if extra_header_lookup {
bail!("extra-header-lookup is only meaningful in threaded mode");
}
let mut stats = Statistics::default();

for (count, id) in object_ids.enumerate() {
Expand All @@ -193,6 +218,39 @@ pub fn statistics(

progress.show_throughput(start);

if let Some(mut ids) = stats.ids.take() {
// Critical to re-open the repo to assure we don't have any ODB state and start fresh.
let start = std::time::Instant::now();
let repo = gix::open_opts(repo.git_dir(), repo.open_options().to_owned())?;
progress.set_name("re-counting".into());
progress.init(Some(ids.len()), gix::progress::count("objects"));
let counter = progress.counter();
counter.store(0, Ordering::Relaxed);
let errors = gix::parallel::in_parallel_with_slice(
&mut ids,
thread_limit,
{
let objects = repo.objects.clone();
move |_| (objects.clone().into_inner(), counter, false)
},
|id, (odb, counter, has_error), _threads_left, _stop_everything| -> anyhow::Result<()> {
counter.fetch_add(1, Ordering::Relaxed);
if let Err(_err) = odb.header(id) {
*has_error = true;
gix::trace::error!(err = ?_err, "Object that is known to be present wasn't found");
}
Ok(())
},
|| Some(std::time::Duration::from_millis(100)),
|(_, _, has_error)| has_error,
)?;

progress.show_throughput(start);
if errors.contains(&true) {
bail!("At least one object couldn't be looked up even though it must exist");
}
}

#[cfg(feature = "serde")]
{
serde_json::to_writer_pretty(out, &stats)?;
Expand Down
2 changes: 1 addition & 1 deletion gix-config/tests/mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::alloc;
use std::time::Instant;

#[global_allocator]
static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::max_value());
static ALLOCATOR: Cap<alloc::System> = Cap::new(alloc::System, usize::MAX);

#[test]
fn usage() {
Expand Down
22 changes: 16 additions & 6 deletions gix-date/src/time/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ use crate::{time::Sign, Time};
/// Serialization with standard `git` format
impl Time {
/// Serialize this instance into memory, similar to what [`write_to()`][Self::write_to()] would do with arbitrary `Write` implementations.
///
/// # Panics
///
/// If the underlying call fails because this instance can't be represented, typically due to an invalid offset.
pub fn to_bstring(&self) -> BString {
let mut buf = Vec::with_capacity(64);
self.write_to(&mut buf).expect("write to memory cannot fail");
Expand All @@ -13,6 +17,18 @@ impl Time {

/// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags.
pub fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
const SECONDS_PER_HOUR: u32 = 60 * 60;
let offset = self.offset.unsigned_abs();
let hours = offset / SECONDS_PER_HOUR;
let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;

if hours > 99 {
return Err(std::io::Error::new(
std::io::ErrorKind::Other,
"Cannot represent offsets larger than +-9900",
));
}

let mut itoa = itoa::Buffer::new();
out.write_all(itoa.format(self.seconds).as_bytes())?;
out.write_all(b" ")?;
Expand All @@ -23,12 +39,6 @@ impl Time {

const ZERO: &[u8; 1] = b"0";

const SECONDS_PER_HOUR: u32 = 60 * 60;
let offset = self.offset.unsigned_abs();
let hours = offset / SECONDS_PER_HOUR;
assert!(hours < 25, "offset is more than a day: {hours}");
let minutes = (offset - (hours * SECONDS_PER_HOUR)) / 60;

if hours < 10 {
out.write_all(ZERO)?;
}
Expand Down
162 changes: 109 additions & 53 deletions gix-date/tests/time/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use bstr::ByteSlice;
use gix_date::{time::Sign, SecondsSinceUnixEpoch, Time};
use gix_date::Time;

mod baseline;
mod format;
Expand Down Expand Up @@ -32,57 +31,114 @@ fn is_set() {
.is_set());
}

#[test]
fn write_to() -> Result<(), Box<dyn std::error::Error>> {
for (time, expected) in [
(
Time {
seconds: SecondsSinceUnixEpoch::MAX,
offset: 0,
sign: Sign::Minus,
},
"9223372036854775807 -0000",
),
(
Time {
seconds: SecondsSinceUnixEpoch::MIN,
offset: 0,
sign: Sign::Minus,
},
"-9223372036854775808 -0000",
),
(
Time {
seconds: 500,
offset: 9000,
sign: Sign::Plus,
},
"500 +0230",
),
(
Time {
seconds: 189009009,
offset: -36000,
sign: Sign::Minus,
},
"189009009 -1000",
),
(
Time {
seconds: 0,
offset: 0,
sign: Sign::Minus,
},
"0 -0000",
),
] {
let mut output = Vec::new();
time.write_to(&mut output)?;
assert_eq!(output.as_bstr(), expected);
assert_eq!(time.size(), output.len());
mod write_to {
use bstr::ByteSlice;
use gix_date::time::Sign;
use gix_date::{SecondsSinceUnixEpoch, Time};

#[test]
fn invalid() {
    // One hour beyond the largest representable hour field (99) must be rejected by `write_to()`.
    let out_of_range = Time {
        seconds: 0,
        offset: (100 * 60 * 60) + 30 * 60,
        sign: Sign::Plus,
    };
    let err = out_of_range.write_to(&mut Vec::new()).expect_err("offset too large to serialize");
    assert_eq!(err.to_string(), "Cannot represent offsets larger than +-9900");
}

let actual = gix_date::parse(&output.as_bstr().to_string(), None).expect("round-trippable");
assert_eq!(time, actual);
#[test]
fn valid_roundtrips() -> Result<(), Box<dyn std::error::Error>> {
    // Small constructor to keep the case table compact: (seconds, offset-in-seconds, sign).
    fn t(seconds: SecondsSinceUnixEpoch, offset: i32, sign: Sign) -> Time {
        Time { seconds, offset, sign }
    }

    // Each case pairs a `Time` with its expected serialized form `"<seconds> <sign><hhmm>"`.
    let cases = [
        (t(SecondsSinceUnixEpoch::MAX, 0, Sign::Minus), "9223372036854775807 -0000"),
        (t(SecondsSinceUnixEpoch::MIN, 0, Sign::Minus), "-9223372036854775808 -0000"),
        (t(500, 9000, Sign::Plus), "500 +0230"),
        (t(189009009, -36000, Sign::Minus), "189009009 -1000"),
        (t(0, 0, Sign::Minus), "0 -0000"),
        (t(0, -24 * 60 * 60, Sign::Minus), "0 -2400"),
        (t(0, 24 * 60 * 60, Sign::Plus), "0 +2400"),
        (t(0, (25 * 60 * 60) + 30 * 60, Sign::Plus), "0 +2530"),
        (t(0, (-25 * 60 * 60) - 30 * 60, Sign::Minus), "0 -2530"),
        (t(0, (99 * 60 * 60) + 59 * 60, Sign::Plus), "0 +9959"),
    ];

    for (time, expected) in cases {
        let mut buf = Vec::new();
        time.write_to(&mut buf)?;
        assert_eq!(buf.as_bstr(), expected);
        // `size()` must predict exactly how many bytes `write_to()` produces.
        assert_eq!(time.size(), buf.len());

        // Every serialized form must parse back to the identical value.
        let reparsed = gix_date::parse(&buf.as_bstr().to_string(), None).expect("round-trippable");
        assert_eq!(time, reparsed);
    }
    Ok(())
}
Ok(())
}
Loading

0 comments on commit 04ef31e

Please sign in to comment.