diff --git a/Cargo.lock b/Cargo.lock index cf92d12399e..a89a532b3a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1858,7 +1858,6 @@ dependencies = [ "git-url", "itertools", "jwalk", - "rayon", "serde", "serde_json", "tempfile", diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 10589a32f04..61819fc8aef 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -18,7 +18,7 @@ default = [] ## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim). organize = ["git-url", "jwalk"] ## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours). -estimate-hours = ["itertools", "rayon", "fs-err"] +estimate-hours = ["itertools", "fs-err"] #! ### Mutually Exclusive Networking #! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used. @@ -59,7 +59,6 @@ blocking = { version = "1.0.2", optional = true } git-url = { version = "^0.8.0", path = "../git-url", optional = true } jwalk = { version = "0.6.0", optional = true } -rayon = { version = "1.5.0", optional = true } itertools = { version = "0.10.1", optional = true } fs-err = { version = "2.6.0", optional = true } diff --git a/gitoxide-core/src/hours.rs b/gitoxide-core/src/hours.rs index 5ec57db8eb2..7d08eece44d 100644 --- a/gitoxide-core/src/hours.rs +++ b/gitoxide-core/src/hours.rs @@ -1,6 +1,6 @@ +use std::collections::BTreeSet; use std::{ collections::{hash_map::Entry, HashMap}, - ffi::OsStr, io, path::Path, time::Instant, @@ -8,17 +8,9 @@ use std::{ use anyhow::{anyhow, bail}; use git_repository as git; -use git_repository::{ - actor, - bstr::{BString, ByteSlice}, - interrupt, objs, - prelude::*, - progress, - refs::file::ReferenceExt, - Progress, -}; +use git_repository::bstr::{BStr, BString}; +use git_repository::{actor, bstr::ByteSlice, interrupt, objs, prelude::*, progress, Progress}; use itertools::Itertools; -use rayon::prelude::*; /// Additional configuration for the hours estimation functionality. pub struct Context { @@ -41,7 +33,7 @@ pub struct Context { /// * _progress_ - A way to provide progress and performance information pub fn estimate( working_dir: &Path, - refname: &OsStr, + rev_spec: &BStr, mut progress: P, Context { show_pii, @@ -55,69 +47,92 @@ where P: Progress, { let repo = git::discover(working_dir)?.apply_environment(); - let commit_id = repo - .refs - .find(refname.to_string_lossy().as_ref())? - .peel_to_id_in_place(&repo.refs, |oid, buf| { - repo.objects - .try_find(oid, buf) - .map(|obj| obj.map(|obj| (obj.kind, obj.data))) - })? - .to_owned(); + let commit_id = repo.rev_parse_single(rev_spec)?.detach(); + let mut string_heap = BTreeSet::<&'static [u8]>::new(); let (all_commits, is_shallow) = { - let start = Instant::now(); let mut progress = progress.add_child("Traverse commit graph"); - progress.init(None, progress::count("commits")); - let mut commits: Vec> = Vec::new(); - let commit_iter = interrupt::Iter::new( - commit_id.ancestors(|oid, buf| { - progress.inc(); - repo.objects.find(oid, buf).map(|o| { - commits.push(o.data.to_owned()); - objs::CommitRefIter::from_bytes(o.data) - }) - }), - || anyhow!("Cancelled by user"), - ); - let mut is_shallow = false; - for c in commit_iter { - match c? { - Ok(c) => c, - Err(git::traverse::commit::ancestors::Error::FindExisting { .. }) => { - is_shallow = true; - break; + let string_heap = &mut string_heap; + std::thread::scope( + move |scope| -> anyhow::Result<(Vec>, bool)> { + let start = Instant::now(); + progress.init(None, progress::count("commits")); + let (tx, rx) = std::sync::mpsc::channel::>(); + let mailmap = repo.open_mailmap(); + + let handle = scope.spawn(move || -> anyhow::Result>> { + let mut out = Vec::new(); + for commit_data in rx { + if let Some(author) = objs::CommitRefIter::from_bytes(&commit_data) + .author() + .map(|author| mailmap.resolve(author.trim())) + .ok() + { + let mut string_ref = |s: &BString| -> &'static BStr { + match string_heap.get(s.as_slice()) { + Some(n) => n.as_bstr(), + None => { + let sv: Vec = s.clone().into(); + string_heap.insert(Box::leak(sv.into_boxed_slice())); + (*string_heap.get(s.as_slice()).expect("present")).as_ref() + } + } + }; + let name = string_ref(&author.name); + let email = string_ref(&author.email); + + out.push(actor::SignatureRef { + name, + email, + time: author.time, + }); + } + } + out.shrink_to_fit(); + out.sort_by(|a, b| { + a.email.cmp(&b.email).then( + a.time + .seconds_since_unix_epoch + .cmp(&b.time.seconds_since_unix_epoch) + .reverse(), + ) + }); + Ok(out) + }); + + let commit_iter = interrupt::Iter::new( + commit_id.ancestors(|oid, buf| { + progress.inc(); + repo.objects.find(oid, buf).map(|o| { + tx.send(o.data.to_owned()).ok(); + objs::CommitRefIter::from_bytes(o.data) + }) + }), + || anyhow!("Cancelled by user"), + ); + let mut is_shallow = false; + for c in commit_iter { + match c? { + Ok(c) => c, + Err(git::traverse::commit::ancestors::Error::FindExisting { .. }) => { + is_shallow = true; + break; + } + Err(err) => return Err(err.into()), + }; } - Err(err) => return Err(err.into()), - }; - } - progress.show_throughput(start); - (commits, is_shallow) + drop(tx); + progress.show_throughput(start); + Ok((handle.join().expect("no panic")?, is_shallow)) + }, + )? }; - let mailmap = repo.open_mailmap(); - let start = Instant::now(); - #[allow(clippy::redundant_closure)] - let mut all_commits: Vec = all_commits - .into_par_iter() - .filter_map(|commit_data: Vec| { - objs::CommitRefIter::from_bytes(&commit_data) - .author() - .map(|author| mailmap.resolve(author.trim())) - .ok() - }) - .collect::>(); - all_commits.sort_by(|a, b| { - a.email.cmp(&b.email).then( - a.time - .seconds_since_unix_epoch - .cmp(&b.time.seconds_since_unix_epoch) - .reverse(), - ) - }); if all_commits.is_empty() { bail!("No commits to process"); } + + let start = Instant::now(); let mut current_email = &all_commits[0].email; let mut slice_start = 0; let mut results_by_hours = Vec::new(); @@ -201,7 +216,7 @@ where const MINUTES_PER_HOUR: f32 = 60.0; const HOURS_PER_WORKDAY: f32 = 8.0; -fn estimate_hours(commits: &[actor::Signature]) -> WorkByEmail { +fn estimate_hours(commits: &[actor::SignatureRef<'static>]) -> WorkByEmail { assert!(!commits.is_empty()); const MAX_COMMIT_DIFFERENCE_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR; const FIRST_COMMIT_ADDITION_IN_MINUTES: f32 = 2.0 * MINUTES_PER_HOUR; @@ -209,7 +224,7 @@ fn estimate_hours(commits: &[actor::Signature]) -> WorkByEmail { let hours = FIRST_COMMIT_ADDITION_IN_MINUTES / 60.0 + commits.iter().rev().tuple_windows().fold( 0_f32, - |hours, (cur, next): (&actor::Signature, &actor::Signature)| { + |hours, (cur, next): (&actor::SignatureRef<'_>, &actor::SignatureRef<'_>)| { let change_in_minutes = (next.time.seconds_since_unix_epoch - cur.time.seconds_since_unix_epoch) as f32 / MINUTES_PER_HOUR; if change_in_minutes < MAX_COMMIT_DIFFERENCE_IN_MINUTES { @@ -221,19 +236,19 @@ fn estimate_hours(commits: &[actor::Signature]) -> WorkByEmail { ); let author = &commits[0]; WorkByEmail { - name: author.name.to_owned(), - email: author.email.to_owned(), + name: author.name, + email: author.email, hours, num_commits: commits.len() as u32, } } -fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec> { - let mut email_to_index = HashMap::<&BString, usize>::with_capacity(persons.len()); - let mut name_to_index = HashMap::<&BString, usize>::with_capacity(persons.len()); - let mut out = Vec::>::with_capacity(persons.len()); +fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec { + let mut email_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len()); + let mut name_to_index = HashMap::<&'static BStr, usize>::with_capacity(persons.len()); + let mut out = Vec::::with_capacity(persons.len()); for person_by_email in persons { - match email_to_index.entry(&person_by_email.email) { + match email_to_index.entry(person_by_email.email) { Entry::Occupied(email_entry) => { out[*email_entry.get()].merge(person_by_email); name_to_index.insert(&person_by_email.name, *email_entry.get()); @@ -256,14 +271,14 @@ fn deduplicate_identities(persons: &[WorkByEmail]) -> Vec> { } #[derive(Debug)] -struct WorkByPerson<'a> { - name: Vec<&'a BString>, - email: Vec<&'a BString>, +struct WorkByPerson { + name: Vec<&'static BStr>, + email: Vec<&'static BStr>, hours: f32, num_commits: u32, } -impl<'a> WorkByPerson<'a> { +impl<'a> WorkByPerson { fn merge(&mut self, other: &'a WorkByEmail) { if !self.name.contains(&&other.name) { self.name.push(&other.name); @@ -276,18 +291,18 @@ impl<'a> WorkByPerson<'a> { } } -impl<'a> From<&'a WorkByEmail> for WorkByPerson<'a> { +impl<'a> From<&'a WorkByEmail> for WorkByPerson { fn from(w: &'a WorkByEmail) -> Self { WorkByPerson { - name: vec![&w.name], - email: vec![&w.email], + name: vec![w.name], + email: vec![w.email], hours: w.hours, num_commits: w.num_commits, } } } -impl<'a> WorkByPerson<'a> { +impl WorkByPerson { fn write_to(&self, total_hours: f32, mut out: impl std::io::Write) -> std::io::Result<()> { writeln!( out, @@ -308,8 +323,8 @@ impl<'a> WorkByPerson<'a> { #[derive(Debug)] struct WorkByEmail { - name: BString, - email: BString, + name: &'static BStr, + email: &'static BStr, hours: f32, num_commits: u32, } diff --git a/src/porcelain/main.rs b/src/porcelain/main.rs index 6de13d10f7c..e0656617e30 100644 --- a/src/porcelain/main.rs +++ b/src/porcelain/main.rs @@ -38,7 +38,7 @@ pub fn main() -> Result<()> { Subcommands::Tool(tool) => match tool { crate::porcelain::options::ToolCommands::EstimateHours(crate::porcelain::options::EstimateHours { working_dir, - refname, + rev_spec, no_bots, show_pii, omit_unify_identities, @@ -53,7 +53,7 @@ pub fn main() -> Result<()> { move |progress, out, _err| { hours::estimate( &working_dir, - &refname, + rev_spec.as_ref(), progress, hours::Context { show_pii, diff --git a/src/porcelain/options.rs b/src/porcelain/options.rs index e949fed06c2..5ee5774321f 100644 --- a/src/porcelain/options.rs +++ b/src/porcelain/options.rs @@ -1,4 +1,6 @@ -use std::{ffi::OsString, path::PathBuf}; +use git::bstr::BString; +use git_repository as git; +use std::path::PathBuf; #[derive(Debug, clap::Parser)] #[clap(about = "The rusty git", version = clap::crate_version!())] @@ -93,9 +95,9 @@ pub struct EstimateHours { #[clap(validator_os = validator::is_repo)] #[clap(default_value = ".")] pub working_dir: PathBuf, - /// The name of the ref like 'HEAD' or 'main' at which to start iterating the commit graph. - #[clap(default_value("HEAD"))] - pub refname: OsString, + /// The name of the revision as spec, like 'HEAD' or 'main' at which to start iterating the commit graph. + #[clap(default_value("HEAD"), parse(try_from_os_str = git::env::os_str_to_bstring))] + pub rev_spec: BString, /// Ignore github bots which match the `[bot]` search string. #[clap(short = 'b', long)] pub no_bots: bool,