diff --git a/Cargo.lock b/Cargo.lock index ba7620df846..5e0885aafae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.3" @@ -988,6 +999,18 @@ dependencies = [ "regex", ] +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "1.9.0" @@ -1256,7 +1279,7 @@ dependencies = [ "itertools", "jwalk", "mime_guess", - "num_cpus", + "rusqlite", "serde", "serde_json", "tempfile", @@ -2370,6 +2393,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.6", +] [[package]] name = "hashbrown" @@ -2377,6 +2403,15 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +[[package]] +name = "hashlink" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" +dependencies = [ + "hashbrown 0.12.3", +] + [[package]] name = "heck" version = "0.4.1" @@ -2559,7 +2594,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e98c1d0ad70fc91b8b9654b1f33db55e59579d3b3de2bffdced0fdb810570cb8" dependencies = [ - "ahash", + "ahash 0.8.3", "hashbrown 0.12.3", ] @@ -2728,6 +2763,17 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libsqlite3-sys" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29f835d03d717946d28b1d1ed632eb6f0e24a299388ee623d0c23118d3e8a7fa" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "libz-ng-sys" version = "1.1.8" @@ -3517,6 +3563,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "rusqlite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-hash" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 816f2a48368..2dcaec93854 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,14 +43,14 @@ prodash-render-line-crossterm = ["prodash-render-line", "prodash/render-line-cro #! These combine common choices of the above features to represent typical builds ## *fast* + *prodash-render-tui-crossterm* + *prodash-render-line-crossterm* + *http* + *gitoxide-core-tools* + *client-networking* -max = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ] +max = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line", "prodash-render-tui", "prodash/render-line-autoconfigure", "gix/regex" ] ## *fast* + *prodash-render-line-crossterm* + *gitoxide-core-tools* + *client-networking*. -lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ] +lean = ["fast", "pretty-cli", "http-client-curl", "gitoxide-core-tools-query", "gitoxide-core-tools", "gitoxide-core-blocking-client", "prodash-render-line" ] ## fast* + *prodash-render-line-crossterm* + *gitoxide-core-tools* + *client-async-networking*. ## Due to async client-networking not being implemented for most transports, this one supports only the 'git' transport. ## It uses, however, a fully asynchronous networking implementation which can serve a real-world example on how to implement custom async transports. -lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-async-client", "prodash-render-line"] +lean-async = ["fast", "pretty-cli", "gitoxide-core-tools", "gitoxide-core-tools-query", "gitoxide-core-async-client", "prodash-render-line"] ## As small as it can possibly be, no threading, no fast sha1, line progress only, rust based zlib implementation. ## no networking, local operations only. @@ -64,6 +64,9 @@ max-pure = ["pretty-cli", "gix-features/rustsha1", "gix-features/zlib-rust-backe ## A way to enable all `gitoxide-core` tools found in `gix tools` gitoxide-core-tools = ["gitoxide-core/organize", "gitoxide-core/estimate-hours"] +## A program to perform analytics on a git repository, using an auto-maintained sqlite database +gitoxide-core-tools-query = ["gitoxide-core-tools", "gitoxide-core/query"] + #! #### Mutually Exclusive Networking #! If both are set a compile error is triggered. This also means that `cargo … --all-features` will fail. diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 5be0de60e88..ad027bf7cff 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -16,9 +16,11 @@ default = [] #! ### Tools ## Discover all git repositories within a directory. Particularly useful with [skim](https://github.com/lotabout/skim). -organize = ["gix-url", "jwalk"] +organize = ["dep:gix-url", "dep:jwalk"] ## Derive the amount of time invested into a git repository akin to [git-hours](https://github.com/kimmobrunfeldt/git-hours). -estimate-hours = ["itertools", "fs-err", "num_cpus", "crossbeam-channel", "mime_guess"] +estimate-hours = ["dep:itertools", "dep:fs-err", "dep:crossbeam-channel", "dep:mime_guess"] +## Gather information about repositories and store it in a database for easy querying. +query = ["dep:rusqlite"] #! ### Mutually Exclusive Networking #! If both are set, _blocking-client_ will take precedence, allowing `--all-features` to be used. @@ -62,10 +64,12 @@ jwalk = { version = "0.8.0", optional = true } # for 'hours' itertools = { version = "0.10.1", optional = true } fs-err = { version = "2.6.0", optional = true } -num_cpus = { version = "1.13.1", optional = true } crossbeam-channel = { version = "0.5.6", optional = true } mime_guess = { version = "2.0.4", optional = true } +# for 'query' +rusqlite = { version = "0.28.0", optional = true, features = ["bundled"] } + document-features = { version = "0.2.0", optional = true } [package.metadata.docs.rs] diff --git a/gitoxide-core/src/hours/mod.rs b/gitoxide-core/src/hours/mod.rs index 83eb6714da4..ef914fe3624 100644 --- a/gitoxide-core/src/hours/mod.rs +++ b/gitoxide-core/src/hours/mod.rs @@ -57,10 +57,7 @@ where let commit_id = repo.rev_parse_single(rev_spec)?.detach(); let mut string_heap = BTreeSet::<&'static [u8]>::new(); let needs_stats = file_stats || line_stats; - let threads = { - let t = threads.unwrap_or_else(num_cpus::get); - (t == 0).then(num_cpus::get_physical).unwrap_or(t) - }; + let threads = gix::features::parallel::num_threads(threads); let (commit_authors, stats, is_shallow, skipped_merge_commits) = { let stat_progress = needs_stats.then(|| progress.add_child("extract stats")).map(|mut p| { @@ -236,6 +233,8 @@ where (true, true) => { files.modified += 1; if line_stats { + // TODO: replace this with proper git-attributes - this isn't + // really working, can't see shell scripts for example. let is_text_file = mime_guess::from_path( gix::path::from_bstr(change.location) .as_ref(), diff --git a/gitoxide-core/src/lib.rs b/gitoxide-core/src/lib.rs index 0d8dac5a892..4664386bee2 100644 --- a/gitoxide-core/src/lib.rs +++ b/gitoxide-core/src/lib.rs @@ -73,6 +73,8 @@ pub mod mailmap; #[cfg(feature = "organize")] pub mod organize; pub mod pack; +#[cfg(feature = "query")] +pub mod query; pub mod repository; #[cfg(all(feature = "async-client", feature = "blocking-client"))] diff --git a/gitoxide-core/src/query/db.rs b/gitoxide-core/src/query/db.rs new file mode 100644 index 00000000000..df1099e50ba --- /dev/null +++ b/gitoxide-core/src/query/db.rs @@ -0,0 +1,68 @@ +use anyhow::Context; +use rusqlite::{params, OptionalExtension}; + +/// A version to be incremented whenever the database layout is changed, to refresh it automatically. +const VERSION: usize = 1; + +pub fn create(path: impl AsRef) -> anyhow::Result { + let path = path.as_ref(); + let mut con = rusqlite::Connection::open(path)?; + let meta_table = r#" + CREATE TABLE if not exists meta( + version int + )"#; + con.execute_batch(meta_table)?; + let version: Option = con.query_row("SELECT version FROM meta", [], |r| r.get(0)).optional()?; + match version { + None => { + con.execute("INSERT into meta(version) values(?)", params![VERSION])?; + } + Some(version) if version != VERSION => match con.close() { + Ok(()) => { + std::fs::remove_file(path) + .with_context(|| format!("Failed to remove incompatible database file at {path:?}"))?; + con = rusqlite::Connection::open(path)?; + con.execute_batch(meta_table)?; + con.execute("INSERT into meta(version) values(?)", params![VERSION])?; + } + Err((_, err)) => return Err(err.into()), + }, + _ => {} + } + con.execute_batch( + r#" + CREATE TABLE if not exists commits( + hash blob(20) NOT NULL PRIMARY KEY + ) + "#, + )?; + // Files are stored as paths which also have an id for referencing purposes + con.execute_batch( + r#" + CREATE TABLE if not exists files( + file_id integer NOT NULL PRIMARY KEY, + file_path text UNIQUE + ) + "#, + )?; + con.execute_batch( + r#" + CREATE TABLE if not exists commit_file( + hash blob(20), + file_id text, + has_diff boolean NOT NULL, + lines_added integer NOT NULL, + lines_removed integer NOT NULL, + lines_before integer NOT NULL, + lines_after integer NOT NULL, + mode integer, + source_file_id integer, + FOREIGN KEY (hash) REFERENCES commits (hash), + FOREIGN KEY (file_id) REFERENCES files (file_id), + PRIMARY KEY (hash, file_id) + ) + "#, + )?; + + Ok(con) +} diff --git a/gitoxide-core/src/query/engine/command.rs b/gitoxide-core/src/query/engine/command.rs new file mode 100644 index 00000000000..ab1264bf4a2 --- /dev/null +++ b/gitoxide-core/src/query/engine/command.rs @@ -0,0 +1,183 @@ +use crate::query; +use crate::query::engine::update::FileMode; +use crate::query::Command; +use anyhow::Context; +use gix::bstr::ByteSlice; +use gix::Progress; +use rusqlite::{params, OptionalExtension}; +use std::collections::HashMap; + +impl query::Engine { + pub fn run( + &self, + cmd: Command, + mut out: impl std::io::Write, + mut progress: impl gix::Progress, + ) -> anyhow::Result<()> { + match cmd { + Command::TracePath { mut spec } => { + if let Some(prefix) = self.repo.prefix() { + spec.apply_prefix(&prefix?); + }; + let relpath = spec.items().next().expect("spec has at least one item"); + let file_id: usize = self + .con + .query_row( + "SELECT file_id FROM files WHERE file_path = ?", + params![relpath.to_str_lossy()], + |r| r.get(0), + ) + .optional()? + .context("Path not found anywhere in recorded history")?; + + let mut by_file_id = self + .con + .prepare("SELECT hash, mode, source_file_id, has_diff, lines_added, lines_removed from commit_file where file_id = ? order by mode")?; + let mut path_by_id = self.con.prepare("SELECT file_path from files where file_id = ?")?; + let mut seen = HashMap::::new(); + seen.insert(file_id, relpath.to_string()); + + let mut stack = vec![file_id]; + let mut info = Vec::new(); + let start = std::time::Instant::now(); + let mut progress = progress.add_child("run sql query"); + progress.init(None, gix::progress::count("round")); + while let Some(file_id) = stack.pop() { + let rows = by_file_id.query_map([file_id], |r| { + Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?, r.get(4)?, r.get(5)?)) + })?; + progress.inc(); + for row in rows { + let (hash, mode, source_file_id, has_diff, lines_added, lines_removed): ( + [u8; 20], + usize, + Option, + bool, + usize, + usize, + ) = row?; + let id = gix::ObjectId::from(hash); + let mode = FileMode::from_usize(mode).context("invalid file mode")?; + info.push(trace_path::Info { + id, + file_id, + mode, + diff: has_diff.then_some(trace_path::Diff { + lines_added, + lines_removed, + }), + source_file_id, + }); + if let Some(source_id) = source_file_id { + if let std::collections::hash_map::Entry::Vacant(e) = seen.entry(source_id) { + stack.push(source_id); + e.insert(path_by_id.query_row([source_id], |r| r.get(0))?); + } + } + } + } + + info.sort_by(|a, b| a.id.cmp(&b.id)); + let max_diff_lines = info + .iter() + .map(|i| i.diff.map_or(0, |d| d.lines_removed + d.lines_added)) + .max() + .unwrap_or_default(); + let mut found = 0; + progress.show_throughput(start); + let start = std::time::Instant::now(); + progress.init(Some(self.commits.len()), gix::progress::count("commits")); + progress.set_name("associate info"); + for info in self + .commits + .iter() + .inspect(|_| progress.inc()) + .filter_map(|c| info.binary_search_by(|i| i.id.cmp(c)).ok().map(|idx| &info[idx])) + { + found += 1; + info.write_to(&mut out, &self.repo, &seen, max_diff_lines)?; + } + let missing = info.len() - found; + progress.show_throughput(start); + if missing > 0 { + writeln!( + out, + "{missing} file(s) were found in history that are not reachable from HEAD" + )?; + } + Ok(()) + } + } + } +} + +mod trace_path { + use crate::query::engine::update::FileMode; + use gix::prelude::ObjectIdExt; + use std::collections::HashMap; + + #[derive(Debug, Default, Copy, Clone)] + pub struct Diff { + pub lines_added: usize, + pub lines_removed: usize, + } + + impl Diff { + fn format(&self, max_diff_lines: usize) -> String { + const NUM_CHARS: f32 = 10.0; + let mut buf = String::with_capacity(NUM_CHARS as usize); + if max_diff_lines != 0 { + let num_plus = ((self.lines_added as f32 / max_diff_lines as f32) * NUM_CHARS).ceil() as usize; + let num_minus = ((self.lines_removed as f32 / max_diff_lines as f32) * NUM_CHARS) as usize; + buf.extend((0..num_plus).map(|_| '+')); + buf.extend((0..num_minus).map(|_| '-')); + } + buf.extend((buf.len()..NUM_CHARS as usize).map(|_| ' ')); + buf + } + } + + #[derive(Debug)] + pub struct Info { + pub id: gix::ObjectId, + pub file_id: usize, + pub mode: FileMode, + pub diff: Option, + pub source_file_id: Option, + } + + impl Info { + pub fn write_to( + &self, + mut out: impl std::io::Write, + repo: &gix::Repository, + path_by_id: &HashMap, + max_diff_lines: usize, + ) -> std::io::Result<()> { + let id = self.id.attach(repo); + match self.source_file_id { + Some(source_id) => { + writeln!( + out, + "{}|{} {} {} ➡ {}", + self.diff.unwrap_or_default().format(max_diff_lines), + id.shorten_or_id(), + self.mode.as_str(), + path_by_id[&source_id], + path_by_id[&self.file_id], + ) + } + None => { + writeln!( + out, + "{}|{} {} {}", + self.diff.unwrap_or_default().format(max_diff_lines), + id.shorten_or_id(), + self.mode.as_str(), + path_by_id[&self.file_id] + ) + } + } + } + } +} diff --git a/gitoxide-core/src/query/engine/mod.rs b/gitoxide-core/src/query/engine/mod.rs new file mode 100644 index 00000000000..60d46397ac4 --- /dev/null +++ b/gitoxide-core/src/query/engine/mod.rs @@ -0,0 +1,11 @@ +pub enum Command { + TracePath { + /// The repo-relative path to the file to trace + spec: gix::path::Spec, + }, +} + +pub(crate) mod update; +pub use update::update; + +mod command; diff --git a/gitoxide-core/src/query/engine/update.rs b/gitoxide-core/src/query/engine/update.rs new file mode 100644 index 00000000000..6c74769993e --- /dev/null +++ b/gitoxide-core/src/query/engine/update.rs @@ -0,0 +1,501 @@ +use crate::query::Options; +use anyhow::{anyhow, bail}; +use gix::bstr::{BStr, BString, ByteSlice}; +use gix::features::progress; +use gix::object::tree::diff::rewrites::CopySource; +use gix::odb::FindExt; +use gix::parallel::{InOrderIter, SequenceId}; +use gix::prelude::ObjectIdExt; +use gix::Progress; +use rusqlite::{params, Statement, Transaction}; +use std::convert::Infallible; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Instant; + +pub fn update( + repo: &gix::Repository, + con: &mut rusqlite::Connection, + progress: &mut impl gix::Progress, + Options { + object_cache_size_mb, + find_copies_harder, + threads, + }: Options, +) -> anyhow::Result> { + let commit_id = repo.head_id()?.detach(); + let threads = gix::features::parallel::num_threads(threads); + + let mut stat_progress = { + let mut p = progress.add_child("extract stats"); + p.init(None, progress::count("commits")); + p + }; + let stat_counter = stat_progress.counter().expect("shared counter available"); + + let mut db_progress = { + let mut p = progress.add_child("db cache"); + p.init(None, progress::count("events")); + p + }; + let commit_counter = db_progress.counter().expect("shared counter available"); + + let mut change_progress = { + let mut p = progress.add_child("find changes"); + p.init(None, progress::count("modified files")); + p + }; + let change_counter = change_progress.counter().expect("shared counter available"); + + let mut lines_progress = { + let mut p = progress.add_child("find changes"); + p.init(None, progress::count("diff lines")); + p + }; + let lines_counter = lines_progress.counter().expect("shared counter available"); + + let mut traverse_progress = progress.add_child("traverse commit graph"); + traverse_progress.init(None, progress::count("commits")); + + let out = std::thread::scope(|scope| -> anyhow::Result<_> { + struct Commit { + id: gix::hash::ObjectId, + } + struct CommitDiffStats { + /// The id of the commit which was diffed with its predecessor + id: gix::hash::ObjectId, + changes: Vec, + } + let start = Instant::now(); + let (tx_commits, rx_commits) = std::sync::mpsc::channel::(); + let (tx_stats, rx_stats) = std::sync::mpsc::channel::), Infallible>>(); + + let mut known_commits = + Vec::with_capacity(con.query_row("SELECT COUNT(hash) from commits", [], |r| r.get::<_, usize>(0))?); + for item in con.prepare("SELECT hash from commits")?.query_map([], |r| { + Ok(gix::ObjectId::try_from(r.get_ref(0)?.as_bytes()?) + .unwrap_or_else(|_| gix::ObjectId::null(gix::hash::Kind::Sha1))) + })? { + known_commits.push(item?); + } + known_commits.sort(); + + let db_thread = scope.spawn({ + move || -> anyhow::Result<()> { + let trans = con.transaction()?; + { + let Updates { + mut new_commit, + mut insert_commit_file, + mut insert_commit_file_with_source, + mut insert_file_path, + } = Updates::new(&trans)?; + for Commit { id } in rx_commits { + new_commit.execute(params![id.as_bytes()])?; + commit_counter.fetch_add(1, Ordering::Relaxed); + } + for stats in InOrderIter::from(rx_stats.into_iter()) { + for CommitDiffStats { id, changes } in stats.expect("infallible") { + new_commit.execute(params![id.as_bytes()])?; + for change in changes { + insert_file_path.execute(params![change.relpath.to_str_lossy()])?; + let (has_diff, lines) = change.lines.map(|l| (true, l)).unwrap_or_default(); + if let Some(source_relpath) = change.source_relpath { + insert_file_path.execute(params![source_relpath.to_str_lossy()])?; + insert_commit_file_with_source.execute(params![ + id.as_bytes(), + change.relpath.to_str_lossy(), + has_diff, + lines.added, + lines.removed, + lines.before, + lines.after, + change.mode as usize, + source_relpath.to_str_lossy(), + ])?; + } else { + insert_commit_file.execute(params![ + id.as_bytes(), + change.relpath.to_str_lossy(), + has_diff, + lines.added, + lines.removed, + lines.before, + lines.after, + change.mode as usize, + ])?; + } + } + commit_counter.fetch_add(1, Ordering::Relaxed); + } + } + } + trans.commit()?; + Ok(()) + } + }); + + let rewrites = { + let mut r = + gix::object::tree::diff::Rewrites::try_from_config(&repo.config_snapshot(), true)?.unwrap_or_default(); + r.copies = Some(gix::object::tree::diff::rewrites::Copies { + source: if find_copies_harder { + CopySource::FromSetOfModifiedFilesAndSourceTree + } else { + CopySource::FromSetOfModifiedFiles + }, + percentage: None, + }); + r + }; + let (tx_tree_ids, stat_threads) = { + let (tx, rx) = + crossbeam_channel::unbounded::<(SequenceId, Vec<(Option, gix::hash::ObjectId)>)>(); + let stat_workers = (0..threads) + .map(|_| { + scope.spawn({ + let stat_counter = stat_counter.clone(); + let change_counter = change_counter.clone(); + let lines_counter = lines_counter.clone(); + let tx_stats = tx_stats.clone(); + let mut repo = repo.clone(); + repo.object_cache_size_if_unset((object_cache_size_mb * 1024 * 1024) / threads); + let rx = rx.clone(); + move || -> anyhow::Result<()> { + for (chunk_id, chunk) in rx { + let mut out_chunk = Vec::with_capacity(chunk.len()); + for (parent_commit, commit) in chunk { + stat_counter.fetch_add(1, Ordering::SeqCst); + if gix::interrupt::is_triggered() { + return Ok(()); + } + let mut out = Vec::new(); + let from = match parent_commit { + Some(id) => { + match repo.find_object(id).ok().and_then(|c| c.peel_to_tree().ok()) { + Some(tree) => tree, + None => continue, + } + } + None => repo.empty_tree(), + }; + let to = match repo.find_object(commit).ok().and_then(|c| c.peel_to_tree().ok()) { + Some(c) => c, + None => continue, + }; + from.changes()? + .track_path() + .track_rewrites(Some(rewrites)) + .for_each_to_obtain_tree(&to, |change| { + use gix::object::tree::diff::change::Event::*; + change_counter.fetch_add(1, Ordering::SeqCst); + match change.event { + Addition { entry_mode, id } => { + if entry_mode.is_blob_or_symlink() { + add_lines(&mut out, change.location, &lines_counter, id); + } + } + Modification { + entry_mode, + previous_entry_mode, + id, + previous_id, + } => match (previous_entry_mode.is_blob(), entry_mode.is_blob()) { + (false, false) => {} + (false, true) => { + add_lines(&mut out, change.location, &lines_counter, id); + } + (true, false) => { + add_lines( + &mut out, + change.location, + &lines_counter, + previous_id, + ); + } + (true, true) => { + // TODO: use git attributes here to know if it's a binary file or not. + if let Some(Ok(diff)) = change.event.diff() { + let mut nl = 0; + let tokens = diff.line_tokens(); + let counts = gix::diff::blob::diff( + diff.algo, + &tokens, + gix::diff::blob::sink::Counter::default(), + ); + nl += counts.insertions as usize + counts.removals as usize; + let lines = LineStats { + added: counts.insertions as usize, + removed: counts.removals as usize, + before: tokens.before.len(), + after: tokens.after.len(), + }; + lines_counter.fetch_add(nl, Ordering::SeqCst); + out.push(FileChange { + relpath: change.location.to_owned(), + mode: FileMode::Modified, + source_relpath: None, + lines: Some(lines), + }); + } + } + }, + Deletion { entry_mode, id } => { + if entry_mode.is_blob_or_symlink() { + remove_lines(&mut out, change.location, &lines_counter, id); + } + } + Rewrite { + source_location, + diff, + copy, + .. + } => { + out.push(FileChange { + relpath: change.location.to_owned(), + source_relpath: Some(source_location.to_owned()), + mode: if copy { FileMode::Copy } else { FileMode::Rename }, + lines: diff.map(|d| LineStats { + added: d.insertions as usize, + removed: d.removals as usize, + before: d.before as usize, + after: d.after as usize, + }), + }); + } + } + Ok::<_, Infallible>(Default::default()) + })?; + out_chunk.push(CommitDiffStats { + id: commit, + changes: out, + }); + } + if tx_stats.send(Ok((chunk_id, out_chunk))).is_err() { + break; + } + } + Ok(()) + } + }) + }) + .collect::>(); + (tx, stat_workers) + }; + drop(tx_stats); + + let mut skipped_merge_commits = 0; + const CHUNK_SIZE: usize = 50; + let mut chunk = Vec::with_capacity(CHUNK_SIZE); + let mut chunk_id: SequenceId = 0; + let commit_iter = gix::interrupt::Iter::new( + commit_id.ancestors(|oid, buf| -> Result<_, gix::object::find::existing::Error> { + let obj = repo.objects.find(oid, buf)?; + traverse_progress.inc(); + if known_commits.binary_search(&oid.to_owned()).is_err() { + let res = { + let mut parents = gix::objs::CommitRefIter::from_bytes(obj.data).parent_ids(); + let res = parents.next().map(|first_parent| (Some(first_parent), oid.to_owned())); + match parents.next() { + Some(_) => { + skipped_merge_commits += 1; + None + } + None => res, + } + }; + if let Some((first_parent, commit)) = res { + if chunk.len() == CHUNK_SIZE { + tx_tree_ids + .send((chunk_id, std::mem::replace(&mut chunk, Vec::with_capacity(CHUNK_SIZE)))) + .ok(); + chunk_id += 1; + } + chunk.push((first_parent, commit)); + } else { + tx_commits.send(Commit { id: oid.to_owned() }).ok(); + } + } + Ok(gix::objs::CommitRefIter::from_bytes(obj.data)) + }), + || anyhow!("Cancelled by user"), + ); + let mut commits = Vec::new(); + for c in commit_iter { + match c? { + Ok(c) => { + commits.push(c); + } + Err(gix::traverse::commit::ancestors::Error::FindExisting { .. }) => { + eprintln!("shallow repository - commit history is truncated"); + break; + } + Err(err) => return Err(err.into()), + }; + } + drop(tx_commits); + tx_tree_ids.send((chunk_id, chunk)).ok(); + drop(tx_tree_ids); + let saw_new_commits = !commits.is_empty(); + if saw_new_commits { + traverse_progress.show_throughput(start); + } + drop(traverse_progress); + + let stat_max = Some(commits.len()); + stat_progress.set_max(stat_max); + db_progress.set_max(stat_max); + for handle in stat_threads { + handle.join().expect("no panic")?; + if gix::interrupt::is_triggered() { + bail!("Cancelled by user"); + } + } + if saw_new_commits { + stat_progress.show_throughput(start); + change_progress.show_throughput(start); + lines_progress.show_throughput(start); + } + + db_thread.join().expect("no panic")?; + if saw_new_commits { + db_progress.show_throughput(start); + } else { + db_progress.info("up to date"); + } + Ok(commits) + })?; + + Ok(out) +} + +fn add_lines(out: &mut Vec, path: &BStr, lines_counter: &AtomicUsize, id: gix::Id<'_>) { + if let Ok(blob) = id.object() { + let nl = blob.data.lines_with_terminator().count(); + let mut lines = LineStats::default(); + lines.added += nl; + lines.after = nl; + lines_counter.fetch_add(nl, Ordering::SeqCst); + out.push(FileChange { + relpath: path.to_owned(), + mode: FileMode::Added, + source_relpath: None, + lines: Some(lines), + }); + } +} + +fn remove_lines(out: &mut Vec, path: &BStr, lines_counter: &AtomicUsize, id: gix::Id<'_>) { + if let Ok(blob) = id.object() { + let mut lines = LineStats::default(); + let nl = blob.data.lines_with_terminator().count(); + lines.removed += nl; + lines.before = nl; + lines_counter.fetch_add(nl, Ordering::SeqCst); + out.push(FileChange { + relpath: path.to_owned(), + mode: FileMode::Removed, + source_relpath: None, + lines: Some(lines), + }) + } +} + +#[derive(Debug, Copy, Clone)] +pub enum FileMode { + Added = 1, + Removed = 2, + Modified = 3, + Rename = 4, + Copy = 5, +} + +impl FileMode { + pub fn as_str(&self) -> &'static str { + use FileMode::*; + match self { + Added => "+", + Removed => "-", + Modified => "Δ", + Rename => "➡", + Copy => "⏸", + } + } + pub fn from_usize(mode: usize) -> Option { + use FileMode::*; + match mode { + 1 => Added, + 2 => Removed, + 3 => Modified, + 4 => Rename, + 5 => Copy, + _ => return None, + } + .into() + } +} + +#[derive(Debug)] +struct FileChange { + relpath: BString, + mode: FileMode, + source_relpath: Option, + lines: Option, +} + +/// Line statistics for a particular commit. +#[derive(Debug, Default, Copy, Clone)] +struct LineStats { + /// amount of added lines + added: usize, + /// amount of removed lines + removed: usize, + /// the amount of lines before the change. + before: usize, + /// the amount of lines after the change. + after: usize, +} + +struct Updates<'a> { + new_commit: Statement<'a>, + insert_commit_file: Statement<'a>, + insert_commit_file_with_source: Statement<'a>, + insert_file_path: Statement<'a>, +} + +impl<'a> Updates<'a> { + fn new(trans: &'a Transaction<'_>) -> rusqlite::Result { + let new_commit = trans.prepare( + r#"INSERT INTO + commits(hash) + VALUES(?)"#, + )?; + let insert_commit_file = trans.prepare( + r#" + INSERT INTO + commit_file(hash, file_id, has_diff, lines_added, lines_removed, lines_before, lines_after, mode) + VALUES(?, (SELECT files.file_id FROM files WHERE files.file_path = ?), ?, ?, ?, ?, ?, ?) + "#, + )?; + let insert_commit_file_with_source = trans.prepare( + r#" + INSERT INTO + commit_file(hash, file_id, has_diff, lines_added, lines_removed, lines_before, lines_after, mode, source_file_id) + VALUES(?, (SELECT files.file_id FROM files WHERE files.file_path = ?), ?, ?, ?, ?, ?, ?, (SELECT files.file_id FROM files WHERE files.file_path = ?)) + "#, + )?; + + let insert_file_path = trans.prepare( + r#" + INSERT OR IGNORE INTO + files(file_path) + VALUES(?) + "#, + )?; + Ok(Updates { + new_commit, + insert_commit_file, + insert_commit_file_with_source, + insert_file_path, + }) + } +} diff --git a/gitoxide-core/src/query/mod.rs b/gitoxide-core/src/query/mod.rs new file mode 100644 index 00000000000..6583d12c95b --- /dev/null +++ b/gitoxide-core/src/query/mod.rs @@ -0,0 +1,23 @@ +pub struct Engine { + repo: gix::Repository, + con: rusqlite::Connection, + commits: Vec, +} + +pub struct Options { + pub object_cache_size_mb: usize, + pub find_copies_harder: bool, + pub threads: Option, +} + +mod db; + +mod engine; +pub use engine::Command; + +pub fn prepare(repo_dir: &std::path::Path, mut progress: impl gix::Progress, opts: Options) -> anyhow::Result { + let repo = gix::discover(repo_dir)?; + let mut con = db::create(repo.git_dir().join("ein.query"))?; + let commits = engine::update(&repo, &mut con, &mut progress, opts)?; + Ok(Engine { repo, con, commits }) +} diff --git a/gitoxide-core/src/repository/tree.rs b/gitoxide-core/src/repository/tree.rs index cbe9b913352..0f5aec74a72 100644 --- a/gitoxide-core/src/repository/tree.rs +++ b/gitoxide-core/src/repository/tree.rs @@ -2,7 +2,7 @@ use std::{borrow::Cow, io}; use anyhow::bail; -use gix::{prelude::ObjectIdExt, Tree}; +use gix::Tree; use crate::OutputFormat; @@ -176,13 +176,10 @@ pub fn entries( } fn treeish_to_tree<'repo>(treeish: Option<&str>, repo: &'repo gix::Repository) -> anyhow::Result> { - Ok(match treeish { - Some(hex) => gix::hash::ObjectId::from_hex(hex.as_bytes()) - .map(|id| id.attach(repo))? - .object()? - .try_into_tree()?, - None => repo.head()?.peel_to_commit_in_place()?.tree()?, - }) + let spec = treeish + .map(|spec| format!("{spec}^{{tree}}")) + .unwrap_or_else(|| "@^{tree}".into()); + Ok(repo.rev_parse_single(spec.as_str())?.object()?.into_tree()) } fn format_entry( diff --git a/gix-diff/src/tree/changes.rs b/gix-diff/src/tree/changes.rs index cd27a4ecb8f..d7d8d38bfef 100644 --- a/gix-diff/src/tree/changes.rs +++ b/gix-diff/src/tree/changes.rs @@ -1,6 +1,7 @@ use std::{borrow::BorrowMut, collections::VecDeque}; use gix_hash::{oid, ObjectId}; +use gix_object::tree::EntryRef; use crate::{ tree, @@ -107,7 +108,7 @@ impl<'a> tree::Changes<'a> { (Some(lhs), Some(rhs)) => { use std::cmp::Ordering::*; let (lhs, rhs) = (lhs?, rhs?); - match lhs.filename.cmp(rhs.filename) { + match compare(&lhs, &rhs) { Equal => handle_lhs_and_rhs_with_equal_filenames(lhs, rhs, &mut state.trees, delegate)?, Less => catchup_lhs_with_rhs(&mut lhs_entries, lhs, rhs, &mut state.trees, delegate)?, Greater => catchup_rhs_with_lhs(&mut rhs_entries, lhs, rhs, &mut state.trees, delegate)?, @@ -126,8 +127,17 @@ impl<'a> tree::Changes<'a> { } } +fn compare(a: &EntryRef<'_>, b: &EntryRef<'_>) -> std::cmp::Ordering { + let common = a.filename.len().min(b.filename.len()); + a.filename[..common].cmp(&b.filename[..common]).then_with(|| { + let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/')); + let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/')); + a.cmp(&b) + }) +} + fn delete_entry_schedule_recursion( - entry: gix_object::tree::EntryRef<'_>, + entry: EntryRef<'_>, queue: &mut VecDeque, delegate: &mut R, ) -> Result<(), Error> { @@ -150,7 +160,7 @@ fn delete_entry_schedule_recursion( } fn add_entry_schedule_recursion( - entry: gix_object::tree::EntryRef<'_>, + entry: EntryRef<'_>, queue: &mut VecDeque, delegate: &mut R, ) -> Result<(), Error> { @@ -173,8 +183,8 @@ fn add_entry_schedule_recursion( } fn catchup_rhs_with_lhs( rhs_entries: &mut IteratorType>, - lhs: gix_object::tree::EntryRef<'_>, - rhs: gix_object::tree::EntryRef<'_>, + lhs: EntryRef<'_>, + rhs: EntryRef<'_>, queue: &mut VecDeque, delegate: &mut R, ) -> Result<(), Error> { @@ -182,7 +192,7 @@ fn catchup_rhs_with_lhs( add_entry_schedule_recursion(rhs, queue, delegate)?; loop { match rhs_entries.peek() { - Some(Ok(rhs)) => match lhs.filename.cmp(rhs.filename) { + Some(Ok(rhs)) => match compare(&lhs, rhs) { Equal => { let rhs = rhs_entries.next().transpose()?.expect("the peeked item to be present"); delegate.pop_path_component(); @@ -213,8 +223,8 @@ fn catchup_rhs_with_lhs( fn catchup_lhs_with_rhs( lhs_entries: &mut IteratorType>, - lhs: gix_object::tree::EntryRef<'_>, - rhs: gix_object::tree::EntryRef<'_>, + lhs: EntryRef<'_>, + rhs: EntryRef<'_>, queue: &mut VecDeque, delegate: &mut R, ) -> Result<(), Error> { @@ -222,7 +232,7 @@ fn catchup_lhs_with_rhs( delete_entry_schedule_recursion(lhs, queue, delegate)?; loop { match lhs_entries.peek() { - Some(Ok(lhs)) => match lhs.filename.cmp(rhs.filename) { + Some(Ok(lhs)) => match compare(lhs, &rhs) { Equal => { let lhs = lhs_entries.next().expect("the peeked item to be present")?; delegate.pop_path_component(); @@ -252,8 +262,8 @@ fn catchup_lhs_with_rhs( } fn handle_lhs_and_rhs_with_equal_filenames( - lhs: gix_object::tree::EntryRef<'_>, - rhs: gix_object::tree::EntryRef<'_>, + lhs: EntryRef<'_>, + rhs: EntryRef<'_>, queue: &mut VecDeque, delegate: &mut R, ) -> Result<(), Error> { @@ -275,7 +285,7 @@ fn handle_lhs_and_rhs_with_equal_filenames( } queue.push_back((Some(lhs.oid.to_owned()), Some(rhs.oid.to_owned()))); } - (lhs_mode, Tree) if lhs_mode.is_no_tree() => { + (_, Tree) => { delegate.push_back_tracked_path_component(lhs.filename); if delegate .visit(Change::Deletion { @@ -297,7 +307,7 @@ fn handle_lhs_and_rhs_with_equal_filenames( }; queue.push_back((None, Some(rhs.oid.to_owned()))); } - (Tree, rhs_mode) if rhs_mode.is_no_tree() => { + (Tree, _) => { delegate.push_back_tracked_path_component(lhs.filename); if delegate .visit(Change::Deletion { @@ -344,3 +354,41 @@ type IteratorType = std::mem::ManuallyDrop>; fn peekable(iter: I) -> IteratorType { std::mem::ManuallyDrop::new(iter.peekable()) } + +#[cfg(test)] +mod tests { + use super::*; + use gix_object::tree::EntryMode; + use std::cmp::Ordering; + + #[test] + fn compare_select_samples() { + let null = gix_hash::ObjectId::null(gix_hash::Kind::Sha1); + let actual = compare( + &EntryRef { + mode: EntryMode::Blob, + filename: "plumbing-cli.rs".into(), + oid: &null, + }, + &EntryRef { + mode: EntryMode::Tree, + filename: "plumbing".into(), + oid: &null, + }, + ); + assert_eq!(actual, Ordering::Less); + let actual = compare( + &EntryRef { + mode: EntryMode::Tree, + filename: "plumbing-cli.rs".into(), + oid: &null, + }, + &EntryRef { + mode: EntryMode::Blob, + filename: "plumbing".into(), + oid: &null, + }, + ); + assert_eq!(actual, Ordering::Greater); + } +} diff --git a/gix-diff/tests/fixtures/generated-archives/make_diff_repo.tar.xz b/gix-diff/tests/fixtures/generated-archives/make_diff_repo.tar.xz index e7431a40213..e75187a3c7d 100644 --- a/gix-diff/tests/fixtures/generated-archives/make_diff_repo.tar.xz +++ b/gix-diff/tests/fixtures/generated-archives/make_diff_repo.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61576762112bf19aaafc9cb0a6dbc65cc50fc7f1fb189b2c0d3584e21333a155 -size 17188 +oid sha256:86d0447c83f1a348e93719f9d782784a0ced7a198ea28a94f6cccbd928d81b3d +size 18472 diff --git a/gix-diff/tests/fixtures/make_diff_repo.sh b/gix-diff/tests/fixtures/make_diff_repo.sh index df4d5249165..782b14b2a63 100755 --- a/gix-diff/tests/fixtures/make_diff_repo.sh +++ b/gix-diff/tests/fixtures/make_diff_repo.sh @@ -94,3 +94,10 @@ rm g/aa touch g/a git add g/a git commit -qam 'rm g/aa, add g/a' + +rm -Rf ./* && mkdir git-sec gix && touch a git-sec/2 git-sequencer h gix/5 && git add . +git commit -am "clear slate" + +git mv git-sec gix-sec && git commit -m "interesting rename 1" + +git mv gix-sec git-sec && git commit -m "interesting rename 2" diff --git a/gix-diff/tests/tree/mod.rs b/gix-diff/tests/tree/mod.rs index b9f8b942598..370dd0d6fd2 100644 --- a/gix-diff/tests/tree/mod.rs +++ b/gix-diff/tests/tree/mod.rs @@ -5,6 +5,7 @@ mod changes { use gix_hash::{oid, ObjectId}; use gix_object::{bstr::ByteSlice, tree::EntryMode, TreeRefIter}; use gix_odb::pack::Find; + use std::collections::HashMap; use crate::hex_to_id; @@ -132,19 +133,32 @@ mod changes { .expect("valid hex id") } - fn all_commits(db: &gix_odb::Handle) -> Vec { + fn all_commits(db: &gix_odb::Handle) -> HashMap { use gix_traverse::commit; + let mut buf = Vec::new(); let head = head_of(db); commit::Ancestors::new(Some(head), commit::ancestors::State::default(), |oid, buf| { use gix_odb::FindExt; db.find_commit_iter(oid, buf) }) - .collect::>() - .into_iter() - .rev() .collect::, _>>() .expect("valid iteration") + .into_iter() + .map(|c| { + use gix_odb::FindExt; + ( + db.find_commit(c, &mut buf) + .unwrap() + .message + .trim() + .to_str_lossy() + .into_owned(), + c, + ) + }) + .rev() + .collect() } #[test] @@ -152,7 +166,7 @@ mod changes { let db = db(None)?; let all_commits = all_commits(&db); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[0])?, + diff_with_previous_commit_from(&db, &all_commits["f added"])?, vec![Addition { entry_mode: EntryMode::Blob, oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), @@ -162,7 +176,7 @@ mod changes { ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[1])?, + diff_with_previous_commit_from(&db, &all_commits["f modified"])?, vec![Modification { previous_entry_mode: EntryMode::Blob, previous_oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), @@ -174,18 +188,18 @@ mod changes { ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[2])?, + diff_with_previous_commit_from(&db, &all_commits["f deleted"])?, vec![Deletion { entry_mode: EntryMode::Blob, oid: hex_to_id("28ce6a8b26aa170e1de65536fe8abe1832bd3242"), path: "f".into() }], ":100644 000000 28ce6a8b26aa170e1de65536fe8abe1832bd3242 0000000000000000000000000000000000000000 D f -" + " ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[4])?, + diff_with_previous_commit_from(&db, &all_commits["f mode modified to dir f/"])?, vec![ Deletion { entry_mode: EntryMode::Blob, @@ -208,7 +222,27 @@ mod changes { ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[5])?, + diff_with_previous_commit_from(&db, &all_commits["a renamed to b"])?, + vec![ + Deletion { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "a".into() + }, + Addition { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "b".into() + } + ], + "simple rename, same level + :100644 000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0000000000000000000000000000000000000000 D a + :000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A b" + + ); + + assert_eq!( + diff_with_previous_commit_from(&db, &all_commits["f/f modified"])?, vec![ Modification { previous_entry_mode: EntryMode::Tree, @@ -241,7 +275,7 @@ mod changes { #[cfg(not(windows))] let link_entry_mode = EntryMode::Link; assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[8])?, + diff_with_previous_commit_from(&db, &all_commits["f/f mode changed to link"])?, vec![ Modification { previous_entry_mode: EntryMode::Tree, @@ -262,18 +296,18 @@ mod changes { ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[10])?, + diff_with_previous_commit_from(&db, &all_commits["f/ changed into file f"])?, vec![ - Deletion { - entry_mode: EntryMode::Tree, - oid: tree_with_link_id, - path: "f".into() - }, Addition { entry_mode: EntryMode::Blob, oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), path: "f".into() }, + Deletion { + entry_mode: EntryMode::Tree, + oid: tree_with_link_id, + path: "f".into() + }, Deletion { entry_mode: EntryMode::Blob, oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), @@ -296,7 +330,7 @@ mod changes { :120000 000000 2e65efe2a145dda7ee51d1741299f848e5bf752e 0000000000000000000000000000000000000000 D f/f" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[12])?, + diff_with_previous_commit_from(&db, &all_commits["delete d/"])?, vec![ Deletion { entry_mode: EntryMode::Tree, @@ -312,7 +346,7 @@ mod changes { ":100644 000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0000000000000000000000000000000000000000 D d/f" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[13])?, + diff_with_previous_commit_from(&db, &all_commits["add /c /d /e"])?, vec![ Addition { entry_mode: EntryMode::Blob, @@ -335,7 +369,7 @@ mod changes { :000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A e" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[14])?, + diff_with_previous_commit_from(&db, &all_commits["add g/a"])?, vec![ Addition { entry_mode: EntryMode::Tree, @@ -351,7 +385,7 @@ mod changes { ":000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A g/a" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[15])?, + diff_with_previous_commit_from(&db, &all_commits["remove /c /d /e"])?, vec![ Deletion { entry_mode: EntryMode::Blob, @@ -374,7 +408,7 @@ mod changes { :100644 000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0000000000000000000000000000000000000000 D e" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[16])?, + diff_with_previous_commit_from(&db, &all_commits["rm /f, add /ff"])?, vec![ Deletion { entry_mode: EntryMode::Blob, @@ -391,7 +425,7 @@ mod changes { :000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A ff" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[17])?, + diff_with_previous_commit_from(&db, &all_commits["rm g/a, add g/aa"])?, vec![ Modification { previous_entry_mode: EntryMode::Tree, @@ -415,7 +449,7 @@ mod changes { :000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A g/aa" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[18])?, + diff_with_previous_commit_from(&db, &all_commits["rm /ff, add /f"])?, vec![ Addition { entry_mode: EntryMode::Blob, @@ -432,7 +466,7 @@ mod changes { :000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A ff" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[19])?, + diff_with_previous_commit_from(&db, &all_commits["rm g/aa, add g/a"])?, vec![ Modification { previous_entry_mode: EntryMode::Tree, @@ -464,7 +498,7 @@ mod changes { let all_commits = all_commits(&db); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[0])?, + diff_with_previous_commit_from(&db, &all_commits["f added"])?, vec![ Addition { entry_mode: EntryMode::Tree, @@ -480,7 +514,7 @@ mod changes { ":000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A a/f" ); assert_eq!( - diff_with_previous_commit_from(&db, &all_commits[1])?, + diff_with_previous_commit_from(&db, &all_commits["f modified"])?, vec![ Modification { previous_entry_mode: EntryMode::Tree, @@ -500,7 +534,7 @@ mod changes { ":100644 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 28ce6a8b26aa170e1de65536fe8abe1832bd3242 M a/f" ); - for commit in all_commits { + for (_, commit) in all_commits { // Just make sure it works - checked results with dbg!() once and am too ~~lazy~~ time constrained to add the // assertions now similar to the non-nested version. diff_with_previous_commit_from(&db, &commit)?; @@ -514,13 +548,10 @@ mod changes { let db = db(None)?; let all_commits = all_commits(&db); + let last_commit = all_commits["rm g/aa, add g/a"]; + let first_commit = all_commits["f added"]; assert_eq!( - diff_commits( - &db, - all_commits[0].to_owned(), - all_commits.last().expect("we have many commits"), - None - )?, + diff_commits(&db, first_commit.to_owned(), &last_commit, None)?, vec![ Addition { entry_mode: EntryMode::Blob, @@ -540,12 +571,7 @@ mod changes { ] ); assert_eq!( - diff_commits( - &db, - all_commits.last().expect("we have many commits").to_owned(), - &all_commits[0], - Location::FileName.into() - )?, + diff_commits(&db, last_commit.to_owned(), &first_commit, Location::FileName.into())?, vec![ Deletion { entry_mode: EntryMode::Blob, @@ -573,12 +599,7 @@ mod changes { let all_commits = all_commits(&db); assert_eq!( - diff_commits( - &db, - None::, - &all_commits[all_commits.len() - 6], - Some(Location::Path) - )?, + diff_commits(&db, None::, &all_commits["add g/a"], Some(Location::Path))?, vec![ Addition { entry_mode: EntryMode::Tree, @@ -624,5 +645,71 @@ mod changes { ); Ok(()) } + + #[test] + fn interesting_rename() -> crate::Result { + let db = db(None)?; + let all_commits = all_commits(&db); + + assert_eq!( + diff_with_previous_commit_from(&db, &all_commits["interesting rename 1"])?, + vec![ + Deletion { + entry_mode: EntryMode::Tree, + oid: hex_to_id("f84fc275158a2973cb4a79b1618b79ec7f573a95"), + path: "git-sec".into() + }, + Addition { + entry_mode: EntryMode::Tree, + oid: hex_to_id("f84fc275158a2973cb4a79b1618b79ec7f573a95"), + path: "gix-sec".into() + }, + Deletion { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "git-sec/2".into() + }, + Addition { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "gix-sec/2".into() + } + ] + ); + Ok(()) + } + + #[test] + fn interesting_rename_2() -> crate::Result { + let db = db(None)?; + let all_commits = all_commits(&db); + + assert_eq!( + diff_with_previous_commit_from(&db, &all_commits["interesting rename 2"])?, + vec![ + Addition { + entry_mode: EntryMode::Tree, + oid: hex_to_id("f84fc275158a2973cb4a79b1618b79ec7f573a95"), + path: "git-sec".into() + }, + Deletion { + entry_mode: EntryMode::Tree, + oid: hex_to_id("f84fc275158a2973cb4a79b1618b79ec7f573a95"), + path: "gix-sec".into() + }, + Addition { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "git-sec/2".into() + }, + Deletion { + entry_mode: EntryMode::Blob, + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + path: "gix-sec/2".into() + } + ] + ); + Ok(()) + } } } diff --git a/gix-object/src/tree/mod.rs b/gix-object/src/tree/mod.rs index 306b3a27031..688689c0812 100644 --- a/gix-object/src/tree/mod.rs +++ b/gix-object/src/tree/mod.rs @@ -85,6 +85,7 @@ impl<'a> PartialOrd for EntryRef<'a> { impl<'a> Ord for EntryRef<'a> { /// Entries compare by the common portion of the filename. This is critical for proper functioning of algorithms working on trees. + /// Doing it like this is needed for compatibility with older, potentially broken(?) trees. fn cmp(&self, other: &Self) -> Ordering { let len = self.filename.len().min(other.filename.len()); self.filename[..len].cmp(&other.filename[..len]) diff --git a/gix/tests/fixtures/generated-archives/make_diff_repo.tar.xz b/gix/tests/fixtures/generated-archives/make_diff_repo.tar.xz index b27d032fa50..d31727f442c 100644 --- a/gix/tests/fixtures/generated-archives/make_diff_repo.tar.xz +++ b/gix/tests/fixtures/generated-archives/make_diff_repo.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e47ea88dfffaae2926cdc1d02a27c05b66188dacf0eb03c44898744f8a38fb0 -size 18512 +oid sha256:f0ffd6adef1a5755c2013dd7e793ca8e239308448872681a36a2216ea56623b2 +size 32032 diff --git a/gix/tests/fixtures/make_diff_repo.sh b/gix/tests/fixtures/make_diff_repo.sh index 54895018b17..1aac40b43a7 100644 --- a/gix/tests/fixtures/make_diff_repo.sh +++ b/gix/tests/fixtures/make_diff_repo.sh @@ -30,7 +30,7 @@ git commit -m "r2-ambiguous" git mv dir/c dir/c-moved echo n >> dir/c-moved echo n >> b -git commit -am "r3" # modified rename and normal modification +git commit -am "r3-simple" # modified rename and normal modification touch lt1 lt2 ln -s lt1 link-1 @@ -77,3 +77,672 @@ seq 15 > newly-added echo nn >> b git add . git commit -m "tc3-find-harder" + +rm -Rf ./* +# from 92de081dc9ab5660cb18fa750452345dd63550ea~1 of `gitoxide` +while read -r _ _ _ path; do + mkdir -p ${path%/*} && touch $path +done < git-index/tests/index/file/mod.rs +git add . && git commit -m "r1-change" + +rm -Rf ./* +# from d7ad650d3~1 of `gitoxide` +while read -r _ _ _ path; do + mkdir -p ${path%/*} && touch $path +done < baseline-3.no-renames +git -c diff.renames=1 show > baseline-3.with-renames +git -c diff.renames=0 show HEAD~2 > baseline-2.no-renames +git -c diff.renames=1 show HEAD~2 > baseline-2.with-renames +git -c diff.renames=0 show HEAD~4 > baseline.no-renames +git -c diff.renames=1 show HEAD~4 > baseline.with-renames diff --git a/gix/tests/object/tree/diff.rs b/gix/tests/object/tree/diff.rs index 4f9f516bad8..132caedf562 100644 --- a/gix/tests/object/tree/diff.rs +++ b/gix/tests/object/tree/diff.rs @@ -1,3 +1,4 @@ +use gix::bstr::BString; use std::convert::Infallible; use gix_object::{bstr::ByteSlice, tree::EntryMode}; @@ -99,7 +100,7 @@ fn tree_named(repo: &gix::Repository, rev_spec: impl AsRef) -> gix::Tree { } mod track_rewrites { - use crate::object::tree::diff::tree_named; + use crate::object::tree::diff::{added, deleted, modified, store, tree_named}; use crate::util::named_repo; use gix::object::tree::diff::change::{DiffLineStats, Event}; use gix::object::tree::diff::rewrites::{Copies, CopySource}; @@ -182,8 +183,8 @@ mod track_rewrites { #[test] fn rename_by_similarity() -> crate::Result { let repo = named_repo("make_diff_repo.sh")?; - let from = tree_named(&repo, "@^{/r3}~1"); - let to = tree_named(&repo, ":/r3"); + let from = tree_named(&repo, "@^{/r3-simple}~1"); + let to = tree_named(&repo, ":/r3-simple"); for percentage in [ None, @@ -637,4 +638,486 @@ mod track_rewrites { Ok(()) } + + #[test] + fn realistic_renames() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r1-change}~1"); + let to = tree_named(&repo, ":/r1-change"); + + let mut actual = Vec::new(); + let mut other = Vec::new(); + from.changes()? + .track_path() + .track_rewrites( + Rewrites { + copies: Some(Copies::default()), + limit: 1, + ..Default::default() + } + .into(), + ) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + if let Event::Rewrite { + source_location, copy, .. + } = change.event + { + actual.push(source_location.to_owned()); + actual.push(change.location.to_owned()); + assert!(!copy); + } else { + other.push(store(&change)); + } + } + Ok(Default::default()) + })?; + + assert_eq!(actual, vec!["git-index/src/file.rs", "git-index/src/file/mod.rs"]); + assert_eq!( + other, + vec![ + added("git-index/tests/index/file/access.rs"), + modified("git-index/tests/index/file/mod.rs") + ] + ); + + let actual = std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline.with-renames"))?; + let expected = r#"commit 6974f2b5181772977a9d7d34a566414508552650 +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r1-change + +diff --git a/git-index/src/file.rs b/git-index/src/file/mod.rs +similarity index 100% +rename from git-index/src/file.rs +rename to git-index/src/file/mod.rs +diff --git a/git-index/tests/index/file/access.rs b/git-index/tests/index/file/access.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/git-index/tests/index/file/mod.rs b/git-index/tests/index/file/mod.rs +index e69de29..8ba3a16 100644 +--- a/git-index/tests/index/file/mod.rs ++++ b/git-index/tests/index/file/mod.rs +@@ -0,0 +1 @@ ++n +"#; + assert_eq!(actual, expected); + + Ok(()) + } + + #[test] + fn realistic_renames_disabled() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r1-change}~1"); + let to = tree_named(&repo, ":/r1-change"); + + let mut actual = Vec::new(); + from.changes()? + .track_path() + .track_rewrites(None) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + actual.push(store(&change)); + if let Event::Rewrite { .. } = change.event { + unreachable!("it's disabled, so cannot happen") + } + } + Ok(Default::default()) + })?; + + assert_eq!( + actual, + vec![ + deleted("git-index/src/file.rs"), + added("git-index/src/file/mod.rs"), + added("git-index/tests/index/file/access.rs"), + modified("git-index/tests/index/file/mod.rs") + ] + ); + + let actual = std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline.no-renames"))?; + let expected = r#"commit 6974f2b5181772977a9d7d34a566414508552650 +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r1-change + +diff --git a/git-index/src/file.rs b/git-index/src/file.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-index/src/file/mod.rs b/git-index/src/file/mod.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/git-index/tests/index/file/access.rs b/git-index/tests/index/file/access.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/git-index/tests/index/file/mod.rs b/git-index/tests/index/file/mod.rs +index e69de29..8ba3a16 100644 +--- a/git-index/tests/index/file/mod.rs ++++ b/git-index/tests/index/file/mod.rs +@@ -0,0 +1 @@ ++n +"#; + assert_eq!(actual, expected); + + Ok(()) + } + + #[test] + fn realistic_renames_disabled_2() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r2-change}~1"); + let to = tree_named(&repo, ":/r2-change"); + + let mut actual = Vec::new(); + from.changes()? + .track_path() + .track_rewrites(None) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + actual.push(store(&change)); + if let Event::Rewrite { .. } = change.event { + unreachable!("it's disabled, so cannot happen") + } + } + Ok(Default::default()) + })?; + + let expected = r#"commit 72de3500e1bff816e56432bee8de02946d3e784b +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r2-change + +diff --git a/git-sec/CHANGELOG.md b/git-sec/CHANGELOG.md +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/Cargo.toml b/git-sec/Cargo.toml +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/src/identity.rs b/git-sec/src/identity.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/src/lib.rs b/git-sec/src/lib.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/src/permission.rs b/git-sec/src/permission.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/src/trust.rs b/git-sec/src/trust.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/tests/identity/mod.rs b/git-sec/tests/identity/mod.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/git-sec/tests/sec.rs b/git-sec/tests/sec.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/gix-sec/CHANGELOG.md b/gix-sec/CHANGELOG.md +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/Cargo.toml b/gix-sec/Cargo.toml +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/src/identity.rs b/gix-sec/src/identity.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/src/lib.rs b/gix-sec/src/lib.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/src/permission.rs b/gix-sec/src/permission.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/src/trust.rs b/gix-sec/src/trust.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/tests/identity/mod.rs b/gix-sec/tests/identity/mod.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/gix-sec/tests/sec.rs b/gix-sec/tests/sec.rs +new file mode 100644 +index 0000000..e69de29 +"#; + assert_eq!( + std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline-2.no-renames"))?, + expected + ); + + assert_eq!( + actual, + vec![ + deleted("git-sec/CHANGELOG.md"), + deleted("git-sec/Cargo.toml"), + added("gix-sec/CHANGELOG.md"), + added("gix-sec/Cargo.toml"), + deleted("git-sec/src/identity.rs"), + deleted("git-sec/src/lib.rs"), + deleted("git-sec/src/permission.rs"), + deleted("git-sec/src/trust.rs"), + deleted("git-sec/tests/sec.rs"), + added("gix-sec/src/identity.rs"), + added("gix-sec/src/lib.rs"), + added("gix-sec/src/permission.rs"), + added("gix-sec/src/trust.rs"), + added("gix-sec/tests/sec.rs"), + deleted("git-sec/tests/identity/mod.rs"), + added("gix-sec/tests/identity/mod.rs"), + ] + ); + + Ok(()) + } + + #[test] + fn realistic_renames_disabled_3() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r3-change}~1"); + let to = tree_named(&repo, ":/r3-change"); + + let mut actual = Vec::new(); + from.changes()? + .track_path() + .track_rewrites(None) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + actual.push(store(&change)); + if let Event::Rewrite { .. } = change.event { + unreachable!("it's disabled, so cannot happen") + } + } + Ok(Default::default()) + })?; + + let expected = r#"commit dee00f5a20957db20d8d2e0050210716d6b44879 +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r3-change + +diff --git a/src/ein.rs b/src/ein.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/src/gix.rs b/src/gix.rs +new file mode 100644 +index 0000000..e69de29 +diff --git a/src/plumbing-cli.rs b/src/plumbing-cli.rs +deleted file mode 100644 +index e69de29..0000000 +diff --git a/src/porcelain-cli.rs b/src/porcelain-cli.rs +deleted file mode 100644 +index e69de29..0000000 +"#; + + assert_eq!( + std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline-3.no-renames"))?, + expected + ); + assert_eq!( + actual, + vec![ + added("src/ein.rs"), + added("src/gix.rs"), + deleted("src/plumbing-cli.rs"), + deleted("src/porcelain-cli.rs"), + ] + ); + + Ok(()) + } + + #[test] + fn realistic_renames_3() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r3-change}~1"); + let to = tree_named(&repo, ":/r3-change"); + + let mut actual = Vec::new(); + let mut other = Vec::new(); + from.changes()? + .track_path() + .track_rewrites( + Rewrites { + copies: Some(Copies::default()), + limit: 1, + ..Default::default() + } + .into(), + ) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + if let Event::Rewrite { + source_location, copy, .. + } = change.event + { + actual.push(source_location.to_owned()); + actual.push(change.location.to_owned()); + assert!(!copy); + } else { + other.push(store(&change)); + } + } + Ok(Default::default()) + })?; + + let expected = r#"commit dee00f5a20957db20d8d2e0050210716d6b44879 +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r3-change + +diff --git a/src/plumbing-cli.rs b/src/ein.rs +similarity index 100% +rename from src/plumbing-cli.rs +rename to src/ein.rs +diff --git a/src/porcelain-cli.rs b/src/gix.rs +similarity index 100% +rename from src/porcelain-cli.rs +rename to src/gix.rs +"#; + assert_eq!( + std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline-3.with-renames"))?, + expected + ); + assert_eq!( + actual, + vec![ + "src/plumbing-cli.rs", + "src/ein.rs", + "src/porcelain-cli.rs", + "src/gix.rs" + ] + ); + assert!(other.is_empty()); + + Ok(()) + } + + #[test] + fn realistic_renames_2() -> crate::Result { + let repo = named_repo("make_diff_repo.sh")?; + let from = tree_named(&repo, "@^{/r2-change}~1"); + let to = tree_named(&repo, ":/r2-change"); + + let mut actual = Vec::new(); + from.changes()? + .track_path() + .track_rewrites( + Rewrites { + copies: Some(Copies::default()), + limit: 1, + ..Default::default() + } + .into(), + ) + .for_each_to_obtain_tree(&to, |change| -> Result<_, Infallible> { + if !change.event.entry_mode().is_tree() { + if let Event::Rewrite { + source_location, copy, .. + } = change.event + { + actual.push(source_location.to_owned()); + actual.push(change.location.to_owned()); + assert!(!copy); + } else { + unreachable!("everything is a rewrite"); + } + } + Ok(Default::default()) + })?; + + let expected = r#"commit 72de3500e1bff816e56432bee8de02946d3e784b +Author: author +Date: Sat Jan 1 00:00:00 2000 +0000 + + r2-change + +diff --git a/git-sec/CHANGELOG.md b/gix-sec/CHANGELOG.md +similarity index 100% +rename from git-sec/CHANGELOG.md +rename to gix-sec/CHANGELOG.md +diff --git a/git-sec/Cargo.toml b/gix-sec/Cargo.toml +similarity index 100% +rename from git-sec/Cargo.toml +rename to gix-sec/Cargo.toml +diff --git a/git-sec/src/identity.rs b/gix-sec/src/identity.rs +similarity index 100% +rename from git-sec/src/identity.rs +rename to gix-sec/src/identity.rs +diff --git a/git-sec/src/lib.rs b/gix-sec/src/lib.rs +similarity index 100% +rename from git-sec/src/lib.rs +rename to gix-sec/src/lib.rs +diff --git a/git-sec/src/permission.rs b/gix-sec/src/permission.rs +similarity index 100% +rename from git-sec/src/permission.rs +rename to gix-sec/src/permission.rs +diff --git a/git-sec/src/trust.rs b/gix-sec/src/trust.rs +similarity index 100% +rename from git-sec/src/trust.rs +rename to gix-sec/src/trust.rs +diff --git a/git-sec/tests/identity/mod.rs b/gix-sec/tests/identity/mod.rs +similarity index 100% +rename from git-sec/tests/identity/mod.rs +rename to gix-sec/tests/identity/mod.rs +diff --git a/git-sec/tests/sec.rs b/gix-sec/tests/sec.rs +similarity index 100% +rename from git-sec/tests/sec.rs +rename to gix-sec/tests/sec.rs +"#; + assert_eq!( + std::fs::read_to_string(repo.work_dir().expect("non-bare").join("baseline-2.with-renames"))?, + expected + ); + + assert_eq!( + actual, + vec![ + "git-sec/CHANGELOG.md", + "gix-sec/CHANGELOG.md", + "git-sec/Cargo.toml", + "gix-sec/Cargo.toml", + "git-sec/src/identity.rs", + "gix-sec/src/identity.rs", + "git-sec/src/lib.rs", + "gix-sec/src/lib.rs", + "git-sec/src/permission.rs", + "gix-sec/src/permission.rs", + "git-sec/src/trust.rs", + "gix-sec/src/trust.rs", + "git-sec/tests/sec.rs", + "gix-sec/tests/sec.rs", + "git-sec/tests/identity/mod.rs", + "gix-sec/tests/identity/mod.rs" + ] + ); + + Ok(()) + } +} +fn store(change: &gix::object::tree::diff::Change<'_, '_, '_>) -> (char, BString) { + (shorthand(&change.event), change.location.to_owned()) +} + +fn added(path: &str) -> (char, BString) { + ('A', path.into()) +} + +fn deleted(path: &str) -> (char, BString) { + ('D', path.into()) +} + +fn modified(path: &str) -> (char, BString) { + ('M', path.into()) +} + +fn shorthand(change: &Event) -> char { + match change { + Event::Addition { .. } => 'A', + Event::Deletion { .. } => 'D', + Event::Modification { .. } => 'M', + Event::Rewrite { .. } => 'R', + } } diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 2f8617382b1..54335f70d43 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -253,7 +253,7 @@ pub mod tree { #[clap(long, short = 'e')] extended: bool, - /// The tree to traverse, or the tree at `HEAD` if unspecified. + /// The revspec of the tree to traverse, or the tree at `HEAD` if unspecified. treeish: Option, }, /// Provide information about a tree. @@ -261,7 +261,7 @@ pub mod tree { /// Provide files size as well. This is expensive as the object is decoded entirely. #[clap(long, short = 'e')] extended: bool, - /// The tree to traverse, or the tree at `HEAD` if unspecified. + /// The revspec of the tree to traverse, or the tree at `HEAD` if unspecified. treeish: Option, }, } @@ -355,7 +355,7 @@ pub mod revision { pub mod exclude { use std::ffi::OsString; - use super::AsPathSpec; + use crate::shared::AsPathSpec; #[derive(Debug, clap::Subcommand)] pub enum Subcommands { @@ -402,26 +402,3 @@ pub mod index { /// pub mod free; - -mod clap_util { - use std::ffi::OsStr; - - use clap::{ - builder::{OsStringValueParser, TypedValueParser}, - Arg, Command, Error, - }; - - #[derive(Clone)] - pub struct AsPathSpec; - - impl TypedValueParser for AsPathSpec { - type Value = gix::path::Spec; - - fn parse_ref(&self, cmd: &Command, arg: Option<&Arg>, value: &OsStr) -> Result { - OsStringValueParser::new() - .try_map(|arg| gix::path::Spec::try_from(arg.as_os_str())) - .parse_ref(cmd, arg, value) - } - } -} -use clap_util::AsPathSpec; diff --git a/src/porcelain/main.rs b/src/porcelain/main.rs index 2d0c5307b44..af390be939e 100644 --- a/src/porcelain/main.rs +++ b/src/porcelain/main.rs @@ -26,6 +26,8 @@ pub fn main() -> Result<()> { })?; let verbose = !args.quiet; let progress = args.progress; + #[cfg(feature = "gitoxide-core-tools")] + let threads = args.threads; let progress_keep_open = args.progress_keep_open; match args.cmd { @@ -41,16 +43,51 @@ pub fn main() -> Result<()> { Subcommands::Init { directory } => core::repository::init(directory).map(|_| ()), #[cfg(feature = "gitoxide-core-tools")] Subcommands::Tool(tool) => match tool { - crate::porcelain::options::ToolCommands::EstimateHours(crate::porcelain::options::EstimateHours { - working_dir, - rev_spec, - no_bots, - threads, - file_stats, - line_stats, - show_pii, - omit_unify_identities, + #[cfg(feature = "gitoxide-core-tools-query")] + crate::porcelain::options::ToolCommands::Query(crate::porcelain::options::tools::Query { + object_cache_size_mb, + find_copies_harder, + repo_dir, + cmd, }) => { + use gitoxide_core::query; + prepare_and_run( + "query", + verbose, + progress, + progress_keep_open, + crate::shared::STANDARD_RANGE, + move |mut progress, out, err| { + let engine = query::prepare( + &repo_dir, + &mut progress, + query::Options { + object_cache_size_mb, + find_copies_harder, + threads, + }, + )?; + match cmd { + None => writeln!(err, "Choose a command for the query engine")?, + Some(crate::porcelain::options::tools::query::Command::TracePath { path }) => { + engine.run(query::Command::TracePath { spec: path }, out, progress)?; + } + } + Ok(()) + }, + ) + } + crate::porcelain::options::ToolCommands::EstimateHours( + crate::porcelain::options::tools::EstimateHours { + working_dir, + rev_spec, + no_bots, + file_stats, + line_stats, + show_pii, + omit_unify_identities, + }, + ) => { use gitoxide_core::hours; prepare_and_run( "estimate-hours", diff --git a/src/porcelain/options.rs b/src/porcelain/options.rs index d295d11a2da..53fe2ccca0e 100644 --- a/src/porcelain/options.rs +++ b/src/porcelain/options.rs @@ -1,7 +1,5 @@ use std::path::PathBuf; -use gix::bstr::BString; - #[derive(Debug, clap::Parser)] #[clap(about = "The rusty git", version = clap::crate_version!())] #[clap(subcommand_required = true)] @@ -13,6 +11,9 @@ pub struct Args { /// Bring up a terminal user interface displaying progress visually #[clap(long, conflicts_with("quiet"))] pub progress: bool, + /// The amount of threads to use. If unset, use all cores, if 0 use al physical cores. + #[clap(short = 't', long)] + pub threads: Option, /// The progress TUI will stay up even though the work is already completed. /// @@ -78,91 +79,136 @@ pub enum ToolCommands { /// Defaults to the current working directory. destination_directory: Option, }, - EstimateHours(EstimateHours), + #[cfg(feature = "gitoxide-core-tools-query")] + Query(tools::Query), + EstimateHours(tools::EstimateHours), } -#[derive(Debug, clap::Parser)] -#[clap( - about = "Estimate hours worked based on a commit history", - long_about = "See https://github.com/kimmobrunfeldt/git-hours#how-it-works for details", - version = clap::crate_version!(), - visible_alias = "h", - visible_alias = "hours") -] -pub struct EstimateHours { - /// The directory containing a '.git/' folder. - #[clap(value_parser = validator::IsRepo)] - #[clap(default_value = ".")] - pub working_dir: PathBuf, - /// The name of the revision as spec, like 'HEAD' or 'main' at which to start iterating the commit graph. - #[clap(default_value("HEAD"), value_parser = crate::shared::AsBString)] - pub rev_spec: BString, - /// Ignore github bots which match the `[bot]` search string. - #[clap(short = 'b', long)] - pub no_bots: bool, - /// Collect additional information about file modifications, additions and deletions (without rename tracking). - #[clap(short = 'f', long)] - pub file_stats: bool, - /// Collect additional information about lines added and deleted (without rename tracking). - /// - /// Note that this implies the work to be done for file-stats, so it should be set as well. - #[clap(short = 'l', long)] - pub line_stats: bool, - /// The amount of threads to use. If unset, use all cores, if 0 use al physical cores. - #[clap(short = 't', long)] - pub threads: Option, - /// Show personally identifiable information before the summary. Includes names and email addresses. - #[clap(short = 'p', long)] - pub show_pii: bool, - /// Omit unifying identities by name and email which can lead to the same author appear multiple times - /// due to using different names or email addresses. - #[clap(short = 'i', long)] - pub omit_unify_identities: bool, -} +#[cfg(feature = "gitoxide-core-tools")] +pub mod tools { + use gix::bstr::BString; + use std::path::PathBuf; + + #[cfg(feature = "gitoxide-core-tools-query")] + #[derive(Debug, clap::Parser)] + #[command( + about = "a database accelerated engine to extract information and query it", + version = clap::crate_version!(), + visible_alias = "q" + )] + pub struct Query { + /// The total amount of object cache memory in MB. Bigger repos may benefit from more memory. + /// + /// 0 disables it. + #[arg(long, short = 'o', default_value_t = 200)] + pub object_cache_size_mb: usize, + /// Find identical copies in the entire tree, not only in the set of modified files. + /// + /// This is an expensive option, and typically cuts speed in half. + #[arg(long, short = 'C')] + pub find_copies_harder: bool, + /// path to the git repository to generate the database for + #[arg(default_value = ".")] + pub repo_dir: std::path::PathBuf, + #[clap(subcommand)] + pub cmd: Option, + } -mod validator { - use std::{ffi::OsStr, path::PathBuf}; - - use anyhow::Context; - - #[derive(Clone)] - pub struct IsRepo; - - impl clap::builder::TypedValueParser for IsRepo { - type Value = PathBuf; - - fn parse_ref( - &self, - cmd: &clap::Command, - _arg: Option<&clap::Arg>, - value: &OsStr, - ) -> Result { - assure_is_repo(value).map_err(|e| { - let mut err = clap::Error::new(clap::error::ErrorKind::InvalidValue).with_cmd(cmd); - err.insert( - clap::error::ContextKind::InvalidValue, - clap::error::ContextValue::String(e.to_string()), - ); - err - })?; - Ok(value.into()) + #[cfg(feature = "gitoxide-core-tools-query")] + pub mod query { + use crate::shared::AsPathSpec; + + #[derive(Debug, clap::Subcommand)] + pub enum Command { + /// Follow a file through the entire history reachable from HEAD. + TracePath { + /// The path to trace through history. + #[clap(value_parser = AsPathSpec)] + path: gix::path::Spec, + }, } } - fn assure_is_repo(dir: &OsStr) -> anyhow::Result<()> { - let git_dir = PathBuf::from(dir).join(".git"); - let p = gix::path::realpath(&git_dir) - .with_context(|| format!("Could not canonicalize git repository at '{}'", git_dir.display()))?; - if p.extension().unwrap_or_default() == "git" - || p.file_name().unwrap_or_default() == ".git" - || p.join("HEAD").is_file() - { - Ok(()) - } else { - Err(anyhow::anyhow!( - "Path '{}' needs to be a directory containing '.git/'", - p.display() - )) + #[derive(Debug, clap::Parser)] + #[clap( + about = "Estimate hours worked based on a commit history", + long_about = "See https://github.com/kimmobrunfeldt/git-hours#how-it-works for details", + version = clap::crate_version!(), + visible_alias = "h", + visible_alias = "hours" + )] + pub struct EstimateHours { + /// The directory containing a '.git/' folder. + #[clap(value_parser = validator::IsRepo)] + #[clap(default_value = ".")] + pub working_dir: PathBuf, + /// The name of the revision as spec, like 'HEAD' or 'main' at which to start iterating the commit graph. + #[clap(default_value("HEAD"), value_parser = crate::shared::AsBString)] + pub rev_spec: BString, + /// Ignore github bots which match the `[bot]` search string. + #[clap(short = 'b', long)] + pub no_bots: bool, + /// Collect additional information about file modifications, additions and deletions (without rename tracking). + #[clap(short = 'f', long)] + pub file_stats: bool, + /// Collect additional information about lines added and deleted (without rename tracking). + /// + /// Note that this implies the work to be done for file-stats, so it should be set as well. + #[clap(short = 'l', long)] + pub line_stats: bool, + /// Show personally identifiable information before the summary. Includes names and email addresses. + #[clap(short = 'p', long)] + pub show_pii: bool, + /// Omit unifying identities by name and email which can lead to the same author appear multiple times + /// due to using different names or email addresses. + #[clap(short = 'i', long)] + pub omit_unify_identities: bool, + } + + mod validator { + use std::{ffi::OsStr, path::PathBuf}; + + use anyhow::Context; + + #[derive(Clone)] + pub struct IsRepo; + + impl clap::builder::TypedValueParser for IsRepo { + type Value = PathBuf; + + fn parse_ref( + &self, + cmd: &clap::Command, + _arg: Option<&clap::Arg>, + value: &OsStr, + ) -> Result { + assure_is_repo(value).map_err(|e| { + let mut err = clap::Error::new(clap::error::ErrorKind::InvalidValue).with_cmd(cmd); + err.insert( + clap::error::ContextKind::InvalidValue, + clap::error::ContextValue::String(e.to_string()), + ); + err + })?; + Ok(value.into()) + } + } + + fn assure_is_repo(dir: &OsStr) -> anyhow::Result<()> { + let git_dir = PathBuf::from(dir).join(".git"); + let p = gix::path::realpath(&git_dir) + .with_context(|| format!("Could not canonicalize git repository at '{}'", git_dir.display()))?; + if p.extension().unwrap_or_default() == "git" + || p.file_name().unwrap_or_default() == ".git" + || p.join("HEAD").is_file() + { + Ok(()) + } else { + Err(anyhow::anyhow!( + "Path '{}' needs to be a directory containing '.git/'", + p.display() + )) + } } } } diff --git a/src/shared.rs b/src/shared.rs index 0e7790a7314..ee16f0c34e0 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -291,5 +291,20 @@ mod clap { Some(Box::new([PossibleValue::new("SHA1")].into_iter())) } } + + use clap::builder::{OsStringValueParser, TypedValueParser}; + + #[derive(Clone)] + pub struct AsPathSpec; + + impl TypedValueParser for AsPathSpec { + type Value = gix::path::Spec; + + fn parse_ref(&self, cmd: &Command, arg: Option<&Arg>, value: &OsStr) -> Result { + OsStringValueParser::new() + .try_map(|arg| gix::path::Spec::try_from(arg.as_os_str())) + .parse_ref(cmd, arg, value) + } + } } -pub use self::clap::{AsBString, AsHashKind, AsOutputFormat}; +pub use self::clap::{AsBString, AsHashKind, AsOutputFormat, AsPathSpec}; diff --git a/tests/journey/ein.sh b/tests/journey/ein.sh index 775b23f26d2..3c67d8c6874 100644 --- a/tests/journey/ein.sh +++ b/tests/journey/ein.sh @@ -140,12 +140,14 @@ title "Porcelain ${kind}" } ) ) + if test "$kind" != "max-pure"; then (with "running with no further arguments" it "succeeds and informs about possible operations" && { WITH_SNAPSHOT="$snapshot/no-args-failure" \ expect_run_sh $WITH_CLAP_FAILURE "$exe t" } ) + fi ) ) ) diff --git a/tests/snapshots/panic-behaviour/expected-failure b/tests/snapshots/panic-behaviour/expected-failure index 56a3d364186..1dafa93b97b 100644 --- a/tests/snapshots/panic-behaviour/expected-failure +++ b/tests/snapshots/panic-behaviour/expected-failure @@ -1,2 +1,2 @@ -thread 'main' panicked at 'something went very wrong', src/porcelain/main.rs:39:42 +thread 'main' panicked at 'something went very wrong', src/porcelain/main.rs:41:42 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace \ No newline at end of file diff --git a/tests/snapshots/panic-behaviour/expected-failure-in-thread b/tests/snapshots/panic-behaviour/expected-failure-in-thread index 818ba6c3c0f..b99a2650670 100644 --- a/tests/snapshots/panic-behaviour/expected-failure-in-thread +++ b/tests/snapshots/panic-behaviour/expected-failure-in-thread @@ -1,3 +1,3 @@ -thread 'main' panicked at 'something went very wrong', src/porcelain/main.rs:39:42 +thread 'main' panicked at 'something went very wrong', src/porcelain/main.rs:41:42 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace  \ No newline at end of file diff --git a/tests/snapshots/panic-behaviour/expected-failure-in-thread-with-progress b/tests/snapshots/panic-behaviour/expected-failure-in-thread-with-progress index d0f76c0692a..c892c1ea47d 100644 --- a/tests/snapshots/panic-behaviour/expected-failure-in-thread-with-progress +++ b/tests/snapshots/panic-behaviour/expected-failure-in-thread-with-progress @@ -1,3 +1,3 @@ -[?1049h[?25lthread '' panicked at 'something went very wrong', src/porcelain/main.rs:39:42 +[?1049h[?25lthread '' panicked at 'something went very wrong', src/porcelain/main.rs:41:42 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace [?25h[?1049l \ No newline at end of file diff --git a/tests/snapshots/porcelain/tool/no-args-failure b/tests/snapshots/porcelain/tool/no-args-failure index 04e2456301d..090d919c851 100644 --- a/tests/snapshots/porcelain/tool/no-args-failure +++ b/tests/snapshots/porcelain/tool/no-args-failure @@ -5,6 +5,7 @@ Usage: ein tool Commands: find Find all repositories in a given directory organize Move all repositories found in a directory into a structure matching their clone URLs + query a database accelerated engine to extract information and query it [aliases: q] estimate-hours Estimate hours worked based on a commit history [aliases: h, hours] help Print this message or the help of the given subcommand(s)