diff --git a/Cargo.lock b/Cargo.lock index ad92273b749..13e98214f49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2040,6 +2040,29 @@ dependencies = [ "thiserror", ] +[[package]] +name = "gix-merge" +version = "0.0.0" +dependencies = [ + "bstr", + "document-features", + "gix-command", + "gix-filter", + "gix-fs 0.11.3", + "gix-hash 0.14.2", + "gix-object 0.44.0", + "gix-path 0.10.11", + "gix-quote 0.4.12", + "gix-tempfile 14.0.2", + "gix-testtools", + "gix-trace 0.1.10", + "gix-worktree 0.36.0", + "imara-diff", + "pretty_assertions", + "serde", + "thiserror", +] + [[package]] name = "gix-negotiate" version = "0.15.0" diff --git a/Cargo.toml b/Cargo.toml index c4fe1097bc4..6e5b2dfe1bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -243,6 +243,7 @@ members = [ "gix-object", "gix-glob", "gix-diff", + "gix-merge", "gix-date", "gix-traverse", "gix-dir", diff --git a/README.md b/README.md index 49bbcf1150f..5d5ca7e9f88 100644 --- a/README.md +++ b/README.md @@ -130,10 +130,11 @@ is usable to some extent. 
* [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) * [gix-worktree-state](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-state) - * `gitoxide-core` -* **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) * [gix-dir](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-dir) + * `gitoxide-core` +* **very early** _(possibly without any documentation and many rough edges)_ + * [gix-merge](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-merge) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index b34dffa0754..c0e24adabf2 100644 --- a/crate-status.md +++ b/crate-status.md @@ -196,6 +196,9 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] probe capabilities * [x] symlink creation and removal * [x] file snapshots +* [ ] **BString Interner with Arena-Backing and arbitrary value association** + - probably based on [`internment`](https://docs.rs/internment/latest/internment/struct.Arena.html#), + but needs `bumpalo` support to avoid item allocations/boxing, and avoid internal `Mutex`. (key type is pointer based). ### gix-fs * [x] probe capabilities @@ -215,6 +218,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] [name validation][tagname-validation] * [x] transform borrowed to owned objects * [x] edit trees efficiently and write changes back + - [ ] See if `gix-fs::InternedMap` improves performance. 
* [x] API documentation * [ ] Some examples @@ -320,11 +324,24 @@ Check out the [performance discussion][gix-diff-performance] as well. * [x] prepare invocation of external diff program - [ ] pass meta-info * [ ] working with hunks of data +* [ ] diff-heuristics match Git perfectly * [x] API documentation * [ ] Examples - + [gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 +### gix-merge + +* [x] three-way merge analysis of blobs with choice of how to resolve conflicts + - [ ] choose how to resolve conflicts on the data-structure + - [ ] produce a new blob based on data-structure containing possible resolutions + - [x] `merge` style + - [x] `diff3` style + - [x] `zdiff` style +* [ ] diff-heuristics match Git perfectly +* [x] API documentation + * [ ] Examples + ### gix-traverse Check out the [performance discussion][gix-traverse-performance] as well. diff --git a/gix-attributes/src/state.rs b/gix-attributes/src/state.rs index 8f2a466bcbf..0e9b41a2d75 100644 --- a/gix-attributes/src/state.rs +++ b/gix-attributes/src/state.rs @@ -23,9 +23,9 @@ impl<'a> ValueRef<'a> { } /// Access and conversions -impl ValueRef<'_> { +impl<'a> ValueRef<'a> { /// Access this value as byte string. - pub fn as_bstr(&self) -> &BStr { + pub fn as_bstr(&self) -> &'a BStr { self.0.as_bytes().as_bstr() } diff --git a/gix-diff/src/blob/pipeline.rs b/gix-diff/src/blob/pipeline.rs index 45018218426..b9c727e4ca8 100644 --- a/gix-diff/src/blob/pipeline.rs +++ b/gix-diff/src/blob/pipeline.rs @@ -22,6 +22,7 @@ pub struct WorktreeRoots { pub new_root: Option, } +/// Access impl WorktreeRoots { /// Return the root path for the given `kind` pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { @@ -30,6 +31,11 @@ impl WorktreeRoots { ResourceKind::NewOrDestination => self.new_root.as_deref(), } } + + /// Return `true` if all worktree roots are unset. 
+ pub fn is_unset(&self) -> bool { + self.new_root.is_none() && self.old_root.is_none() + } } /// Data as part of an [Outcome]. @@ -184,6 +190,8 @@ impl Pipeline { /// Access impl Pipeline { /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](Driver::name) to support binary searches. pub fn drivers(&self) -> &[super::Driver] { &self.drivers } @@ -445,7 +453,7 @@ impl Pipeline { } } .map_err(|err| { - convert_to_diffable::Error::CreateTempfile { + convert_to_diffable::Error::StreamCopy { source: err, rela_path: rela_path.to_owned(), } @@ -533,6 +541,8 @@ impl Driver { pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option { let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref(); let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned()) + // TODO: Add support for an actual Context, validate it *can* match Git + .with_context(Default::default()) .with_shell() .stdin(Stdio::null()) .stdout(Stdio::piped()) diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 6a550bc2dcf..495d23bd433 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -184,7 +184,7 @@ pub mod prepare_diff { use crate::blob::platform::Resource; - /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation. + /// The kind of operation that should be performed based on the configuration of the resources involved in the diff. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Operation<'a> { /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments. @@ -383,6 +383,7 @@ impl Platform { /// /// If one of the resources is binary, the operation reports an error as such resources don't make their data available /// which is required for the external diff to run. + // TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles. 
pub fn prepare_diff_command( &self, diff_command: BString, diff --git a/gix-filter/src/eol/convert_to_git.rs b/gix-filter/src/eol/convert_to_git.rs index c83f96428a0..88ed8784f0e 100644 --- a/gix-filter/src/eol/convert_to_git.rs +++ b/gix-filter/src/eol/convert_to_git.rs @@ -57,8 +57,10 @@ pub(crate) mod function { /// Return `true` if `buf` was written or `false` if nothing had to be done. /// Depending on the state in `buf`, `index_object` is called to write the version of `src` as stored in the index /// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists. - /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the - /// outcome of this function. + /// + /// *If renormalization is desired*, let it return `Ok(None)` at all times to not let it have any influence over the + /// outcome of this function. Otherwise, it will check if the in-index buffer already has newlines that it would now + /// want to change, and avoid doing so as what's in Git should be what's desired (except for when *renormalizing*). /// If `round_trip_check` is not `None`, round-tripping will be validated and handled accordingly. pub fn convert_to_git( src: &[u8], diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index 0572dd451b1..4962296656d 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -91,7 +91,7 @@ impl Pipeline { self.options.eol_config, )?; - let mut in_buffer = false; + let mut in_src_buffer = false; // this is just an approximation, but it's as good as it gets without reading the actual input. 
let would_convert_eol = eol::convert_to_git( b"\r\n", @@ -119,13 +119,13 @@ impl Pipeline { } self.bufs.clear(); read.read_to_end(&mut self.bufs.src)?; - in_buffer = true; + in_src_buffer = true; } } - if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { + if !in_src_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { self.bufs.clear(); src.read_to_end(&mut self.bufs.src)?; - in_buffer = true; + in_src_buffer = true; } if let Some(encoding) = encoding { @@ -158,7 +158,7 @@ impl Pipeline { if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest)? { self.bufs.swap(); } - Ok(if in_buffer { + Ok(if in_src_buffer { ToGitOutcome::Buffer(&self.bufs.src) } else { ToGitOutcome::Unchanged(src) diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml new file mode 100644 index 00000000000..93a5ae5b664 --- /dev/null +++ b/gix-merge/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "gix-merge" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project implementing merge algorithms" +authors = ["Sebastian Thiel "] +edition = "2021" +rust-version = "1.65" + +[lints] +workspace = true + +[lib] +doctest = false + +[features] +default = ["blob"] +## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. +blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
+serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] + +[dependencies] +gix-hash = { version = "^0.14.2", path = "../gix-hash" } +gix-object = { version = "^0.44.0", path = "../gix-object" } +gix-filter = { version = "^0.13.0", path = "../gix-filter", optional = true } +gix-worktree = { version = "^0.36.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true } +gix-command = { version = "^0.3.9", path = "../gix-command", optional = true } +gix-path = { version = "^0.10.11", path = "../gix-path", optional = true } +gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true } +gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true } +gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true } +gix-quote = { version = "^0.4.12", path = "../gix-quote", optional = true } + +thiserror = "1.0.63" +imara-diff = { version = "0.1.7", optional = true } +bstr = { version = "1.5.0", default-features = false } +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } + +document-features = { version = "0.2.0", optional = true } + +[dev-dependencies] +gix-testtools = { path = "../tests/tools" } +pretty_assertions = "1.4.0" + +[package.metadata.docs.rs] +all-features = true +features = ["document-features"] diff --git a/gix-merge/LICENSE-APACHE b/gix-merge/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-merge/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-merge/LICENSE-MIT b/gix-merge/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-merge/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-merge/src/blob/builtin_driver/binary.rs b/gix-merge/src/blob/builtin_driver/binary.rs new file mode 100644 index 00000000000..6d4a9696584 --- /dev/null +++ 
b/gix-merge/src/blob/builtin_driver/binary.rs @@ -0,0 +1,43 @@ +/// What to do when having to pick a side to resolve a conflict. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum ResolveWith { + /// Chose the ancestor to resolve a conflict. + Ancestor, + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, +} + +/// Tell the caller of [`merge()`](function::merge) which side was picked. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, +} + +pub(super) mod function { + use crate::blob::builtin_driver::binary::{Pick, ResolveWith}; + use crate::blob::Resolution; + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. + pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(resolve) => ( + match resolve { + ResolveWith::Ours => Pick::Ours, + ResolveWith::Theirs => Pick::Theirs, + ResolveWith::Ancestor => Pick::Ancestor, + }, + Resolution::Complete, + ), + } + } +} diff --git a/gix-merge/src/blob/builtin_driver/mod.rs b/gix-merge/src/blob/builtin_driver/mod.rs new file mode 100644 index 00000000000..ecbc1f93373 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/mod.rs @@ -0,0 +1,30 @@ +use crate::blob::BuiltinDriver; + +impl BuiltinDriver { + /// Return the name of this instance. + pub fn as_str(&self) -> &str { + match self { + BuiltinDriver::Text => "text", + BuiltinDriver::Binary => "binary", + BuiltinDriver::Union => "union", + } + } + + /// Get all available built-in drivers. 
+ pub fn all() -> &'static [Self] { + &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] + } + + /// Try to match one of our variants to `name`, case-sensitive, and return its instance. + pub fn by_name(name: &str) -> Option { + Self::all().iter().find(|variant| variant.as_str() == name).copied() + } +} + +/// +pub mod binary; +pub use binary::function::merge as binary; + +/// +pub mod text; +pub use text::function::merge as text; diff --git a/gix-merge/src/blob/builtin_driver/text/function.rs b/gix-merge/src/blob/builtin_driver/text/function.rs new file mode 100644 index 00000000000..bb800ce47e2 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/function.rs @@ -0,0 +1,239 @@ +use crate::blob::builtin_driver::text::utils::{ + assure_ends_with_nl, contains_lines, detect_line_ending, detect_line_ending_or_nl, fill_ancestor, + hunks_differ_in_diff3, take_intersecting, tokens, write_ancestor, write_conflict_marker, write_hunks, + zealously_contract_hunks, CollectHunks, Hunk, Side, +}; +use crate::blob::builtin_driver::text::{Conflict, ConflictStyle, Labels, Options}; +use crate::blob::Resolution; + +/// Merge `current` and `other` with `ancestor` as base according to `opts`. +/// +/// Use `labels` to annotate conflict sections. +/// +/// `input` is for reusing memory for lists of tokens, but note that it grows indefinitely +/// while tokens for `current`, `ancestor` and `other` are added. +/// Place the merged result in `out` (cleared before use) and return the resolution. +/// +/// # Important +/// +/// *The caller* is responsible for clearing `input`, otherwise tokens will accumulate. +/// This idea is to save time if the input is known to be very similar. 
+#[allow(clippy::too_many_arguments)] +pub fn merge<'a>( + out: &mut Vec, + input: &mut imara_diff::intern::InternedInput<&'a [u8]>, + Labels { + ancestor: ancestor_label, + current: current_label, + other: other_label, + }: Labels<'_>, + current: &'a [u8], + ancestor: &'a [u8], + other: &'a [u8], + opts: Options, +) -> Resolution { + out.clear(); + input.update_before(tokens(ancestor)); + input.update_after(tokens(current)); + + let current_hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Current, + hunks: Default::default(), + }, + ); + + let current_tokens = std::mem::take(&mut input.after); + input.update_after(tokens(other)); + + let mut hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Other, + hunks: current_hunks, + }, + ); + + hunks.sort_by(|a, b| a.before.start.cmp(&b.before.start)); + let mut hunks = hunks.into_iter().peekable(); + let mut intersecting = Vec::new(); + let mut ancestor_integrated_until = 0; + let mut resolution = Resolution::Complete; + let mut filled_hunks = Vec::with_capacity(2); + while let Some(hunk) = hunks.next() { + if take_intersecting(&hunk, &mut hunks, &mut intersecting) { + fill_ancestor(&hunk.before, &mut intersecting); + + let filled_hunks_side = hunk.side; + filled_hunks.clear(); + filled_hunks.push(hunk); + fill_ancestor( + &intersecting + .first() + .zip(intersecting.last()) + .map(|(f, l)| f.before.start..l.before.end) + .expect("at least one entry"), + &mut filled_hunks, + ); + match opts.conflict { + Conflict::Keep { style, marker_size } => { + let (hunks_front_and_back, num_hunks_front) = match style { + ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => { + zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens) + } + ConflictStyle::Diff3 => (Vec::new(), 0), + }; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, 
&filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + let last_hunk = back_hunks + .last() + .or(their_hunks.last()) + .or(our_hunks.last()) + .or(front_hunks.last()) + .expect("at least one hunk"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + + write_hunks(front_hunks, input, ¤t_tokens, out); + if their_hunks.is_empty() { + write_hunks(our_hunks, input, ¤t_tokens, out); + } else if our_hunks.is_empty() { + // TODO: assure we run into this - currently no test triggers this. Can this happen at all? + write_hunks(their_hunks, input, ¤t_tokens, out); + } else { + // DEVIATION: this makes tests (mostly) pass, but probably is very different from what Git does. + let hunk_storage; + let nl = detect_line_ending( + if front_hunks.is_empty() { + hunk_storage = Hunk { + before: ancestor_integrated_until..first_hunk.before.start, + after: Default::default(), + side: Side::Ancestor, + }; + std::slice::from_ref(&hunk_storage) + } else { + front_hunks + }, + input, + ¤t_tokens, + ) + .or_else(|| detect_line_ending(our_hunks, input, ¤t_tokens)) + .unwrap_or(b"\n".into()); + match style { + ConflictStyle::Merge => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + resolution = Resolution::Conflict; + write_conflict_marker(out, b'<', current_label, marker_size, nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, marker_size, nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, marker_size, nl); + } + } + ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + if hunks_differ_in_diff3(style, our_hunks, their_hunks, input, 
¤t_tokens) { + resolution = Resolution::Conflict; + write_conflict_marker(out, b'<', current_label, marker_size, nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + let ancestor_hunk = Hunk { + before: first_hunk.before.start..last_hunk.before.end, + after: Default::default(), + side: Side::Ancestor, + }; + let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); + let ancestor_nl = + detect_line_ending_or_nl(ancestor_hunk, input, ¤t_tokens); + write_conflict_marker(out, b'|', ancestor_label, marker_size, ancestor_nl); + write_hunks(ancestor_hunk, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, marker_size, nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, marker_size, nl); + } else { + write_hunks(our_hunks, input, ¤t_tokens, out); + } + } + } + } + } + write_hunks(back_hunks, input, ¤t_tokens, out); + ancestor_integrated_until = last_hunk.before.end; + } + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs => { + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let hunks_to_write = if opts.conflict == Conflict::ResolveWithOurs { + our_hunks + } else { + their_hunks + }; + if let Some(first_hunk) = hunks_to_write.first() { + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + } + write_hunks(hunks_to_write, input, ¤t_tokens, out); + if let Some(last_hunk) = hunks_to_write.last() { + ancestor_integrated_until = last_hunk.before.end; + } + } + Conflict::ResolveWithUnion => { + let (hunks_front_and_back, num_hunks_front) = + zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens); + + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, 
&filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + write_hunks(front_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); + } + write_hunks(back_hunks, input, ¤t_tokens, out); + let last_hunk = back_hunks + .last() + .or(their_hunks.last()) + .or(our_hunks.last()) + .or(front_hunks.last()) + .expect("at least one hunk"); + ancestor_integrated_until = last_hunk.before.end; + } + } + } else { + write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); + ancestor_integrated_until = hunk.before.end; + write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); + } + } + write_ancestor(input, ancestor_integrated_until, input.before.len(), out); + + resolution +} diff --git a/gix-merge/src/blob/builtin_driver/text/mod.rs b/gix-merge/src/blob/builtin_driver/text/mod.rs new file mode 100644 index 00000000000..1c4287dc7be --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/mod.rs @@ -0,0 +1,124 @@ +use bstr::BStr; + +/// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express +/// merge conflicts in the resulting file. 
+#[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum ConflictStyle { + /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + /// hiding the base version entirely. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + ///``` + #[default] + Merge, + /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + /// + /// This mode does not hide any information. + /// ``` + /// <<<<<<< local + /// line1-changed-by-both + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line1-changed-by-both + /// line2-changed + /// >>>>>>> incoming + ///``` + Diff3, + /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, + /// as well as non-minimized hunks of the base. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + /// ``` + ZealousDiff3, +} + +/// The set of labels to annotate conflict markers with. +/// +/// That way it becomes clearer where the content of conflicts are originating from. +#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)] +pub struct Labels<'a> { + pub ancestor: Option<&'a BStr>, + pub current: Option<&'a BStr>, + pub other: Option<&'a BStr>, +} + +/// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct Options { + /// Determine of the diff will be performed. + /// Defaults to [`imara_diff::Algorithm::Myers`]. 
+ pub diff_algorithm: imara_diff::Algorithm, + /// Decide what to do to automatically resolve conflicts, or to keep them + /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. + pub conflict: Conflict, +} + +impl Default for Options { + fn default() -> Self { + Options { + conflict: Default::default(), + diff_algorithm: imara_diff::Algorithm::Myers, + } + } +} + +/// What to do to resolve a conflict. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Conflict { + /// Keep the conflict by marking it in the source file. + Keep { + /// How to visualize conflicts in merged files. + style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + marker_size: usize, + }, + /// Chose our side to resolve a conflict. + ResolveWithOurs, + /// Chose their side to resolve a conflict. + ResolveWithTheirs, + /// Place our and their lines one after another, in any order + ResolveWithUnion, +} + +impl Conflict { + /// The amount of conflict marker characters to print by default. + pub const DEFAULT_MARKER_SIZE: usize = 7; + + /// The amount of conflict markers to print if this instance contains them, or `None` otherwise + pub fn marker_size(&self) -> Option { + match self { + Conflict::Keep { marker_size, .. 
} => Some(*marker_size), + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs | Conflict::ResolveWithUnion => None, + } + } +} + +impl Default for Conflict { + fn default() -> Self { + Conflict::Keep { + style: Default::default(), + marker_size: Conflict::DEFAULT_MARKER_SIZE, + } + } +} + +pub(super) mod function; +mod utils; diff --git a/gix-merge/src/blob/builtin_driver/text/utils.rs b/gix-merge/src/blob/builtin_driver/text/utils.rs new file mode 100644 index 00000000000..1aab3e47f08 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/utils.rs @@ -0,0 +1,473 @@ +use crate::blob::builtin_driver::text::ConflictStyle; +use bstr::{BStr, ByteSlice, ByteVec}; +use std::iter::Peekable; +use std::ops::Range; + +/// Used only when `diff3` is the conflict style as `zdiff3` automatically reduces hunks into nothing. +/// Here we check if all hunks are the same. +pub fn hunks_differ_in_diff3( + style: ConflictStyle, + a: &[Hunk], + b: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> bool { + if style != ConflictStyle::Diff3 { + return true; + } + + let tokens_for_hunk = + |hunk: &Hunk| -> &[imara_diff::intern::Token] { tokens_for_side(hunk.side, input, current_tokens) }; + + a.iter() + .flat_map(tokens_for_hunk) + .ne(b.iter().flat_map(tokens_for_hunk)) +} + +pub fn contains_lines(hunks: &[Hunk]) -> bool { + hunks.iter().any(|h| !h.after.is_empty()) +} + +/// ## Deviation +/// +/// This implementation definitely isn't the same as in Git, primarily because it seemed impossible +/// to understand what's going on there without investing more time than it seemed worth. 
+pub fn detect_line_ending( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> Option<&'static BStr> { + fn is_eol_crlf( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + ) -> Option { + let (range, side) = hunks.iter().rev().find_map(|h| { + (!h.after.is_empty()) + .then_some((&h.after, h.side)) + .or((!h.before.is_empty()).then_some((&h.before, Side::Ancestor))) + })?; + + let tokens = tokens_for_side(side, input, current_tokens); + { + let last_line = tokens + .get(range.end as usize - 1) + .map(|token| &input.interner[*token])?; + if last_line.last() == Some(&b'\n') { + return last_line.get(last_line.len().checked_sub(2)?).map(|c| *c == b'\r'); + } + } + let second_to_last_line = tokens + .get(range.end.checked_sub(2)? as usize) + .map(|token| &input.interner[*token])?; + second_to_last_line + .get(second_to_last_line.len().checked_sub(2)?) 
+ .map(|c| *c == b'\r') + } + is_eol_crlf(hunks, input, current_tokens).map(|is_crlf| if is_crlf { b"\r\n".into() } else { b"\n".into() }) +} + +pub fn detect_line_ending_or_nl( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> &'static BStr { + detect_line_ending(hunks, input, current_tokens).unwrap_or(b"\n".into()) +} + +fn tokens_for_side<'a>( + side: Side, + input: &'a imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &'a [imara_diff::intern::Token], +) -> &'a [imara_diff::intern::Token] { + match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + } +} + +pub fn assure_ends_with_nl(out: &mut Vec, nl: &BStr) { + if !out.is_empty() && !out.ends_with(b"\n") { + out.push_str(nl); + } +} + +pub fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: usize, nl: &BStr) { + assure_ends_with_nl(out, nl); + out.extend(std::iter::repeat(marker).take(marker_size)); + if let Some(label) = label { + out.push(b' '); + out.extend_from_slice(label); + } + out.push_str(nl); +} + +pub fn write_ancestor(input: &imara_diff::intern::InternedInput<&[u8]>, from: u32, to: usize, out: &mut Vec) { + if to < from as usize { + return; + } + if let Some(tokens) = input.before.get(from as usize..to) { + write_tokens(&input.interner, tokens, out); + } +} + +/// Look at all hunks in `in_out` and fill in the ancestor in the range of `ancestor_range`. +/// This is all based on knowing the ranges are sequences of tokens. 
+pub fn fill_ancestor(Range { start, end }: &Range, in_out: &mut Vec) { + fn is_nonzero(num: &u32) -> bool { + *num > 0 + } + if in_out.is_empty() { + return; + } + let first = &in_out[0]; + let mut first_idx = 0; + if let Some(lines_to_add) = first.before.start.checked_sub(*start).filter(is_nonzero) { + in_out.insert(0, ancestor_hunk(*start, lines_to_add)); + first_idx += 1; + } + + let mut added_hunks = false; + for (idx, next_idx) in (first_idx..in_out.len()).map(|idx| (idx, idx + 1)) { + let Some(next_hunk) = in_out.get(next_idx) else { break }; + let hunk = &in_out[idx]; + if let Some(lines_to_add) = next_hunk.after.start.checked_sub(hunk.after.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(hunk.after.end, lines_to_add)); + added_hunks = true; + } + } + let in_out_len = in_out.len(); + if added_hunks { + in_out[first_idx..in_out_len].sort_by_key(|hunk| hunk.before.start); + } + + let last = &in_out[in_out_len - 1]; + if let Some(lines_to_add) = end.checked_sub(last.before.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(last.before.end, lines_to_add)); + } +} + +fn ancestor_hunk(start: u32, num_lines: u32) -> Hunk { + let range = start..start + num_lines; + Hunk { + before: range.clone(), + after: range, + side: Side::Ancestor, + } +} + +/// Reduce the area of `a_hunks` and the hunks in `b_hunks` so that only those lines that are +/// actually different remain. Note that we have to compare the resolved values, not only the tokens, +/// so `current_tokens` is expected to be known to the `input` (and its `interner`). +/// Hunks from all input arrays maybe removed in the process from the front and back, in case they +/// are entirely equal to what's in `hunk`. Note also that `a_hunks` and `b_hunks` are treated to be consecutive, +/// so [`fill_ancestor()`] must have been called beforehand, and are assumed to covert the same space in the +/// ancestor buffer. +/// Use `mode` to determine how hunks may be handled. 
+/// +/// Return a new vector of all the hunks that were removed from front and back, with partial hunks inserted, +/// along with the amount of hunks that go front, with the remaining going towards the back. +#[must_use] +pub fn zealously_contract_hunks( + a_hunks: &mut Vec, + b_hunks: &mut Vec, + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> (Vec, usize) { + let line_content = |token_idx: u32, side: Side| { + let tokens = match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + }; + &input.interner[tokens[token_idx as usize]] + }; + let (mut last_a_hunk_idx, mut last_b_hunk_idx) = (0, 0); + let (mut out, hunks_in_front) = { + let (mut remove_leading_a_hunks_from, mut remove_leading_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_till, mut b_hunk_token_equal_till) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks(a_hunks).zip(iterate_hunks(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_till = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_till = None; + last_b_hunk_idx = b_hunk_idx; + } + if a_line == b_line { + (remove_leading_a_hunks_from, remove_leading_b_hunks_from) = (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_till, b_hunk_token_equal_till) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + let mut out = Vec::with_capacity(remove_leading_a_hunks_from.unwrap_or_else(|| { + if a_hunk_token_equal_till.is_some() { + 1 + } else { + 0 + } + })); + truncate_hunks_from_from_front( + a_hunks, + remove_leading_a_hunks_from, + a_hunk_token_equal_till, + Some(&mut out), + ); + truncate_hunks_from_from_front(b_hunks, 
remove_leading_b_hunks_from, b_hunk_token_equal_till, None); + let hunks_in_front = out.len(); + (out, hunks_in_front) + }; + + (last_a_hunk_idx, last_b_hunk_idx) = (0, 0); + { + let (mut remove_trailing_a_hunks_from, mut remove_trailing_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_from, mut b_hunk_token_equal_from) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks_rev(a_hunks).zip(iterate_hunks_rev(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_from = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_from = None; + last_b_hunk_idx = b_hunk_idx; + } + + if a_line == b_line { + (remove_trailing_a_hunks_from, remove_trailing_b_hunks_from) = (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_from, b_hunk_token_equal_from) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + truncate_hunks_from_from_back( + a_hunks, + remove_trailing_a_hunks_from, + a_hunk_token_equal_from, + Some(&mut out), + ); + truncate_hunks_from_from_back(b_hunks, remove_trailing_b_hunks_from, b_hunk_token_equal_from, None); + } + + (out, hunks_in_front) +} + +fn range_by_side(hunk: &mut Hunk) -> &mut Range { + match hunk.side { + Side::Current | Side::Other => &mut hunk.after, + Side::Ancestor => &mut hunk.before, + } +} +fn truncate_hunks_from_from_front( + hunks: &mut Vec, + hunks_to_remove_until_idx: Option, + hunk_token_equal_till: Option, + mut out_hunks: Option<&mut Vec>, +) { + let Some(hunks_to_remove_until_idx) = hunks_to_remove_until_idx else { + assert!(hunk_token_equal_till.is_none()); + return; + }; + let mut last_index_to_remove = Some(hunks_to_remove_until_idx); + let hunk = &mut hunks[hunks_to_remove_until_idx]; + let range = range_by_side(hunk); + if let 
Some(hunk_token_equal_till) = hunk_token_equal_till { + let orig_start = range.start; + let new_start = hunk_token_equal_till + 1; + range.start = new_start; + if Range::::is_empty(range) { + range.start = orig_start; + } else if let Some(out) = out_hunks.as_deref_mut() { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = orig_start; + new_range.end = new_start; + + out.push(removed_hunk); + } else { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + } + } + if let Some(last_index_to_remove) = last_index_to_remove { + let mut current_idx = 0; + hunks.retain(|hunk| { + if current_idx > last_index_to_remove { + true + } else { + current_idx += 1; + if let Some(out) = out_hunks.as_deref_mut() { + out.push(hunk.clone()); + } + false + } + }); + } +} + +fn truncate_hunks_from_from_back( + hunks: &mut Vec, + remove_trailing_hunks_from_idx: Option, + hunk_token_equal_from: Option, + mut out_hunks: Option<&mut Vec>, +) { + let Some(mut remove_trailing_hunks_from_idx) = remove_trailing_hunks_from_idx else { + assert!(hunk_token_equal_from.is_none()); + return; + }; + + let hunk = &mut hunks[remove_trailing_hunks_from_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_from) = hunk_token_equal_from { + let orig_end = range.end; + let new_end = hunk_token_equal_from; + range.end = new_end; + if Range::::is_empty(range) { + range.end = orig_end; + } else if let Some(out) = out_hunks.as_deref_mut() { + remove_trailing_hunks_from_idx += 1; + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = new_end; + new_range.end = orig_end; + + out.push(removed_hunk); + } else { + remove_trailing_hunks_from_idx += 1; + } + } + if let Some(out) = out_hunks { + out.extend_from_slice(&hunks[remove_trailing_hunks_from_idx..]); + } + 
hunks.truncate(remove_trailing_hunks_from_idx); +} + +/// Return an iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. +fn iterate_hunks(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) +} + +/// Return a reverse iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. +fn iterate_hunks_rev(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().rev().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .rev() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) +} + +pub fn write_hunks( + hunks: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + out: &mut Vec, +) { + for hunk in hunks { + let (tokens, range) = match hunk.side { + Side::Current => (current_tokens, &hunk.after), + Side::Other => (input.after.as_slice(), &hunk.after), + Side::Ancestor => (input.before.as_slice(), &hunk.before), + }; + write_tokens(&input.interner, &tokens[usize_range(range)], out); + } +} + +fn usize_range(range: &Range) -> Range { + range.start as usize..range.end as usize +} + +fn write_tokens( + interner: &imara_diff::intern::Interner<&[u8]>, + tokens: &[imara_diff::intern::Token], + out: &mut Vec, +) { + for token in tokens { + out.extend_from_slice(interner[*token]); + } +} + +/// Find all hunks in `iter` which aren't from the same side as `hunk` and intersect with it. +/// Return `true` if `out` is non-empty after the operation, indicating overlapping hunks were found. 
+pub fn take_intersecting(hunk: &Hunk, iter: &mut Peekable>, out: &mut Vec) -> bool { + out.clear(); + while iter + .peek() + .filter(|b_hunk| { + b_hunk.side != hunk.side + && (hunk.before.contains(&b_hunk.before.start) + || (hunk.before.is_empty() && hunk.before.start == b_hunk.before.start)) + }) + .is_some() + { + out.extend(iter.next()); + } + !out.is_empty() +} + +pub fn tokens(input: &[u8]) -> imara_diff::sources::ByteLines<'_, true> { + imara_diff::sources::byte_lines_with_terminator(input) +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Side { + Current, + Other, + /// A special marker that is just used to be able to mix-in hunks that only point to the ancestor. + /// Only `before` matters then. + Ancestor, +} + +#[derive(Debug, Clone)] +pub struct Hunk { + pub before: Range, + pub after: Range, + pub side: Side, +} + +pub struct CollectHunks { + pub hunks: Vec, + pub side: Side, +} + +impl imara_diff::Sink for CollectHunks { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + self.hunks.push(Hunk { + before, + after, + side: self.side, + }); + } + + fn finish(self) -> Self::Out { + self.hunks + } +} diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs new file mode 100644 index 00000000000..07f544a2e23 --- /dev/null +++ b/gix-merge/src/blob/mod.rs @@ -0,0 +1,183 @@ +// TODO: remove this - only needed while &mut Vec isn't used. +#![allow(clippy::ptr_arg)] + +use crate::blob::platform::{DriverChoice, ResourceRef}; +use bstr::BString; +use std::path::PathBuf; + +/// +pub mod builtin_driver; +/// +pub mod pipeline; +/// +pub mod platform; + +/// Define if a merge is conflicted or not. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Resolution { + /// Everything could be resolved during the merge. + /// + /// Conflicts may have been resolved automatically, depending on the options. + Complete, + /// A conflict is still present in the form of conflict markers. 
+ /// + /// Note that this won't be the case if conflicts were automatically resolved. + Conflict, +} + +/// A way to classify the side of a resource for merging. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum ResourceKind { + /// Our side of the state. + CurrentOrOurs, + /// Their side of the state. + OtherOrTheirs, + /// The state of the common base of both ours and theirs. + CommonAncestorOrBase, +} + +/// Define a built-in way of performing a three-way merge, including auto-resolution support. +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum BuiltinDriver { + /// Perform a merge between text-sources such that conflicts are marked according to + /// `merge.conflictStyle` in the Git configuration. + /// + /// If any of the inputs, *base*, *ours* or *theirs* looks like non-text/binary, + /// the [`Binary`](Self::Binary) driver will be used instead. + /// + /// Also see [`builtin_driver::text::ConflictStyle`]. + #[default] + Text, + /// Merge 'unmergable' content by choosing *ours* or *theirs*, without performing + /// an actual merge. + /// + /// Note that if the merge operation is for virtual ancestor (a merge for merge-bases), + /// then *ours* will always be chosen. + Binary, + /// Merge text-sources and resolve conflicts by adding conflicting lines one after another, + /// in random order, without adding conflict markers either. + /// + /// This can be useful for files that change a lot, but will remain usable merely by adding + /// all changed lines. + Union, +} + +/// Define a driver program that performs a three-way merge. +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Driver { + /// The name of the driver, as referred to by `[merge "name"]` in the git configuration. 
+ pub name: BString, + /// The human-readable version of `name`, only to be used for displaying driver-information to the user. + pub display_name: BString, + /// The command to execute to perform the merge entirely like ` %O %A %B %L %P %S %X %Y`. + /// + /// * **%O** + /// - the common ancestor version, or *base*. + /// * **%A** + /// - the current version, or *ours*. + /// * **%B** + /// - the other version, or *theirs*. + /// * **%L** + /// - The conflict-marker size as positive number. + /// * **%P** + /// - The path in which the merged result would be stored, as workspace-relative path, of the current/ours side. + /// * **%S** + /// - The conflict-label for the common ancestor or *base*. + /// * **%X** + /// - The conflict-label for the current version or *ours*. + /// * **%Y** + /// - The conflict-label for the other version or *theirs*. + /// + /// Note that conflict-labels are behind the conflict markers, to annotate them. + /// + /// A typical invocation with all arguments substituted could then look like this: + /// + /// ``` + /// .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature + /// ``` + /// + /// The driver is expected to leave its version in the file at `%A`, by overwriting it. + pub command: BString, + /// If `true`, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all + /// available merge-bases if there are more than one. + /// + /// This value can also be special built-in drivers named `text`, `binary` or `union`. Note that user-defined + /// drivers with the same name will be preferred over built-in ones, but only for files whose git attributes + /// specified the driver by *name*. + pub recursive: Option, +} + +/// A conversion pipeline to take an object or path from what's stored in Git to what can be merged, while +/// following the guidance of git-attributes at the respective path to learn how the merge should be performed. 
+/// +/// Depending on the source, different conversions are performed: +/// +/// * `worktree on disk` -> `object for storage in git` +/// * `object` -> `possibly renormalized object` +/// - Renormalization means that the `object` is converted to what would be checked out into the work-tree, +/// just to turn it back into an object. +#[derive(Clone)] +pub struct Pipeline { + /// A way to read data directly from the worktree. + pub roots: pipeline::WorktreeRoots, + /// A pipeline to convert objects from the worktree to Git, and also from Git to the worktree, and back to Git. + pub filter: gix_filter::Pipeline, + /// Options affecting the way we read files. + pub options: pipeline::Options, + /// A buffer to produce disk-accessible paths from worktree roots. + path: PathBuf, +} + +/// A utility for gathering and processing all state necessary to perform a three-way merge. +/// +/// It can re-use buffers if all three parts of participating in the merge are +/// set repeatedly. +#[derive(Clone)] +pub struct Platform { + /// The current version (ours). + current: Option, + /// The ancestor version (base). + ancestor: Option, + /// The other version (theirs). + other: Option, + + /// A way to convert objects into a diff-able format. + pub filter: Pipeline, + /// A way to access `.gitattributes` + pub attr_stack: gix_worktree::Stack, + /// Further configuration that affects the merge. + pub options: platform::Options, + /// All available merge drivers. + /// + /// They are referenced in git-attributes by name, and we hand out indices into this array. + drivers: Vec, + /// Pre-configured attributes to obtain additional merge-related information. + attrs: gix_filter::attributes::search::Outcome, + /// The way we convert resources into mergeable states. + filter_mode: pipeline::Mode, +} + +/// The product of a [`prepare_merge()`](Platform::prepare_merge()) call to finally +/// perform the merge and retrieve the merge results. 
+#[derive(Copy, Clone)] +pub struct PlatformRef<'parent> { + /// The platform that hosts the resources, used to access drivers. + pub(super) parent: &'parent Platform, + /// The current or our side of the merge operation. + pub current: ResourceRef<'parent>, + /// The ancestor or base of the merge operation. + pub ancestor: ResourceRef<'parent>, + /// The other or their side of the merge operation. + pub other: ResourceRef<'parent>, + /// Which driver to use according to the resource's configuration, + /// using the path of `current` to read git-attributes. + pub driver: DriverChoice, + /// Possibly processed options for use when performing the actual merge. + /// + /// They may be inspected before the merge, or altered at will. + pub options: platform::merge::Options, +} diff --git a/gix-merge/src/blob/pipeline.rs b/gix-merge/src/blob/pipeline.rs new file mode 100644 index 00000000000..776d908e203 --- /dev/null +++ b/gix-merge/src/blob/pipeline.rs @@ -0,0 +1,340 @@ +use super::{Pipeline, ResourceKind}; +use bstr::BStr; +use gix_filter::driver::apply::{Delay, MaybeDelayed}; +use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}; +use gix_object::tree::EntryKind; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// Options for use in a [`Pipeline`]. +#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// The amount of bytes that an object has to reach before being treated as binary. + /// These objects will not be queried, nor will their data be processed in any way. + /// If `0`, no file is ever considered binary due to their size. + /// + /// Note that for files stored in `git`, what counts is their stored, decompressed size, + /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets + /// them. 
+ /// However, if they are to be retrieved from the worktree, the worktree size is what matters, + /// even though that also might be a `git-lfs` file which is small in Git. + pub large_file_threshold_bytes: u64, +} + +/// The specific way to convert a resource. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Mode { + /// Prepare resources as they are stored in `git`. + /// + /// This is naturally the case when object-ids are used, but a conversion is needed + /// when data is read from a worktree. + #[default] + ToGit, + /// For sources that are object-ids, convert them to what *would* be stored in the worktree, + /// and back to what *would* be stored in Git. + /// + /// Sources that are located in a worktree are merely converted to what *would* be stored in Git. + /// + /// This is useful to prevent merge conflicts due to inconcistent whitespace. + Renormalize, +} + +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. +#[derive(Clone, Debug, Default)] +pub struct WorktreeRoots { + /// The worktree root where the current (or our) version of the resource is present. + pub current_root: Option, + /// The worktree root where the other (or their) version of the resource is present. + pub other_root: Option, + /// The worktree root where containing the resource of the common ancestor of our and their version. + pub common_ancestor_root: Option, +} + +impl WorktreeRoots { + /// Return the root path for the given `kind` + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { + match kind { + ResourceKind::CurrentOrOurs => self.current_root.as_deref(), + ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(), + ResourceKind::OtherOrTheirs => self.other_root.as_deref(), + } + } + + /// Return `true` if all worktree roots are unset. 
+ pub fn is_unset(&self) -> bool { + self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none() + } +} + +/// Lifecycle +impl Pipeline { + /// Create a new instance of a pipeline which produces blobs suitable for merging. + /// + /// `roots` allow to read worktree files directly, and `worktree_filter` is used + /// to transform object database data directly. + /// `options` are used to further configure the way we act. + pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self { + Pipeline { + roots, + filter: worktree_filter, + options, + path: Default::default(), + } + } +} + +/// Access +impl Pipeline {} + +/// Data as returned by [`Pipeline::convert_to_mergeable()`]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub enum Data { + /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`]. + Buffer, + /// The file or blob is above the big-file threshold and cannot be processed. + /// + /// In this state, the file cannot be merged. + TooLarge { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree - both can differ a lot depending on filters. + size: u64, + }, +} + +/// +pub mod convert_to_mergeable { + use std::collections::TryReserveError; + + use bstr::BString; + use gix_object::tree::EntryKind; + + /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")] + InvalidEntryKind { rela_path: BString, actual: EntryKind }, + #[error("Entry at '{rela_path}' could not be read as symbolic link")] + ReadLink { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be opened for reading or read from")] + OpenOrRead { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")] + StreamCopy { rela_path: BString, source: std::io::Error }, + #[error(transparent)] + FindObject(#[from] gix_object::find::existing_object::Error), + #[error(transparent)] + ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), + #[error("Memory allocation failed")] + OutOfMemory(#[from] TryReserveError), + } +} + +/// Conversion +impl Pipeline { + /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. + /// The resulting merge-able data is written into `out`, if it's not too large. + /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating + /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist + /// in the object database. + /// + /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings, + /// and `objects` must be usable if `kind` is a resource in the object database, + /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`, + /// then a `rela_path` is used to access it on disk. 
+ /// + /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`. + /// This is useful to simplify the calling code as empty buffers signal that nothing is there. + /// + /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. + /// Only blobs are allowed. + /// + /// Use `convert` to control what kind of the resource will be produced. + #[allow(clippy::too_many_arguments)] + pub fn convert_to_mergeable( + &mut self, + id: &gix_hash::oid, + mode: EntryKind, + rela_path: &BStr, + kind: ResourceKind, + attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome), + objects: &dyn gix_object::FindObjectOrHeader, + convert: Mode, + out: &mut Vec, + ) -> Result, convert_to_mergeable::Error> { + if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) { + return Err(convert_to_mergeable::Error::InvalidEntryKind { + rela_path: rela_path.to_owned(), + actual: mode, + }); + } + + out.clear(); + match self.roots.by_kind(kind) { + Some(root) => { + self.path.clear(); + self.path.push(root); + self.path.push(gix_path::from_bstr(rela_path)); + let size_in_bytes = (self.options.large_file_threshold_bytes > 0) + .then(|| { + none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + }) + }) + .transpose()?; + let data = match size_in_bytes { + Some(None) => None, // missing as identified by the size check + Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }), + _ => { + let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + + if let Some(file) = 
file { + match convert { + Mode::ToGit | Mode::Renormalize => { + let res = self.filter.convert_to_git( + file, + gix_path::from_bstr(rela_path).as_ref(), + attributes, + &mut |buf| { + if convert == Mode::Renormalize { + Ok(None) + } else { + objects.try_find(id, buf).map(|obj| obj.map(|_| ())) + } + }, + )?; + + match res { + ToGitOutcome::Unchanged(mut file) => { + file.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Process(mut stream) => { + stream.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + } + } + + Some(Data::Buffer) + } else { + None + } + } + }; + Ok(data) + } + None => { + let data = if id.is_null() { + None + } else { + let header = objects + .try_header(id) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + let is_binary = self.options.large_file_threshold_bytes > 0 + && header.size > self.options.large_file_threshold_bytes; + let data = if is_binary { + Data::TooLarge { size: header.size } + } else { + objects + .try_find(id, out) + .map_err(gix_object::find::existing_object::Error::Find)? 
+ .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + + if convert == Mode::Renormalize { + { + let res = self + .filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.clear(); + out.try_reserve(src.len())?; + out.extend_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_mergeable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + + let res = self.filter.convert_to_git( + &**out, + &gix_path::from_bstr(rela_path), + attributes, + &mut |_buf| Ok(None), + )?; + + match res { + ToGitOutcome::Unchanged(_) => {} + ToGitOutcome::Process(mut stream) => { + stream + .read_to_end(out) + .map_err(|err| convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + } + + Data::Buffer + }; + Some(data) + }; + Ok(data) + } + } + } +} + +fn none_if_missing(res: std::io::Result) -> std::io::Result> { + match res { + Ok(data) => Ok(Some(data)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} diff --git a/gix-merge/src/blob/platform/merge.rs b/gix-merge/src/blob/platform/merge.rs new file mode 100644 index 00000000000..1c2ec1a48b4 --- /dev/null +++ b/gix-merge/src/blob/platform/merge.rs @@ -0,0 +1,417 @@ +use crate::blob::{builtin_driver, PlatformRef, Resolution}; +use std::io::Read; +use std::path::PathBuf; + +/// Options for the use in the [`PlatformRef::merge()`] call. 
+#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)] +pub struct Options { + /// If `true`, the resources being merged are contained in a virtual ancestor, + /// which is the case when merge bases are merged into one. + pub is_virtual_ancestor: bool, + /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side. + pub resolve_binary_with: Option, + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). + pub text: builtin_driver::text::Options, +} + +/// The error returned by [`PlatformRef::merge()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("At least one resource was too large to be processed")] + ResourceTooLarge, + #[error(transparent)] + PrepareExternalDriver(#[from] inner::prepare_external_driver::Error), + #[error("Failed to launch external merge driver: {cmd}")] + SpawnExternalDriver { cmd: String, source: std::io::Error }, + #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")] + ExternalDriverFailure { + status: std::process::ExitStatus, + cmd: String, + }, + #[error("IO failed when dealing with merge-driver output")] + ExternalDriverIO(#[from] std::io::Error), +} + +/// The product of a [`PlatformRef::prepare_external_driver()`] operation. +/// +/// This type allows to creation of [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*, +/// but `stdin` closed. +/// It's expected to leave its result in the file substituted at `current` which is then supposed to be read back from there. +// TODO: remove dead-code annotation +#[allow(dead_code)] +pub struct Command { + /// The pre-configured command + cmd: std::process::Command, + /// A tempfile holding the *current* (ours) state of the resource. + current: gix_tempfile::Handle, + /// The path at which `current` is located, for reading the result back from later. 
+ current_path: PathBuf, + /// A tempfile holding the *ancestor* (base) state of the resource. + ancestor: gix_tempfile::Handle, + /// A tempfile holding the *other* (their) state of the resource. + other: gix_tempfile::Handle, +} + +// Just to keep things here but move them a level up later. +pub(super) mod inner { + /// + pub mod prepare_external_driver { + use crate::blob::builtin_driver::text::Conflict; + use crate::blob::platform::{merge, DriverChoice}; + use crate::blob::{builtin_driver, BuiltinDriver, Driver, PlatformRef, ResourceKind}; + use bstr::{BString, ByteVec}; + use gix_tempfile::{AutoRemove, ContainingDirectory}; + use std::io::Write; + use std::ops::{Deref, DerefMut}; + use std::path::{Path, PathBuf}; + use std::process::Stdio; + + /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The resource of kind {kind:?} was too large to be processed")] + ResourceTooLarge { kind: ResourceKind }, + #[error( + "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created" + )] + CreateTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error( + "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command" + )] + WriteTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + } + + /// Plumbing + impl<'parent> PlatformRef<'parent> { + /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// See the documentation of [`Driver::command`] for possible substitutions. 
+ /// + /// Please note that this is an expensive operation as this will always create three temporary files to hold all sides of the merge. + /// + /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file) + /// to read back the result into a suitable buffer. + /// + /// ### Deviation + /// + /// * We allow passing more context than Git would by taking a whole `context`, + /// it's up to the caller to decide how much is filled. + /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness. + pub fn prepare_external_driver( + &self, + merge_command: BString, + builtin_driver::text::Labels { + ancestor, + current, + other, + }: builtin_driver::text::Labels<'_>, + context: gix_command::Context, + ) -> Result { + fn write_data( + data: &[u8], + ) -> std::io::Result<(gix_tempfile::Handle, PathBuf)> { + let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?; + file.write_all(data)?; + let mut path = Default::default(); + file.with_mut(|f| { + f.path().clone_into(&mut path); + })?; + let file = file.close()?; + Ok((file, path)) + } + + let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::CommonAncestorOrBase, + })?; + let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::CurrentOrOurs, + })?; + let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::OtherOrTheirs, + })?; + + let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile { + rela_path: self.ancestor.rela_path.into(), + kind: ResourceKind::CommonAncestorOrBase, + source: err, + })?; + let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile { + rela_path: self.current.rela_path.into(), + kind: ResourceKind::CurrentOrOurs, + source: err, + })?; + let (theirs_tmp, theirs_path) =
write_data(theirs).map_err(|err| Error::CreateTempfile { + rela_path: self.other.rela_path.into(), + kind: ResourceKind::OtherOrTheirs, + source: err, + })?; + + let mut cmd = BString::from(Vec::with_capacity(merge_command.len())); + let mut count = 0; + for token in merge_command.split(|b| *b == b'%') { + count += 1; + let token = if count > 1 { + match token.first() { + Some(&b'O') => { + cmd.push_str(gix_path::into_bstr(&base_path).as_ref()); + &token[1..] + } + Some(&b'A') => { + cmd.push_str(gix_path::into_bstr(&ours_path).as_ref()); + &token[1..] + } + Some(&b'B') => { + cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref()); + &token[1..] + } + Some(&b'L') => { + let marker_size = self + .options + .text + .conflict + .marker_size() + .unwrap_or(Conflict::DEFAULT_MARKER_SIZE); + cmd.push_str(format!("{marker_size}")); + &token[1..] + } + Some(&b'P') => { + cmd.push_str(gix_quote::single(self.current.rela_path)); + &token[1..] + } + Some(&b'S') => { + cmd.push_str(gix_quote::single(ancestor.unwrap_or_default())); + &token[1..] + } + Some(&b'X') => { + cmd.push_str(gix_quote::single(current.unwrap_or_default())); + &token[1..] + } + Some(&b'Y') => { + cmd.push_str(gix_quote::single(other.unwrap_or_default())); + &token[1..] + } + Some(_other) => { + cmd.push(b'%'); + token + } + None => b"%", + } + } else { + token + }; + cmd.extend_from_slice(token); + } + + Ok(merge::Command { + cmd: gix_command::prepare(gix_path::from_bstring(cmd)) + .with_context(context) + .with_shell() + .stdin(Stdio::null()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .into(), + current: ours_tmp, + current_path: ours_path, + ancestor: base_tmp, + other: theirs_tmp, + }) + } + + /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` + /// with the built-in driver to use instead. 
+ pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { + match self.driver { + DriverChoice::BuiltIn(builtin) => Err(builtin), + DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()), + } + } + } + + impl std::fmt::Debug for merge::Command { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.cmd.fmt(f) + } + } + + impl Deref for merge::Command { + type Target = std::process::Command; + + fn deref(&self) -> &Self::Target { + &self.cmd + } + } + + impl DerefMut for merge::Command { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cmd + } + } + + impl merge::Command { + /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation. + /// Calling this makes sense only after the merge command has finished successfully. + pub fn open_result_file(&self) -> std::io::Result { + std::fs::File::open(&self.current_path) + } + } + } + + /// + pub mod builtin_merge { + use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution}; + + /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge). + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum Pick { + /// In a binary merge, chose the ancestor. + /// + /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it. + Ancestor, + /// In a binary merge, chose our side. + /// + /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it. + Ours, + /// In a binary merge, chose their side. + /// + /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it. + Theirs, + /// New data was produced with the result of the merge, to be found in the buffer that was passed to + /// [builtin_merge()](PlatformRef::builtin_merge). + /// This happens for any merge that isn't a binary merge. 
+ Buffer, + } + + /// Plumbing + impl<'parent> PlatformRef<'parent> { + /// Perform the merge using the given `driver`, possibly placing the output in `out`. + /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually. + /// Use `labels` to annotate conflict sections in case of a text-merge. + /// Returns `None` if one of the buffers is too large, making a merge impossible. + /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared, + /// and one has to take the data from the respective resource. + pub fn builtin_merge( + &self, + driver: BuiltinDriver, + out: &mut Vec, + input: &mut imara_diff::intern::InternedInput<&'parent [u8]>, + labels: builtin_driver::text::Labels<'_>, + ) -> Option<(Pick, Resolution)> { + let base = self.ancestor.data.as_slice()?; + let ours = self.current.data.as_slice()?; + let theirs = self.other.data.as_slice()?; + let driver = if driver != BuiltinDriver::Binary + && (is_binary_buf(ours) || is_binary_buf(theirs) || is_binary_buf(base)) + { + BuiltinDriver::Binary + } else { + driver + }; + Some(match driver { + BuiltinDriver::Text => { + let resolution = + builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text); + (Pick::Buffer, resolution) + } + BuiltinDriver::Binary => { + let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with); + let pick = match pick { + builtin_driver::binary::Pick::Ours => Pick::Ours, + builtin_driver::binary::Pick::Theirs => Pick::Theirs, + builtin_driver::binary::Pick::Ancestor => Pick::Ancestor, + }; + (pick, resolution) + } + BuiltinDriver::Union => { + let resolution = builtin_driver::text( + out, + input, + labels, + ours, + base, + theirs, + builtin_driver::text::Options { + conflict: builtin_driver::text::Conflict::ResolveWithUnion, + ..self.options.text + }, + ); + (Pick::Buffer, resolution) + } + }) + } + } + + fn is_binary_buf(buf: &[u8]) -> bool { + let buf = 
&buf[..buf.len().min(8000)]; + buf.contains(&0) + } + } +} + +/// Convenience +impl<'parent> PlatformRef<'parent> { + /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`. + /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer) + /// to indicate it's `out`. + /// Use `labels` to annotate conflict sections in case of a text-merge. + /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`. + /// + /// Note that at this stage, non-existing input data will simply default to an empty buffer when running the actual merge algorithm. + /// Too-large resources will result in an error. + /// + /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient + /// in terms of buffer handling to make it more useful in the face of missing local files.
+ pub fn merge( + &self, + out: &mut Vec, + labels: builtin_driver::text::Labels<'_>, + context: gix_command::Context, + ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> { + match self.configured_driver() { + Ok(driver) => { + let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context)?; + let status = cmd.status().map_err(|err| Error::SpawnExternalDriver { + cmd: format!("{:?}", cmd.cmd), + source: err, + })?; + if !status.success() { + return Err(Error::ExternalDriverFailure { + cmd: format!("{:?}", cmd.cmd), + status, + }); + } + out.clear(); + cmd.open_result_file()?.read_to_end(out)?; + Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete)) + } + Err(builtin) => { + let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); + out.clear(); + let (pick, resolution) = self + .builtin_merge(builtin, out, &mut input, labels) + .ok_or(Error::ResourceTooLarge)?; + Ok((pick, resolution)) + } + } + } + + /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying. + /// Return `None` if the buffer is too large, or if the `pick` corresponds to a buffer (that was written separately). + pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Option<&'parent [u8]> { + match pick { + inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice(), + inner::builtin_merge::Pick::Ours => self.current.data.as_slice(), + inner::builtin_merge::Pick::Theirs => self.other.data.as_slice(), + inner::builtin_merge::Pick::Buffer => None, + } + } +} diff --git a/gix-merge/src/blob/platform/mod.rs b/gix-merge/src/blob/platform/mod.rs new file mode 100644 index 00000000000..14b33d03fd5 --- /dev/null +++ b/gix-merge/src/blob/platform/mod.rs @@ -0,0 +1,127 @@ +use crate::blob::{pipeline, BuiltinDriver, Pipeline, Platform}; +use bstr::{BStr, BString}; +use gix_filter::attributes; + +/// A stored value representing a resource that participates in a merge. 
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub(super) struct Resource { + /// The `id` of the value, or `null` if it's only living in a worktree. + id: gix_hash::ObjectId, + /// The repository-relative path where the resource lives in the tree. + rela_path: BString, + /// The outcome of converting a resource into a mergable format using [Pipeline::convert_to_mergeable()]. + data: Option, + /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`. + mode: gix_object::tree::EntryKind, + /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary + /// or the resource doesn't exist. + buffer: Vec, +} + +/// A blob or executable ready to be merged in one way or another. +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct ResourceRef<'a> { + /// The data itself, suitable for merging, and if the object or worktree item is present at all. + pub data: resource::Data<'a>, + /// The location of the resource, relative to the working tree. + pub rela_path: &'a BStr, + /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at + /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which + /// after its 'to-git' conversion never had its hash computed. + pub id: &'a gix_hash::oid, +} + +/// Options for use in [`Platform::new()`]. +#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// Define which driver to use by name if the `merge` attribute for a resource is unspecified. + /// + /// This is the value of the `merge.default` git configuration. + pub default_driver: Option, +} + +/// The selection of the driver to use by a resource obtained with [`Platform::prepare_merge()`]. 
+/// +/// If available, an index into the `drivers` field to access more diff-related information of the driver for items +/// at the given path, as previously determined by git-attributes. +/// +/// * `merge` is set +/// - Use the [`BuiltinDriver::Text`] +/// * `-merge` is unset +/// - Use the [`BuiltinDriver::Binary`] +/// * `!merge` is unspecified +/// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. +/// * `merge=name` +/// - Search for a user-configured or built-in driver called `name`. +/// - If not found, silently default to [`BuiltinDriver::Text`] +/// +/// Note that drivers are queried even if there is no object available. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] +pub enum DriverChoice { + /// Use the given built-in driver to perform the merge. + BuiltIn(BuiltinDriver), + /// Use the user-provided driver program using the index into [the platform drivers array](Platform::drivers()). + Index(usize), +} + +impl Default for DriverChoice { + fn default() -> Self { + DriverChoice::BuiltIn(Default::default()) + } +} + +/// Lifecycle +impl Platform { + /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. + /// `filter_mode` decides how to do that specifically. + /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. + /// `drivers` are the list of available merge drivers that individual paths can refer to by means of git attributes. + /// `options` further configure the operation. 
+ pub fn new( + filter: Pipeline, + filter_mode: pipeline::Mode, + attr_stack: gix_worktree::Stack, + mut drivers: Vec, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); + Platform { + drivers, + current: None, + ancestor: None, + other: None, + filter, + filter_mode, + attr_stack, + attrs: { + let mut out = attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("merge")); + out + }, + options, + } + } +} + +/// Access +impl Platform { + /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](super::Driver::name) to support binary searches. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + +/// +pub mod set_resource; + +/// +pub mod resource; + +/// +pub mod merge; +pub use merge::inner::{builtin_merge, prepare_external_driver}; + +/// +pub mod prepare_merge; diff --git a/gix-merge/src/blob/platform/prepare_merge.rs b/gix-merge/src/blob/platform/prepare_merge.rs new file mode 100644 index 00000000000..24ffb5af32e --- /dev/null +++ b/gix-merge/src/blob/platform/prepare_merge.rs @@ -0,0 +1,100 @@ +use crate::blob::platform::{merge, DriverChoice, ResourceRef}; +use crate::blob::{BuiltinDriver, Platform, PlatformRef, ResourceKind}; +use bstr::{BStr, BString, ByteSlice}; +use gix_filter::attributes; + +/// The error returned by [Platform::prepare_merge_state()](Platform::prepare_merge()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] + UnsetResource, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, +} + +/// Preparation +impl Platform { + /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. 
+ /// `objects` is used to possibly lookup attribute files when obtaining merge-related attributes. + /// + /// `options` are to be used when merging later, and they may be altered to implement correct binary merges + /// in the presence of [virtual merge bases](merge::Options::is_virtual_ancestor). + /// + /// Note that no additional validation is performed here to facilitate inspection, which means that + /// resource buffers might still be too large to be merged, preventing a successful merge at a later time. + pub fn prepare_merge( + &mut self, + objects: &impl gix_object::Find, + mut options: merge::Options, + ) -> Result, Error> { + let current = self.current.as_ref().ok_or(Error::UnsetResource)?; + let ancestor = self.ancestor.as_ref().ok_or(Error::UnsetResource)?; + let other = self.other.as_ref().ok_or(Error::UnsetResource)?; + + let entry = self + .attr_stack + .at_entry(current.rela_path.as_bstr(), None, objects) + .map_err(|err| Error::Attributes { + source: err, + kind: ResourceKind::CurrentOrOurs, + rela_path: current.rela_path.clone(), + })?; + entry.matching_attributes(&mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let mut driver = match attr.assignment.state { + attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), + attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), + attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => { + let name = match attr.assignment.state { + attributes::StateRef::Value(name) => Some(name.as_bstr()), + attributes::StateRef::Unspecified => { + self.options.default_driver.as_ref().map(|name| name.as_bstr()) + } + _ => unreachable!("only value and unspecified are possible here"), + }; + self.find_driver_by_name(name) + } + }; + if let Some(recursive_driver_name) = match driver { + DriverChoice::Index(idx) => self.drivers.get(idx), + _ => None, + } + .and_then(|driver| driver.recursive.as_deref()) + 
.filter(|_| options.is_virtual_ancestor) + { + driver = self.find_driver_by_name(Some(recursive_driver_name.as_bstr())); + options.resolve_binary_with = Some(crate::blob::builtin_driver::binary::ResolveWith::Ours); + } + + let out = PlatformRef { + parent: self, + driver, + current: ResourceRef::new(current), + ancestor: ResourceRef::new(ancestor), + other: ResourceRef::new(other), + options, + }; + Ok(out) + } + + fn find_driver_by_name(&self, name: Option<&BStr>) -> DriverChoice { + name.and_then(|name| { + self.drivers + .binary_search_by(|d| d.name.as_bstr().cmp(name)) + .ok() + .map(DriverChoice::Index) + .or_else(|| { + name.to_str() + .ok() + .and_then(BuiltinDriver::by_name) + .map(DriverChoice::BuiltIn) + }) + }) + .unwrap_or_default() + } +} diff --git a/gix-merge/src/blob/platform/resource.rs b/gix-merge/src/blob/platform/resource.rs new file mode 100644 index 00000000000..ed646c94233 --- /dev/null +++ b/gix-merge/src/blob/platform/resource.rs @@ -0,0 +1,50 @@ +use crate::blob::{ + pipeline, + platform::{Resource, ResourceRef}, +}; + +impl<'a> ResourceRef<'a> { + pub(super) fn new(cache: &'a Resource) -> Self { + ResourceRef { + data: cache.data.map_or(Data::Missing, |data| match data { + pipeline::Data::Buffer => Data::Buffer(&cache.buffer), + pipeline::Data::TooLarge { size } => Data::TooLarge { size }, + }), + rela_path: cache.rela_path.as_ref(), + id: &cache.id, + } + } +} + +/// The data of a mergeable resource, as it could be determined and computed previously. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum Data<'a> { + /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. + /// Such data equals an empty buffer. + Missing, + /// The textual data as processed and ready for merging, i.e. suitable for storage in Git. + Buffer(&'a [u8]), + /// The file or blob is above the big-file threshold and cannot be processed. 
+ /// + /// In this state, the file cannot be merged. + TooLarge { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in Git, and once it's the size of file + /// in the worktree. + size: u64, + }, +} + +impl<'a> Data<'a> { + /// Return ourselves as slice of bytes if this instance stores data. + /// Note that missing data is interpreted as empty slice, to facilitate additions and deletions. + pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + Data::Buffer(d) => Some(d), + Data::Missing => Some(&[]), + Data::TooLarge { .. } => None, + } + } +} diff --git a/gix-merge/src/blob/platform/set_resource.rs b/gix-merge/src/blob/platform/set_resource.rs new file mode 100644 index 00000000000..377642e2355 --- /dev/null +++ b/gix-merge/src/blob/platform/set_resource.rs @@ -0,0 +1,103 @@ +use bstr::{BStr, BString}; + +use crate::blob::platform::Resource; +use crate::blob::{pipeline, Platform, ResourceKind}; + +/// The error returned by [Platform::set_resource](Platform::set_resource). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Can only diff blobs, not {mode:?}")] + InvalidMode { mode: gix_object::tree::EntryKind }, + #[error("Failed to read {kind:?} worktree data from '{rela_path}'")] + Io { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error(transparent)] + ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error), +} + +/// Preparation +impl Platform { + /// Store enough information about a resource to eventually use it in a merge, where… + /// + /// * `id` is the hash of the resource. 
If it [is null](gix_hash::ObjectId::is_null()), it should either + /// be a resource in the worktree, or it's considered a non-existing, deleted object. + /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided + /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root + /// is available for `kind`. + /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `rela_path` is the relative path as seen from the (work)tree root. + /// * `kind` identifies the side of the merge this resource will be used for. + /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), Error> { + if !matches!( + mode, + gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable + ) { + return Err(Error::InvalidMode { mode }); + } + let entry = self + .attr_stack + .at_entry(rela_path, None, objects) + .map_err(|err| Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + + let storage = match kind { + ResourceKind::OtherOrTheirs => &mut self.other, + ResourceKind::CommonAncestorOrBase => &mut self.ancestor, + ResourceKind::CurrentOrOurs => &mut self.current, + }; + + let mut buf_storage = Vec::new(); + let out = self.filter.convert_to_mergeable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), + )?; + + match storage { + None => { + *storage = Some(Resource { + id, + rela_path: rela_path.to_owned(), + data: out, + mode, + buffer: buf_storage, + }); + } + Some(storage) => { + storage.id = id; + storage.rela_path = 
rela_path.to_owned(); + storage.data = out; + storage.mode = mode; + } + }; + Ok(()) + } +} diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs new file mode 100644 index 00000000000..8e608c53ab4 --- /dev/null +++ b/gix-merge/src/lib.rs @@ -0,0 +1,6 @@ +#![deny(rust_2018_idioms)] +#![forbid(unsafe_code)] + +/// +#[cfg(feature = "blob")] +pub mod blob; diff --git a/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar new file mode 100644 index 00000000000..9105e6caa79 Binary files /dev/null and b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar differ diff --git a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar new file mode 100644 index 00000000000..8bd4e8f2244 Binary files /dev/null and b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar differ diff --git a/gix-merge/tests/fixtures/make_blob_repo.sh b/gix-merge/tests/fixtures/make_blob_repo.sh new file mode 100644 index 00000000000..8f4d23f38ec --- /dev/null +++ b/gix-merge/tests/fixtures/make_blob_repo.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init -q + +echo just-set > just-set +echo b > b +echo union > union +echo e > e-no-attr +echo unset > unset +echo unspecified > unspecified + +cat <.gitattributes +just-set merge +b merge=b +union merge=union +missing merge=missing +unset -merge +unspecified !merge +EOF + +git add . 
&& git commit -m "init" diff --git a/gix-merge/tests/fixtures/text-baseline.sh b/gix-merge/tests/fixtures/text-baseline.sh new file mode 100644 index 00000000000..17d954aa278 --- /dev/null +++ b/gix-merge/tests/fixtures/text-baseline.sh @@ -0,0 +1,668 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init +rm -Rf .git/hooks + +function baseline() { + local ours=$DIR/${1:?1: our file}.blob; + local base=$DIR/${2:?2: base file}.blob; + local theirs=$DIR/${3:?3: their file}.blob; + local output=$DIR/${4:?4: the name of the output file}.merged; + + shift 4 + git merge-file --stdout "$@" "$ours" "$base" "$theirs" > "$output" || true + + echo "$ours" "$base" "$theirs" "$output" "$@" >> baseline.cases +} + +mkdir simple +(cd simple + echo -e "line1-changed-by-both\nline2-to-be-changed-in-incoming" > ours.blob + echo -e "line1-to-be-changed-by-both\nline2-to-be-changed-in-incoming" > base.blob + echo -e "line1-changed-by-both\nline2-changed" > theirs.blob +) + +# one big change includes multiple smaller ones +mkdir multi-change +(cd multi-change + cat < base.blob +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +Y +Y +8 +Z +EOF + + cat < theirs.blob +T +T +T +T +T +T +T +T +T +T +EOF +) + +# a change with deletion/clearing our file +mkdir clear-ours +(cd clear-ours + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + touch ours.blob + + cat < theirs.blob +T +T +T +T +T +EOF +) + +# a change with deletion/clearing their file +mkdir clear-theirs +(cd clear-theirs + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +O +O +O +O +O +EOF + + touch theirs.blob +) + +# differently sized changes +mkdir ours-2-lines-theirs-1-line +(cd ours-2-lines-theirs-1-line + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +EOF + + cat < theirs.blob +0 +1 +Y +3 +4 +5 +EOF +) + +# partial match +mkdir partial-match +(cd partial-match + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +X1 +X2 +X3 +X4 +5 +EOF + + cat < theirs.blob 
+0 +X1 +2 +X3 +X4 +5 +EOF +) + +# based on 'unique merge base' from 'diff3-conflict-markers' +mkdir unique-merge-base-with-insertion +(cd unique-merge-base-with-insertion + cat < base.blob +1 +2 +3 +4 +5 +EOF + + # no trailing newline + echo -n $'1\n2\n3\n4\n5\n7' > ours.blob + echo -n $'1\n2\n3\n4\n5\nsix' > theirs.blob +) + +mkdir zdiff3-basic +(cd zdiff3-basic + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +A +B +C +D +E +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +A +X +C +Y +E +7 +8 +9 +EOF +) + +mkdir zdiff3-middlecommon +(cd zdiff3-middlecommon + cat < base.blob +1 +2 +3 +AA +4 +5 +BB +6 +7 +8 +EOF + + cat < ours.blob +1 +2 +3 +CC +4 +5 +DD +6 +7 +8 +EOF + + cat < theirs.blob +1 +2 +3 +EE +4 +5 +FF +6 +7 +8 +EOF +) + + +mkdir zdiff3-interesting +(cd zdiff3-interesting + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +A +B +C +D +E +F +G +H +I +J +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +A +B +C +5 +6 +G +H +I +J +7 +8 +9 +EOF +) + +mkdir zdiff3-evil +(cd zdiff3-evil + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +X +A +B +C +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +Y +A +B +C +B +C +7 +8 +9 +EOF +) + +mkdir no-change-add +(cd no-change-add + + echo -e " line 1\n\n line 2" > base.blob + echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > ours.blob + cp ours.blob theirs.blob +) + +mkdir no-change-remove +(cd no-change-remove + + echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > base.blob + echo -e " line 1\n\n line 2" > ours.blob + cp ours.blob theirs.blob +) + +mkdir complex +(cd complex + cat <base.blob +Dominus regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new1.blob +Dominus regit me, +et nihil mihi deerit. 
+In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +Nam et si ambulavero in medio umbrae mortis, +non timebo mala, quoniam tu mecum es: +virga tua et baculus tuus ipsa me consolata sunt. +EOF + + cat <new2.blob +Dominus regit me, et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new3.blob +DOMINUS regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new4.blob +Dominus regit me, et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +EOF + + cat <new5.blob +Dominus regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +Nam et si ambulavero in medio umbrae mortis, +non timebo mala, quoniam TU mecum es: +virga tua et baculus tuus ipsa me consolata sunt. +EOF + + echo -n "propter nomen suum." >>new4.blob + + cat <base.c +int f(int x, int y) +{ + if (x == 0) + { + return y; + } + return x; +} + +int g(size_t u) +{ + while (u < 30) + { + u++; + } + return u; +} +EOF + + cat <ours.c +int g(size_t u) +{ + while (u < 30) + { + u++; + } + return u; +} + +int h(int x, int y, int z) +{ + if (z == 0) + { + return x; + } + return y; +} +EOF + + cat <theirs.c +int f(int x, int y) +{ + if (x == 0) + { + return y; + } + return x; +} + +int g(size_t u) +{ + while (u > 34) + { + u--; + } + return u; +} +EOF + + mkdir no-change + (cd no-change + cp ../base.blob . 
+ cp base.blob ours.blob + cp base.blob theirs.blob + ) + mkdir no-conflict + (cd no-conflict + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir no-conflict-too + (cd no-conflict-too + cp ../base.blob ours.blob + cp ../base.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir they-changed + (cd they-changed + touch ours.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir missing-LF-at-EOF + (cd missing-LF-at-EOF + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new4.blob theirs.blob + ) + mkdir missing-LF-at-EOF-no-conflict + (cd missing-LF-at-EOF-no-conflict + cp ../new4.blob ours.blob + cp ../new2.blob base.blob + cp ../new3.blob theirs.blob + ) + mkdir with-conflicts + (cd with-conflicts + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new3.blob theirs.blob + ) + mkdir with-conflicts-in-removed-tail + (cd with-conflicts-in-removed-tail + cp ../base.blob ours.blob + cp ../new1.blob base.blob + cp ../new5.blob theirs.blob + ) + mkdir auto-simplification + (cd auto-simplification + sed -e "s/deerit.\$/deerit;/" -e "s/me;\$/me./" <../new5.blob >ours.blob + cp ../new5.blob base.blob + sed -e "s/deerit.\$/deerit,/" -e "s/me;\$/me,/" <../new5.blob >theirs.blob + ) + mkdir auto-simplification2 + (cd auto-simplification2 + sed -e "s/deerit./&%%%%/" -e "s/locavit,/locavit;/" <../auto-simplification/ours.blob | tr % "\012" >ours.blob + cp ../new5.blob base.blob + sed -e "s/deerit./&%%%%/" -e "s/locavit,/locavit --/" <../auto-simplification/theirs.blob | tr % "\012" >theirs.blob + ) + mkdir conflict-without-LF + (cd conflict-without-LF + printf "line1\nline2\nline3" >base.blob + printf "line1\nline2\nline3x" >ours.blob + printf "line1\nline2\nline3y" >theirs.blob + ) + + mkdir marker-newline-handling-crlf + (cd marker-newline-handling-crlf + printf "1\\r\\n2\\r\\n3" >base.blob + printf "1\\r\\n2\\r\\n4" >ours.blob + printf "1\\r\\n2\\r\\n5" >theirs.blob + ) + + mkdir 
marker-newline-handling-lf + (cd marker-newline-handling-lf + printf "1\\r\\n2\\n3" >base.blob + printf "1\\r\\n2\\n4" >ours.blob + printf "1\\r\\n2\\n5" >theirs.blob + ) + + mkdir marker-newline-handling-lf2 + (cd marker-newline-handling-lf2 + printf "1\\r\\n2\\r\\n3" >base.blob + printf "1\\r\\n2\\n4" >ours.blob + printf "1\\r\\n2\\n5" >theirs.blob + ) + + mkdir spurious-c-conflicts + (cd spurious-c-conflicts + cp ../base.c base.blob + cp ../ours.c ours.blob + cp ../theirs.c theirs.blob + ) +) + +mkdir line-ending-change +(cd line-ending-change + + echo -e "a\n" > base.blob + echo -e "a\r\n" > ours.blob + echo -e "a\n" > theirs.blob +) + + +for dir in simple \ + multi-change \ + clear-ours \ + clear-theirs \ + ours-2-lines-theirs-1-line \ + partial-match \ + unique-merge-base-with-insertion \ + zdiff3-basic \ + zdiff3-middlecommon \ + zdiff3-interesting \ + zdiff3-evil \ + no-change-add \ + no-change-remove \ + line-ending-change \ + complex/no-change \ + complex/no-conflict \ + complex/no-conflict-too \ + complex/they-changed \ + complex/missing-LF-at-EOF \ + complex/missing-LF-at-EOF-no-conflict \ + complex/with-conflicts \ + complex/with-conflicts-in-removed-tail \ + complex/auto-simplification \ + complex/auto-simplification2 \ + complex/conflict-without-LF \ + complex/marker-newline-handling-crlf \ + complex/marker-newline-handling-lf \ + complex/marker-newline-handling-lf2 \ + complex/spurious-c-conflicts; do + DIR=$dir + baseline ours base theirs merge + baseline ours base theirs diff3 --diff3 + baseline ours base theirs zdiff3 --zdiff3 + baseline ours base theirs merge-ours --ours + baseline ours base theirs merge-theirs --theirs + baseline ours base theirs merge-union --union + ( + export GIT_CONFIG_COUNT=1 GIT_CONFIG_KEY_0=diff.algorithm GIT_CONFIG_KEY_VALUE=0=histogram + baseline ours base theirs diff3-histogram --diff3 + baseline ours base theirs zdiff3-histogram --zdiff3 + ) +done \ No newline at end of file diff --git 
a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs new file mode 100644 index 00000000000..b0d7afa8f85 --- /dev/null +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -0,0 +1,228 @@ +use gix_merge::blob::builtin_driver::binary::{Pick, ResolveWith}; +use gix_merge::blob::{builtin_driver, Resolution}; + +#[test] +fn binary() { + assert_eq!( + builtin_driver::binary(None), + (Pick::Ours, Resolution::Conflict), + "by default it picks ours and marks it as conflict" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ancestor)), + (Pick::Ancestor, Resolution::Complete), + "Otherwise we can pick anything and it will mark it as complete" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ours)), + (Pick::Ours, Resolution::Complete) + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Theirs)), + (Pick::Theirs, Resolution::Complete) + ); +} + +mod text { + use bstr::ByteSlice; + use gix_merge::blob::Resolution; + use pretty_assertions::assert_str_eq; + + const DIVERGING: &[&str] = &[ + // Somehow, in zdiff mode, it's different, and I wasn't able to figure out the rule properly. + // Now we prefer ancestor/before newlines and somewhat ignore our hunks. It's probably a minor issue in practice.
+ // gix: "1\r\n2\n<<<<<<< complex/marker-newline-handling-lf2/ours.blob\n4\r\n||||||| complex/marker-newline-handling-lf2/base.blob\r\n2\r\n3\n=======\n5\n>>>>>>> complex/marker-newline-handling-lf2/theirs.blob\n" + // git: "1\r\n2\n<<<<<<< complex/marker-newline-handling-lf2/ours.blob\n4 \n||||||| complex/marker-newline-handling-lf2/base.blob \n2\r\n3\n=======\n5\n>>>>>>> complex/marker-newline-handling-lf2/theirs.blob\n" + "complex/marker-newline-handling-lf2/zdiff3.merged", + "complex/marker-newline-handling-lf2/zdiff3-histogram.merged", + // This is related to Git seemingly extending a hunk to increase overlap (see diff3) + "zdiff3-interesting/merge.merged", + "zdiff3-interesting/merge-ours.merged", + "zdiff3-interesting/diff3.merged", + "zdiff3-interesting/diff3-histogram.merged", + "zdiff3-interesting/zdiff3.merged", + "zdiff3-interesting/zdiff3-histogram.merged", + "zdiff3-interesting/merge-union.merged", + // Git can extend hunks, similar to above, but the effect is not as noticeable. + // Implementing this would be interesting, to figure out when the hunk processing should apply. + "zdiff3-evil/merge.merged", + "zdiff3-evil/merge-union.merged", + // Git seems to merge two hunks if they are close together to get a less noisy diff. + "zdiff3-middlecommon/merge.merged", + "zdiff3-middlecommon/merge-union.merged", + // Git has special character handling, which does magic to prevent conflicts + "complex/auto-simplification/merge.merged", + "complex/auto-simplification/merge-union.merged", + // Git has special newline handling when diffing, + // which auto-inserts a newline when it was removed, kind of.
+ "complex/missing-LF-at-EOF/merge.merged", + "complex/missing-LF-at-EOF/diff3.merged", + "complex/missing-LF-at-EOF/diff3-histogram.merged", + "complex/missing-LF-at-EOF/zdiff3.merged", + "complex/missing-LF-at-EOF/zdiff3-histogram.merged", + "complex/missing-LF-at-EOF/merge-ours.merged", + "complex/missing-LF-at-EOF/merge-theirs.merged", + "complex/missing-LF-at-EOF/merge-union.merged", + // Git has different diff-slider-heuristics so diffs can be different. + // See https://github.com/mhagger/diff-slider-tools. + "complex/spurious-c-conflicts/merge.merged", + "complex/spurious-c-conflicts/merge-union.merged", + "complex/spurious-c-conflicts/diff3-histogram.merged", + "complex/spurious-c-conflicts/zdiff3-histogram.merged", + ]; + + // TODO: fix all of these eventually + fn is_case_diverging(case: &baseline::Expectation) -> bool { + DIVERGING.iter().any(|name| case.name == *name) + } + + #[test] + fn run_baseline() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("text-baseline.sh")?; + let cases = std::fs::read_to_string(root.join("baseline.cases"))?; + let mut out = Vec::new(); + let mut num_diverging = 0; + let mut num_cases = 0; + for case in baseline::Expectations::new(&root, &cases) { + num_cases += 1; + let mut input = imara_diff::intern::InternedInput::default(); + let actual = gix_merge::blob::builtin_driver::text( + &mut out, + &mut input, + case.labels(), + &case.ours, + &case.base, + &case.theirs, + case.options, + ); + if is_case_diverging(&case) { + num_diverging += 1; + } else { + let expected_resolution = if case.expected.contains_str("<<<<<<<") { + Resolution::Conflict + } else { + Resolution::Complete + }; + assert_eq!(out.as_bstr(), case.expected); + assert_str_eq!( + out.as_bstr().to_str_lossy(), + case.expected.to_str_lossy(), + "{}: output mismatch\n{}", + case.name, + out.as_bstr() + ); + assert_eq!(actual, expected_resolution, "{}: resolution mismatch", case.name,); + } + } + + assert_eq!( + num_diverging, + 
DIVERGING.len(), + "Number of expected diverging cases must match the actual one - probably the implementation improved" + ); + assert_eq!( + ((num_diverging as f32 / num_cases as f32) * 100.0) as usize, + 11, + "Just to show the percentage of skipped tests - this should get better" + ); + Ok(()) + } + + mod baseline { + use bstr::BString; + use gix_merge::blob::builtin_driver::text::{Conflict, ConflictStyle}; + use std::path::Path; + + #[derive(Debug)] + pub struct Expectation { + pub ours: BString, + pub ours_marker: String, + pub theirs: BString, + pub theirs_marker: String, + pub base: BString, + pub base_marker: String, + pub name: BString, + pub expected: BString, + pub options: gix_merge::blob::builtin_driver::text::Options, + } + + impl Expectation { + pub fn labels(&self) -> gix_merge::blob::builtin_driver::text::Labels<'_> { + gix_merge::blob::builtin_driver::text::Labels { + ancestor: Some(self.base_marker.as_str().as_ref()), + current: Some(self.ours_marker.as_str().as_ref()), + other: Some(self.theirs_marker.as_str().as_ref()), + } + } + } + + pub struct Expectations<'a> { + root: &'a Path, + lines: std::str::Lines<'a>, + } + + impl<'a> Expectations<'a> { + pub fn new(root: &'a Path, cases: &'a str) -> Self { + Expectations { + root, + lines: cases.lines(), + } + } + } + + impl Iterator for Expectations<'_> { + type Item = Expectation; + + fn next(&mut self) -> Option { + let line = self.lines.next()?; + let mut words = line.split(' '); + let (Some(ours), Some(base), Some(theirs), Some(output)) = + (words.next(), words.next(), words.next(), words.next()) + else { + panic!("need at least the input and output") + }; + + let read = |rela_path: &str| read_blob(self.root, rela_path); + + let mut options = gix_merge::blob::builtin_driver::text::Options::default(); + for arg in words { + options.conflict = match arg { + "--diff3" => Conflict::Keep { + style: ConflictStyle::Diff3, + marker_size: 7, + }, + "--zdiff3" => Conflict::Keep { + style: 
ConflictStyle::ZealousDiff3, + marker_size: 7, + }, + "--ours" => Conflict::ResolveWithOurs, + "--theirs" => Conflict::ResolveWithTheirs, + "--union" => Conflict::ResolveWithUnion, + _ => panic!("Unknown argument to parse into options: '{arg}'"), + } + } + if output.contains("histogram") { + options.diff_algorithm = imara_diff::Algorithm::Histogram; + } + + Some(Expectation { + ours: read(ours), + ours_marker: ours.into(), + theirs: read(theirs), + theirs_marker: theirs.into(), + base: read(base), + base_marker: base.into(), + expected: read(output), + name: output.into(), + options, + }) + } + } + + fn read_blob(root: &Path, rela_path: &str) -> BString { + std::fs::read(root.join(rela_path)) + .unwrap_or_else(|_| panic!("Failed to read '{rela_path}' in '{}'", root.display())) + .into() + } + } +} diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs new file mode 100644 index 00000000000..57d9205d79a --- /dev/null +++ b/gix-merge/tests/merge/blob/mod.rs @@ -0,0 +1,52 @@ +mod builtin_driver; +mod pipeline; +mod platform; + +mod util { + use std::collections::HashMap; + + use gix_hash::oid; + use gix_object::{bstr::BString, find::Error}; + + #[derive(Default)] + pub struct ObjectDb { + data_by_id: HashMap, + } + + impl gix_object::FindHeader for ObjectDb { + fn try_header(&self, id: &oid) -> Result, Error> { + match self.data_by_id.get(&id.to_owned()) { + Some(data) => Ok(Some(gix_object::Header { + kind: gix_object::Kind::Blob, + size: data.len() as u64, + })), + None => Ok(None), + } + } + } + + impl gix_object::Find for ObjectDb { + fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, Error> { + match self.data_by_id.get(&id.to_owned()) { + Some(data) => { + buffer.clear(); + buffer.extend_from_slice(data); + Ok(Some(gix_object::Data { + kind: gix_object::Kind::Blob, + data: buffer.as_slice(), + })) + } + None => Ok(None), + } + } + } + + impl ObjectDb { + /// Insert `data` and return its hash. 
That can be used to find it again. + pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId { + let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes()); + self.data_by_id.insert(id, data.into()); + id + } + } +} diff --git a/gix-merge/tests/merge/blob/pipeline.rs b/gix-merge/tests/merge/blob/pipeline.rs new file mode 100644 index 00000000000..080a9d601f6 --- /dev/null +++ b/gix-merge/tests/merge/blob/pipeline.rs @@ -0,0 +1,433 @@ +use crate::blob::util::ObjectDb; +use bstr::ByteSlice; +use gix_filter::eol; +use gix_filter::eol::AutoCrlf; +use gix_merge::blob::pipeline::{self, Mode, WorktreeRoots}; +use gix_merge::blob::{Pipeline, ResourceKind}; +use gix_object::tree::EntryKind; + +const ALL_MODES: [pipeline::Mode; 2] = [pipeline::Mode::ToGit, pipeline::Mode::Renormalize]; + +#[test] +fn without_transformation() -> crate::Result { + for mode in ALL_MODES { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + default_options(), + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let a_content = "a-content"; + std::fs::write(tmp.path().join(a_name), a_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), a_content, "there is no transformations configured"); + + let link_name = "link"; + gix_fs::symlink::create(a_name.as_ref(), &tmp.path().join(link_name))?; + let err = filter + .convert_to_mergeable( + &does_not_matter, + EntryKind::Link, + link_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + 
&mut buf, + ) + .unwrap_err(); + + assert!( + matches!(err, pipeline::convert_to_mergeable::Error::InvalidEntryKind {rela_path,actual} + if rela_path == link_name && actual == EntryKind::Link) + ); + assert_eq!( + buf.len(), + 9, + "input buffers are cleared only if we think they are going to be used" + ); + drop(tmp); + + let mut db = ObjectDb::default(); + let b_content = "b-content"; + let id = db.insert(b_content); + + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &db, + mode, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + b_content, + "there is no transformations configured, it fetched the data from the ODB" + ); + + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, None, "the lack of object in the database isn't a problem"); + + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + "does not exist on disk".into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, None, "the lack of file on disk is fine as well"); + } + + Ok(()) +} + +#[test] +fn binary_below_large_file_threshold() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + current_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + pipeline::Options { + large_file_threshold_bytes: 5, + }, + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let binary_content = "a\0b"; + std::fs::write(tmp.path().join(a_name), binary_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + 
EntryKind::BlobExecutable, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer), "binary data can still be merged"); + assert_eq!(buf.as_bstr(), binary_content); + + let mut db = ObjectDb::default(); + let id = db.insert(binary_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), binary_content); + + Ok(()) +} + +#[test] +fn above_large_file_threshold() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = gix_merge::blob::Pipeline::new( + WorktreeRoots { + current_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + pipeline::Options { + large_file_threshold_bytes: 4, + }, + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let large_content = "hello"; + std::fs::write(tmp.path().join(a_name), large_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::BlobExecutable, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!( + out, + Some(pipeline::Data::TooLarge { size: 5 }), + "it indicates that the file is too large" + ); + assert_eq!(buf.len(), 0, "it should avoid querying that data in the first place"); + + drop(tmp); + let mut db = ObjectDb::default(); + let id = db.insert(large_content); + + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::TooLarge { size: 5 })); + 
assert_eq!( + buf.len(), + 0, + "it won't have queried the blob, first it checks the header" + ); + + Ok(()) +} + +#[test] +fn non_existing() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + default_options(), + ); + + let null = gix_hash::Kind::Sha1.null(); + let mut buf = vec![1]; + let a_name = "a"; + assert!( + !tmp.path().join(a_name).exists(), + "precondition: worktree file doesn't exist" + ); + let out = filter.convert_to_mergeable( + &null, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!( + out, None, + "it's OK for a resource to not exist on disk - they'd then count as deleted" + ); + assert_eq!(buf.len(), 0, "always cleared"); + + drop(tmp); + + buf.push(1); + let out = filter.convert_to_mergeable( + &null, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!( + out, None, + "the root path isn't configured and the object database returns nothing" + ); + assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); + + let some_id = gix_hash::ObjectId::from_hex(b"45c160c35c17ad264b96431cceb9793160396e99")?; + let err = filter + .convert_to_mergeable( + &some_id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + ) + .unwrap_err(); + assert!( + matches!( + err, + gix_merge::blob::pipeline::convert_to_mergeable::Error::FindObject( + gix_object::find::existing_object::Error::NotFound { .. 
} + ), + ), + "missing object database ids are always an error (even though missing objects on disk are allowed)" + ); + Ok(()) +} + +#[test] +fn worktree_filter() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let filter = gix_filter::Pipeline::new( + Default::default(), + gix_filter::pipeline::Options { + eol_config: eol::Configuration { + auto_crlf: AutoCrlf::Enabled, + ..Default::default() + }, + ..Default::default() + }, + ); + let mut filter = gix_merge::blob::Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + filter, + default_options(), + ); + + let mut db = ObjectDb::default(); + let a_name = "a"; + let mut buf = Vec::new(); + let a_content = "a-content\r\n"; + std::fs::write(tmp.path().join(a_name), a_content.as_bytes())?; + for mode in ALL_MODES { + let does_not_matter = gix_hash::Kind::Sha1.null(); + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + "a-content\n", + "worktree files need to be converted back to what's stored in Git" + ); + + let id = db.insert(a_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &db, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + match mode { + Mode::ToGit => { + assert_eq!( + buf.as_bstr(), + "a-content\r\n", + "if an object with CRLF already exists, we don't 'renormalize' it, it's a feature" + ); + } + Mode::Renormalize => { + assert_eq!( + buf.as_bstr(), + "a-content\n", + "we can also do it if the file exists both on disk and is known to the ODB" + ); + } + } + } + + drop(tmp); + + let b_content = "b-content\n"; + let id = db.insert(b_content); + + let out = 
filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), b_content, "no work is done for what's already in Git"); + + let mut db = ObjectDb::default(); + let b_content = "b-content\r\n"; + let id = db.insert(b_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &db, + pipeline::Mode::Renormalize, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + "b-content\n", + "we see what would have been stored if the file was checked out and checked in again.\ + It explicitly ignores what's in Git already (or it wouldn't do anyting)" + ); + + Ok(()) +} + +fn default_options() -> pipeline::Options { + pipeline::Options { + large_file_threshold_bytes: 0, + } +} diff --git a/gix-merge/tests/merge/blob/platform.rs b/gix-merge/tests/merge/blob/platform.rs new file mode 100644 index 00000000000..d03a3ddc960 --- /dev/null +++ b/gix-merge/tests/merge/blob/platform.rs @@ -0,0 +1,661 @@ +use gix_worktree::stack::state::attributes; + +use gix_merge::blob::Platform; + +mod merge { + use crate::blob::platform::new_platform; + use crate::blob::util::ObjectDb; + use bstr::{BStr, ByteSlice}; + use gix_merge::blob::builtin_driver::text::ConflictStyle; + use gix_merge::blob::platform::builtin_merge::Pick; + use gix_merge::blob::platform::DriverChoice; + use gix_merge::blob::{builtin_driver, pipeline, platform, BuiltinDriver, Resolution, ResourceKind}; + use gix_object::tree::EntryKind; + use std::process::Stdio; + + #[test] + fn builtin_text_uses_binary_if_needed() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "a".into(), + 
ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs\0", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource( + id, + EntryKind::Blob, + "path matters only for attribute lookup".into(), + kind, + &db, + )?; + } + let mut platform_ref = platform.prepare_merge(&db, Default::default())?; + assert_eq!( + platform_ref.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "it starts out at the default text driver" + ); + + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Ours, Resolution::Conflict), + "it detected the binary buffer, ran the binary merge with default conflict resolution" + ); + + platform_ref.options.resolve_binary_with = Some(builtin_driver::binary::ResolveWith::Theirs); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Theirs, Resolution::Complete), + "the auto-binary driver respects its own options" + ); + Ok(()) + } + + #[test] + fn builtin_with_conflict() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; + } + + let mut platform_ref = platform.prepare_merge(&db, Default::default())?; + assert_eq!(platform_ref.driver, DriverChoice::BuiltIn(BuiltinDriver::Text)); + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, 
(Pick::Buffer, Resolution::Conflict)); + assert_eq!( + buf.as_bstr(), + r#"<<<<<<< current label +ours +======= +theirs +>>>>>>> other label +"#, + "default options apply, hence the 'merge' style conflict" + ); + platform_ref.options.text.conflict = builtin_driver::text::Conflict::Keep { + style: ConflictStyle::Diff3, + marker_size: 3, + }; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Conflict)); + + assert_eq!( + buf.as_bstr(), + r#"<<< current label +ours +||| ancestor label +b +=== +theirs +>>> other label +"#, + "options apply correctly" + ); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithOurs; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "it's actually unclear now if there ever was a conflict, but we *could* compute it" + ); + assert_eq!(buf.as_bstr(), "ours"); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithTheirs; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "theirs"); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithUnion; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "ours\ntheirs"); + + platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Union); + platform_ref.options.text.conflict = builtin_driver::text::Conflict::default(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "ours\ntheirs"); + + platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Binary); + let res = platform_ref.merge(&mut buf, 
default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Ours, Resolution::Conflict), + "binary merges choose ours but conflict by default" + ); + assert!(buf.is_empty(), "it tells us where to get the content from"); + assert_eq!( + platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), + "ours", + "getting access to the content is simplified" + ); + + for (expected, expected_pick, resolve) in [ + ("ours", Pick::Ours, builtin_driver::binary::ResolveWith::Ours), + ("theirs", Pick::Theirs, builtin_driver::binary::ResolveWith::Theirs), + ("b\n", Pick::Ancestor, builtin_driver::binary::ResolveWith::Ancestor), + ] { + platform_ref.options.resolve_binary_with = Some(resolve); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (expected_pick, Resolution::Complete)); + assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), expected); + } + + Ok(()) + } + + #[test] + fn with_external() -> crate::Result { + let mut platform = new_platform( + [gix_merge::blob::Driver { + name: "b".into(), + command: + "for arg in %O %A %B %L %P %S %X %Y %F; do echo $arg >> \"%A\"; done; cat \"%O\" \"%B\" >> \"%A\"" + .into(), + ..Default::default() + }], + pipeline::Mode::ToGit, + ); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; + } + + let platform_ref = platform.prepare_merge(&db, Default::default())?; + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete), "merge drivers always merge "); + let mut lines = cleaned_driver_lines(&buf)?; + for tmp_file 
in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "b", + "ancestor label", + "current label", + "other label", + "%F", + "b", + "theirs" + ], + "we handle word-splitting and definitely pick-up what's written into the %A buffer" + ); + + let id = db.insert("binary\0"); + platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::OtherOrTheirs, &db)?; + let platform_ref = platform.prepare_merge(&db, Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "merge drivers deal with binary themselves" + ); + let mut lines = cleaned_driver_lines(&buf)?; + for tmp_file in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "b", + "ancestor label", + "current label", + "other label", + "%F", + "b", + "binary\0" + ], + "in this case, the binary lines are just taken verbatim" + ); + + Ok(()) + } + + #[test] + fn missing_buffers_are_empty_buffers() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + // Two deletions + for kind in [ResourceKind::CurrentOrOurs, ResourceKind::OtherOrTheirs] { + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "does not matter for driver".into(), + kind, + &gix_object::find::Never, + )?; + } + + let platform_ref = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, Default::default(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "both versions are 
deleted, an actual merge happened" + ); + assert!( + buf.is_empty(), + "the new buffer is considered empty, both sides were deleted, too" + ); + + let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); + let res = platform_ref.builtin_merge(BuiltinDriver::Text, &mut buf, &mut input, Default::default()); + assert_eq!( + res, + Some((Pick::Buffer, Resolution::Complete)), + "both versions are deleted" + ); + assert!(buf.is_empty(), "the result is the same on direct invocation"); + + let print_all = "for arg in $@ %O %A %B %L %P %S %X %Y %F; do echo $arg; done"; + let mut cmd = platform_ref.prepare_external_driver(print_all.into(), default_labels(), Default::default())?; + let stdout = cmd.stdout(Stdio::piped()).output()?.stdout; + let mut lines = cleaned_driver_lines(&stdout)?; + for tmp_file in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "does not matter for driver", + "ancestor label", + "current label", + "other label", + "%F" + ], + "word splitting is prevented thanks to proper quoting" + ); + Ok(()) + } + + #[test] + fn one_buffer_too_large() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.filter.options.large_file_threshold_bytes = 9; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + platform.filter.roots.other_root = platform.filter.roots.common_ancestor_root.clone(); + platform.filter.roots.current_root = platform.filter.roots.common_ancestor_root.clone(); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unspecified".into(), + ResourceKind::OtherOrTheirs, + 
&gix_object::find::Never, + )?; + + let platform_ref = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!(platform_ref.other.data, platform::resource::Data::TooLarge { size: 12 }); + + let mut out = Vec::new(); + let err = platform_ref + .merge(&mut out, Default::default(), Default::default()) + .unwrap_err(); + assert!(matches!(err, platform::merge::Error::ResourceTooLarge)); + + let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); + assert_eq!( + platform_ref.builtin_merge(BuiltinDriver::Text, &mut out, &mut input, Default::default(),), + None + ); + + let err = platform_ref + .prepare_external_driver("bogus".into(), Default::default(), Default::default()) + .unwrap_err(); + assert!(matches!( + err, + platform::prepare_external_driver::Error::ResourceTooLarge { .. } + )); + Ok(()) + } + + fn cleaned_driver_lines(buf: &[u8]) -> std::io::Result> { + let current_dir = gix_path::into_bstr(std::env::current_dir()?); + Ok(buf + .lines() + .map(move |line| line.strip_prefix(current_dir.as_bytes()).unwrap_or(line).as_bstr())) + } + + fn default_labels() -> builtin_driver::text::Labels<'static> { + builtin_driver::text::Labels { + ancestor: Some("ancestor label".into()), + current: Some("current label".into()), + other: Some("other label".into()), + } + } +} + +mod prepare_merge { + use crate::blob::platform::new_platform; + use gix_merge::blob::platform::{resource, DriverChoice}; + use gix_merge::blob::{builtin_driver, pipeline, BuiltinDriver, ResourceKind}; + use gix_object::tree::EntryKind; + + #[test] + fn ancestor_and_current_and_other_do_not_exist() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "also-missing".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "can't-be-found-in-odb".into(), + 
ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::BlobExecutable, + "can't-be-found-in-odb".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let state = platform + .prepare_merge(&gix_object::find::Never, Default::default()) + .expect("no validation is done here, let the caller inspect"); + assert_eq!(state.ancestor.data, resource::Data::Missing); + assert_eq!(state.current.data, resource::Data::Missing); + assert_eq!(state.other.data, resource::Data::Missing); + Ok(()) + } + + #[test] + fn driver_selection() -> crate::Result { + let mut platform = new_platform( + [ + gix_merge::blob::Driver { + name: "union".into(), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "to proof it will be sorted".into(), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "b".into(), + recursive: Some("for-recursion".into()), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "for-recursion".into(), + recursive: Some("should not be looked up".into()), + ..Default::default() + }, + ], + pipeline::Mode::ToGit, + ); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "does not matter for driver".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::BlobExecutable, + "also does not matter for driver".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`merge` attribute means text" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + 
"unset".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`-merge` attribute means binary, but it looked up 'current' which is still at some bogus worktree path" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unset".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Binary), + "`-merge` attribute means binary" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unspecified".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`!merge` attribute means the hardcoded default" + ); + + platform.options.default_driver = Some("union".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + let expected_idx = 3; + assert_eq!( + prepared.driver, + DriverChoice::Index(expected_idx), + "`!merge` attribute will also pick up the 'merge.default' configuration, and find the name in passed drivers first.\ + Note that the index is 1, even though it was 0 when passing the drivers - they are sorted by name." 
+ ); + assert_eq!(platform.drivers()[expected_idx].name, "union"); + + platform.options.default_driver = Some("binary".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Binary), + "`!merge` attribute will also pick up the 'merge.default' configuration, non-overridden builtin filters work as well" + ); + + platform.options.default_driver = Some("Binary".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "'merge.default' is case-sensitive" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + let expected_idx = 0; + assert_eq!(prepared.driver, DriverChoice::Index(expected_idx)); + assert_eq!( + platform.drivers()[expected_idx].name, + "b", + "by default, even if recursive is specified, it doesn't look it up" + ); + + let prepared = platform.prepare_merge( + &gix_object::find::Never, + gix_merge::blob::platform::merge::Options { + is_virtual_ancestor: true, + resolve_binary_with: None, + ..Default::default() + }, + )?; + let expected_idx = 1; + assert_eq!(prepared.driver, DriverChoice::Index(expected_idx),); + assert_eq!( + prepared.options.resolve_binary_with, + Some(builtin_driver::binary::ResolveWith::Ours), + "it automatically adjusts the merge mode for binary operations to work for bases" + ); + assert_eq!( + platform.drivers()[expected_idx].name, + "for-recursion", + "It looks up the final driver, including recursion, it only looks it up once though" + ); + Ok(()) + } +} + +mod set_resource { + use crate::blob::platform::new_platform; + use gix_merge::blob::{pipeline, ResourceKind}; + use gix_object::tree::EntryKind; + + 
#[test] + fn invalid_resource_types() { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + for (mode, name) in [(EntryKind::Commit, "Commit"), (EntryKind::Tree, "Tree")] { + assert_eq!( + platform + .set_resource( + gix_hash::Kind::Sha1.null(), + mode, + "a".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + ) + .unwrap_err() + .to_string(), + format!("Can only diff blobs, not {name}") + ); + } + } +} + +fn new_platform( + drivers: impl IntoIterator, + filter_mode: gix_merge::blob::pipeline::Mode, +) -> Platform { + let root = gix_testtools::scripted_fixture_read_only("make_blob_repo.sh").expect("valid fixture"); + let attributes = gix_worktree::Stack::new( + &root, + gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::new( + Default::default(), + None, + attributes::Source::WorktreeThenIdMapping, + Default::default(), + )), + gix_worktree::glob::pattern::Case::Sensitive, + Vec::new(), + Vec::new(), + ); + let filter = gix_merge::blob::Pipeline::new( + gix_merge::blob::pipeline::WorktreeRoots { + common_ancestor_root: Some(root.clone()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + Default::default(), + ); + Platform::new( + filter, + filter_mode, + attributes, + drivers.into_iter().collect(), + Default::default(), + ) +} diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs new file mode 100644 index 00000000000..9f7a6989d2c --- /dev/null +++ b/gix-merge/tests/merge/main.rs @@ -0,0 +1,6 @@ +extern crate core; + +#[cfg(feature = "blob")] +mod blob; + +pub use gix_testtools::Result; diff --git a/gix/src/repository/diff.rs b/gix/src/repository/diff.rs index e2efb11ec14..4f98ebe52f3 100644 --- a/gix/src/repository/diff.rs +++ b/gix/src/repository/diff.rs @@ -38,10 +38,10 @@ impl Repository { mode, self.attributes_only( &index, - if worktree_roots.new_root.is_some() || worktree_roots.old_root.is_some() { - 
gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping - } else { + if worktree_roots.is_unset() { gix_worktree::stack::state::attributes::Source::IdMapping + } else { + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping }, )? .inner,