Skip to content

Commit

Permalink
Sketch the entire API surface to capture all parts of blob-merges
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Sep 15, 2024
1 parent 865282f commit b96d11f
Show file tree
Hide file tree
Showing 8 changed files with 1,240 additions and 0 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gix-diff/src/blob/platform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ impl Platform {
///
/// If one of the resources is binary, the operation reports an error as such resources don't make their data available
/// which is required for the external diff to run.
// TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles.
pub fn prepare_diff_command(
&self,
diff_command: BString,
Expand Down
26 changes: 26 additions & 0 deletions gix-merge/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,31 @@ workspace = true
[lib]
doctest = false

[features]
default = ["blob"]
## Enable merging of blobs, using imara-diff to compute the underlying diffs.
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"]
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]

[dependencies]
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
gix-object = { version = "^0.44.0", path = "../gix-object" }
gix-filter = { version = "^0.13.0", path = "../gix-filter", optional = true }
gix-worktree = { version = "^0.36.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true }
gix-command = { version = "^0.3.9", path = "../gix-command", optional = true }
gix-path = { version = "^0.10.11", path = "../gix-path", optional = true }
gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true }
gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true }
gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true }

thiserror = "1.0.63"
imara-diff = { version = "0.1.7", optional = true }
bstr = { version = "1.5.0", default-features = false }
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }

document-features = { version = "0.2.0", optional = true }

[package.metadata.docs.rs]
all-features = true
features = ["document-features"]
156 changes: 156 additions & 0 deletions gix-merge/src/blob/builtin_driver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use crate::blob::BuiltinDriver;

impl BuiltinDriver {
    /// The name of this built-in driver, one of `text`, `binary` or `union`.
    pub fn as_str(&self) -> &str {
        match *self {
            Self::Text => "text",
            Self::Binary => "binary",
            Self::Union => "union",
        }
    }

    /// All built-in drivers that are available, in declaration order.
    pub fn all() -> &'static [Self] {
        const ALL: &[BuiltinDriver] = &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union];
        ALL
    }

    /// Look up the built-in driver whose [name](Self::as_str()) equals `name` exactly (the comparison is
    /// case-sensitive), or return `None` if there is no such driver.
    pub fn by_name(name: &str) -> Option<Self> {
        Self::all().iter().copied().find(|driver| driver.as_str() == name)
    }
}

///
pub mod binary {
    use crate::blob::Resolution;

    /// The side to choose when a conflict should be resolved automatically.
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    pub enum ResolveWith {
        /// Choose the ancestor to resolve a conflict.
        Ancestor,
        /// Choose our side to resolve a conflict.
        Ours,
        /// Choose their side to resolve a conflict.
        Theirs,
    }

    /// Informs the caller of [`merge()`] about the side that was picked.
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    pub enum Pick {
        /// The ancestor was chosen.
        Ancestor,
        /// Our side was chosen.
        Ours,
        /// Their side was chosen.
        Theirs,
    }

    /// Decide which side to use purely by logic, as this algorithm never looks at the actual data.
    ///
    /// If `on_conflict` is `None`, our side is picked and the result is a [conflict](Resolution::Conflict).
    /// Otherwise the side named by `on_conflict` is picked and the merge is [complete](Resolution::Complete).
    pub fn merge(on_conflict: Option<ResolveWith>) -> (Pick, Resolution) {
        on_conflict.map_or((Pick::Ours, Resolution::Conflict), |side| {
            let pick = match side {
                ResolveWith::Ancestor => Pick::Ancestor,
                ResolveWith::Ours => Pick::Ours,
                ResolveWith::Theirs => Pick::Theirs,
            };
            (pick, Resolution::Complete)
        })
    }
}

///
pub mod text {
    use crate::blob::Resolution;

    /// Determines how the built-in [text driver](crate::blob::BuiltinDriver::Text) renders
    /// merge conflicts in the resulting file.
    #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    pub enum ConflictStyle {
        /// Show only the zealously minified conflicting lines of the local changes and of the
        /// incoming (other) changes, and hide the base version entirely.
        ///
        /// ```text
        /// line1-changed-by-both
        /// <<<<<<< local
        /// line2-to-be-changed-in-incoming
        /// =======
        /// line2-changed
        /// >>>>>>> incoming
        /// ```
        #[default]
        Merge,
        /// Show non-minimized hunks of the local changes, the base, and the incoming (other) changes.
        ///
        /// No information is hidden in this mode.
        ///
        /// ```text
        /// <<<<<<< local
        /// line1-changed-by-both
        /// line2-to-be-changed-in-incoming
        /// ||||||| 9a8d80c
        /// line1-to-be-changed-by-both
        /// line2-to-be-changed-in-incoming
        /// =======
        /// line1-changed-by-both
        /// line2-changed
        /// >>>>>>> incoming
        /// ```
        Diff3,
        /// Like [`Diff3`](Self::Diff3), but with *minimized* hunks for the local changes and the incoming
        /// (other) changes, while the hunks of the base remain non-minimized.
        ///
        /// ```text
        /// line1-changed-by-both
        /// <<<<<<< local
        /// line2-to-be-changed-in-incoming
        /// ||||||| 9a8d80c
        /// line1-to-be-changed-by-both
        /// line2-to-be-changed-in-incoming
        /// =======
        /// line2-changed
        /// >>>>>>> incoming
        /// ```
        ZealousDiff3,
    }

    /// The way a conflict is resolved automatically.
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    pub enum ResolveWith {
        /// Choose our side to resolve a conflict.
        Ours,
        /// Choose their side to resolve a conflict.
        Theirs,
        /// Place our lines and their lines one after another, in any order.
        Union,
    }

    /// Configuration for the built-in [text driver](crate::blob::BuiltinDriver::Text).
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
    pub struct Options {
        /// The style used to visualize conflicts in merged files.
        pub conflict_style: ConflictStyle,
        /// The number of marker characters to draw. The default is 7, i.e. `<<<<<<<`.
        pub marker_size: usize,
        /// How to resolve conflicts automatically.
        ///
        /// If `None`, conflict markers are added according to `conflict_style` and `marker_size`.
        pub on_conflict: Option<ResolveWith>,
    }

    impl Default for Options {
        fn default() -> Self {
            Self {
                conflict_style: ConflictStyle::default(),
                marker_size: 7,
                on_conflict: None,
            }
        }
    }

    /// Merge `current` and `other`, using `ancestor` as their base, as configured by `opts`.
    ///
    /// The merged result is meant to be placed in `out`, with the resolution being returned.
    /// Note that this is not implemented yet, so calling it currently panics.
    pub fn merge(_out: &mut Vec<u8>, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution {
        todo!("text merge")
    }
}
154 changes: 154 additions & 0 deletions gix-merge/src/blob/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// TODO: remove this - only needed while &mut Vec<u8> isn't used.
#![allow(clippy::ptr_arg)]

use bstr::BString;
use std::path::PathBuf;

///
pub mod builtin_driver;
///
pub mod pipeline;
///
pub mod platform;

/// The outcome of a merge in terms of whether conflicts remain.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Resolution {
    /// The merge resolved everything.
    Complete,
    /// At least one conflict is still present.
    Conflict,
}

/// Classifies a resource by the role it plays in a merge.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ResourceKind {
    /// The state of our side.
    CurrentOrOurs,
    /// The state of their side.
    OtherOrTheirs,
    /// The state of the common base that ours and theirs share.
    CommonAncestorOrBase,
}

/// A merge driver that is built-in, as opposed to a user-defined [`Driver`] program.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BuiltinDriver {
    /// Merge text-sources, marking conflicts according to `merge.conflictStyle`
    /// in the Git configuration.
    ///
    /// If any of the inputs, *base*, *ours* or *theirs*, looks like non-text/binary,
    /// the [`Binary`](Self::Binary) driver will be used instead.
    ///
    /// Also see [`builtin_driver::text::ConflictStyle`].
    #[default]
    Text,
    /// Merge 'unmergeable' content by choosing *ours* or *theirs*, without performing
    /// an actual merge.
    ///
    /// Note that *ours* will always be chosen if the merge operation is for a virtual ancestor
    /// (a merge for merge-bases).
    Binary,
    /// Merge text-sources and resolve conflicts by adding the conflicting lines one after another,
    /// in unspecified order, and without adding conflict markers.
    ///
    /// This can be useful for files that change a lot, but remain usable merely by adding
    /// all changed lines.
    Union,
}

/// Define a driver program that performs merges.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Driver {
    /// The name of the driver, as referred to by `[merge "name"]` in the git configuration.
    pub name: BString,
    /// The human-readable version of `name`, to be used only when displaying driver-information to the user.
    pub display_name: BString,
    /// The command to execute to perform the entire merge, like `<command> %O %A %B %L %P %S %X %Y`.
    ///
    /// * **%O**
    ///     - The common ancestor version, or *base*.
    /// * **%A**
    ///     - The current version, or *ours*.
    /// * **%B**
    ///     - The other version, or *theirs*.
    /// * **%L**
    ///     - The conflict-marker size as positive number.
    /// * **%P**
    ///     - The path in which the merged result will be stored.
    /// * **%S**
    ///     - The conflict-label for the common ancestor or *base*.
    /// * **%X**
    ///     - The conflict-label for the current version or *ours*.
    /// * **%Y**
    ///     - The conflict-label for the other version or *theirs*.
    ///
    /// Note that conflict-labels are placed behind the conflict markers to annotate them.
    ///
    /// With all arguments substituted, a typical invocation could look like this:
    ///
    /// ```text
    /// <driver-program> .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature
    /// ```
    pub command: BString,
    /// If set, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all
    /// available merge-bases if there is more than one.
    ///
    /// The value may also name one of the special built-in drivers `text`, `binary` or `union`. Note that
    /// user-defined drivers with the same name will be preferred over built-in ones, but only for files whose
    /// git attributes specified the driver by *name*.
    pub recursive: Option<BString>,
}

/// A conversion pipeline that takes an object or path from what's stored in Git to what can be merged, while
/// following the guidance of git-attributes at the respective path to learn how the merge should be performed.
///
/// Depending on the source, different conversions are performed:
///
/// * `worktree on disk` -> `object for storage in git`
/// * `object` -> `possibly renormalized object`
///     - Renormalization means that the `object` is converted to what would be checked out into the work-tree,
///       just to turn it back into an object.
#[derive(Clone)]
pub struct Pipeline {
/// A way to read data directly from the worktree.
pub roots: pipeline::WorktreeRoots,
/// A pipeline to convert objects from the worktree to Git, and also from Git to the worktree, and back to Git.
pub filter: gix_filter::Pipeline,
/// Options affecting the way we read files.
pub options: pipeline::Options,
/// All available merge drivers.
///
/// They are referenced in git-attributes by name, and we hand out indices into this array.
drivers: Vec<Driver>,
/// Pre-configured attributes to obtain additional merge-related information.
attrs: gix_filter::attributes::search::Outcome,
/// A buffer to produce disk-accessible paths from worktree roots.
path: PathBuf,
}

/// A utility for gathering and processing all state necessary to perform a three-way merge.
///
/// It can re-use buffers if all three resources participating in the merge are
/// set repeatedly.
#[derive(Clone)]
pub struct Platform {
/// The current version (ours), if it was set.
current: Option<platform::Resource>,
/// The ancestor version (base), if it was set.
ancestor: Option<platform::Resource>,
/// The other version (theirs), if it was set.
other: Option<platform::Resource>,

/// A way to convert objects into a format that can be merged.
pub filter: Pipeline,
/// A way to access `.gitattributes`
pub attr_stack: gix_worktree::Stack,

/// The way we convert resources into mergeable states.
filter_mode: pipeline::Mode,
}
Loading

0 comments on commit b96d11f

Please sign in to comment.