Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use gitoxide for merging trees #5411

Merged
merged 5 commits into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
746 changes: 385 additions & 361 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ resolver = "2"
[workspace.dependencies]
bstr = "1.10.0"
# Add the `tracing` or `tracing-detail` features to see more of gitoxide in the logs. Useful to see which programs it invokes.
gix = { git = "https://github.com/Byron/gitoxide", rev = "b36d7efb9743766338ac7bb7fb2399a06fae5e60", default-features = false, features = [
gix = { git = "https://github.com/Byron/gitoxide", rev = "3fb989be21c739bbfeac93953c1685e7c6cd2106", default-features = false, features = [
] }
git2 = { version = "0.19.0", features = [
"vendored-openssl",
"vendored-libgit2",
] }
uuid = { version = "1.11.0", features = ["serde"] }
uuid = { version = "1.11.0", features = ["v4", "serde"] }
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0.66"
tokio = { version = "1.41.0", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion crates/gitbutler-branch-actions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ publish = false
tracing.workspace = true
anyhow = "1.0.92"
git2.workspace = true
gix = { workspace = true, features = ["blob-diff", "revision"] }
gix = { workspace = true, features = ["blob-diff", "revision", "blob-merge"] }
tokio.workspace = true
gitbutler-oplog.workspace = true
gitbutler-repo.workspace = true
Expand Down
22 changes: 17 additions & 5 deletions crates/gitbutler-branch-actions/src/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use gitbutler_oplog::entry::{OperationKind, SnapshotDetails};
use gitbutler_oplog::{OplogExt, SnapshotExt};
use gitbutler_project::Project;
use gitbutler_reference::normalize_branch_name;
use gitbutler_repo::GixRepositoryExt;
use gitbutler_repo_actions::RepoActionsExt;
use gitbutler_stack::{
CommitOrChangeId, ForgeIdentifier, PatchReference, PatchReferenceUpdate, Series,
Expand Down Expand Up @@ -191,14 +192,20 @@ pub fn push_stack(project: &Project, branch_id: StackId, with_force: bool) -> Re

// First fetch, because we don't want to push integrated series
ctx.fetch(&default_target.push_remote_name(), None)?;
let check_commit = IsCommitIntegrated::new(ctx, &default_target)?;
let gix_repo = ctx
.gix_repository()?
.for_tree_diffing()?
.with_object_memory();
let cache = gix_repo.commit_graph_if_enabled()?;
let mut graph = gix_repo.revision_graph(cache.as_ref());
let mut check_commit = IsCommitIntegrated::new(ctx, &default_target, &gix_repo, &mut graph)?;
let stack_series = stack.list_series(ctx)?;
for series in stack_series {
if series.head.target == merge_base {
// Nothing to push for this one
continue;
}
if series_integrated(&check_commit, &series)? {
if series_integrated(&mut check_commit, &series)? {
// Already integrated, nothing to push
continue;
}
Expand All @@ -214,7 +221,7 @@ pub fn push_stack(project: &Project, branch_id: StackId, with_force: bool) -> Re
Ok(())
}

fn series_integrated(check_commit: &IsCommitIntegrated, series: &Series) -> Result<bool> {
fn series_integrated(check_commit: &mut IsCommitIntegrated, series: &Series) -> Result<bool> {
let mut is_integrated = false;
for commit in series.clone().local_commits.iter().rev() {
if !is_integrated {
Expand All @@ -226,12 +233,14 @@ fn series_integrated(check_commit: &IsCommitIntegrated, series: &Series) -> Resu

/// Returns the stack series for the API.
/// Newest first, oldest last in the list
/// `commits` is used to accelerate the is-integrated check.
pub(crate) fn stack_series(
ctx: &CommandContext,
branch: &mut Stack,
default_target: &Target,
check_commit: &IsCommitIntegrated,
check_commit: &mut IsCommitIntegrated,
remote_commit_data: HashMap<CommitData, git2::Oid>,
commits: &[VirtualBranchCommit],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general this is probably fine - but the commits that are passed in here, are something deprecated that I plan to remove asap

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am looking forward to seeing that work only done once, and what I understood is that stack_series is the new version, and I was basically altering the old one. The few changes I made should be easy to port to be internal to stack_series() (if this is what's needed).

) -> Result<(Vec<PatchSeries>, bool)> {
let mut requires_force = false;
let mut api_series: Vec<PatchSeries> = vec![];
Expand All @@ -248,7 +257,10 @@ pub(crate) fn stack_series(
// Reverse first instead of later, so that we catch the first integrated commit
for commit in series.clone().local_commits.iter().rev() {
if !is_integrated {
is_integrated = check_commit.is_integrated(commit)?;
is_integrated = commits
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to save time and be reasonable in comparison to what it was before. However, since the stack can have its own remote, I am not sure if the check_commit instance is configured correctly.

.iter()
.find_map(|c| (c.id == commit.id()).then_some(Ok(c.is_integrated)))
.unwrap_or_else(|| check_commit.is_integrated(commit))?;
}
let copied_from_remote_id = CommitData::try_from(commit)
.ok()
Expand Down
4 changes: 1 addition & 3 deletions crates/gitbutler-branch-actions/src/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,7 @@ pub fn get_applied_status_cached(
.filter_map(|claimed_hunk| {
// if any of the current hunks intersects with the owned hunk, we want to keep it
for (i, git_diff_hunk) in git_diff_hunks.iter().enumerate() {
if claimed_hunk == &Hunk::from(git_diff_hunk)
|| claimed_hunk.intersects(git_diff_hunk)
{
if claimed_hunk.intersects(git_diff_hunk) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please double-check this logic. I think it's the same as before, but avoids a ton of hashing that would happen otherwise and slow things down to a crawl.

let hash = Hunk::hash_diff(&git_diff_hunk.diff_lines);
if locks.contains_key(&hash) {
return None; // Defer allocation to unclaimed hunks processing
Expand Down
138 changes: 89 additions & 49 deletions crates/gitbutler-branch-actions/src/virtual.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@ use gitbutler_commit::{commit_ext::CommitExt, commit_headers::HasCommitHeaders};
use gitbutler_diff::{trees, GitHunk, Hunk};
use gitbutler_error::error::Code;
use gitbutler_operating_modes::assure_open_workspace_mode;
use gitbutler_oxidize::git2_signature_to_gix_signature;
use gitbutler_oxidize::{git2_signature_to_gix_signature, git2_to_gix_object_id, gix_to_git2_oid};
use gitbutler_project::access::WorktreeWritePermission;
use gitbutler_reference::{normalize_branch_name, Refname, RemoteRefname};
use gitbutler_repo::{
rebase::{cherry_rebase, cherry_rebase_group},
LogUntil, RepositoryExt,
GixRepositoryExt, LogUntil, RepositoryExt,
};
use gitbutler_repo_actions::RepoActionsExt;
use gitbutler_stack::{
reconcile_claims, BranchOwnershipClaims, ForgeIdentifier, Stack, StackId, Target,
VirtualBranchesHandle,
};
use gitbutler_time::time::now_since_unix_epoch_ms;
use gix::objs::Write;
use serde::Serialize;
use std::collections::HashSet;
use std::{collections::HashMap, path::PathBuf, vec};
Expand Down Expand Up @@ -300,8 +301,15 @@ pub fn list_virtual_branches_cached(

let branches_span =
tracing::debug_span!("handle branches", num_branches = status.branches.len()).entered();
let repo = ctx.repository();
let gix_repo = ctx
.gix_repository()?
.for_tree_diffing()?
.with_object_memory();
// We will perform virtual merges, no need to write them to the ODB.
let cache = gix_repo.commit_graph_if_enabled()?;
let mut graph = gix_repo.revision_graph(cache.as_ref());
for (mut branch, mut files) in status.branches {
let repo = ctx.repository();
update_conflict_markers(ctx, files.clone())?;

let upstream_branch = match branch.clone().upstream {
Expand All @@ -323,13 +331,18 @@ pub fn list_virtual_branches_cached(
.as_ref()
.map(
|upstream| -> Result<(HashSet<git2::Oid>, HashMap<CommitData, git2::Oid>)> {
let merge_base =
repo.merge_base(upstream.id(), default_target.sha)
.context(format!(
"failed to find merge base between {} and {}",
upstream.id(),
default_target.sha
))?;
let merge_base = gix_repo
.merge_base_with_graph(
git2_to_gix_object_id(upstream.id()),
git2_to_gix_object_id(default_target.sha),
&mut graph,
)
.context(format!(
"failed to find merge base between {} and {}",
upstream.id(),
default_target.sha
))?;
let merge_base = gitbutler_oxidize::gix_to_git2_oid(merge_base);
let remote_commit_ids = HashSet::from_iter(repo.l(
upstream.id(),
LogUntil::Commit(merge_base),
Expand All @@ -356,7 +369,8 @@ pub fn list_virtual_branches_cached(

// find all commits on head that are not on target.sha
let commits = repo.log(branch.head(), LogUntil::Commit(default_target.sha), false)?;
let check_commit = IsCommitIntegrated::new(ctx, &default_target)?;
let mut check_commit =
IsCommitIntegrated::new(ctx, &default_target, &gix_repo, &mut graph)?;
let vbranch_commits = {
let _span = tracing::debug_span!(
"is-commit-integrated",
Expand Down Expand Up @@ -397,9 +411,14 @@ pub fn list_virtual_branches_cached(
.collect::<Result<Vec<_>>>()?
};

let merge_base = repo
.merge_base(default_target.sha, branch.head())
let merge_base = gix_repo
.merge_base_with_graph(
git2_to_gix_object_id(default_target.sha),
git2_to_gix_object_id(branch.head()),
check_commit.graph,
)
.context("failed to find merge base")?;
let merge_base = gix_to_git2_oid(merge_base);
let base_current = true;

let upstream = upstream_branch.and_then(|upstream_branch| {
Expand Down Expand Up @@ -436,8 +455,9 @@ pub fn list_virtual_branches_cached(
ctx,
&mut branch,
&default_target,
&check_commit,
&mut check_commit,
remote_commit_data,
&vbranch_commits,
) {
Ok((series, force)) => {
if series.iter().any(|s| s.upstream_reference.is_some()) {
Expand Down Expand Up @@ -943,40 +963,50 @@ pub(crate) fn push(
})
}

pub(crate) struct IsCommitIntegrated<'repo> {
repo: &'repo git2::Repository,
target_commit_id: git2::Oid,
remote_head_id: git2::Oid,
type MergeBaseCommitGraph<'repo, 'cache> = gix::revwalk::Graph<
'repo,
'cache,
gix::revision::plumbing::graph::Commit<gix::revision::plumbing::merge_base::Flags>,
>;

pub(crate) struct IsCommitIntegrated<'repo, 'cache, 'graph> {
gix_repo: &'repo gix::Repository,
graph: &'graph mut MergeBaseCommitGraph<'repo, 'cache>,
target_commit_id: gix::ObjectId,
upstream_tree_id: gix::ObjectId,
upstream_commits: Vec<git2::Oid>,
/// A repository opened at the same path as `repo`, but with an in-memory ODB attached
/// to avoid writing intermediate objects.
inmemory_repo: git2::Repository,
}

impl<'repo> IsCommitIntegrated<'repo> {
pub(crate) fn new(ctx: &'repo CommandContext, target: &Target) -> anyhow::Result<Self> {
impl<'repo, 'cache, 'graph> IsCommitIntegrated<'repo, 'cache, 'graph> {
pub(crate) fn new(
ctx: &'repo CommandContext,
target: &Target,
gix_repo: &'repo gix::Repository,
graph: &'graph mut MergeBaseCommitGraph<'repo, 'cache>,
) -> anyhow::Result<Self> {
let remote_branch = ctx
.repository()
.maybe_find_branch_by_refname(&target.branch.clone().into())?
.ok_or(anyhow!("failed to get branch"))?;
let remote_head = remote_branch.get().peel_to_commit()?;
let upstream_commits =
let mut upstream_commits =
ctx.repository()
.l(remote_head.id(), LogUntil::Commit(target.sha), false)?;
let inmemory_repo = ctx.repository().in_memory_repo()?;
upstream_commits.sort();
let upstream_tree_id = ctx.repository().find_commit(remote_head.id())?.tree_id();
Ok(Self {
repo: ctx.repository(),
target_commit_id: target.sha,
remote_head_id: remote_head.id(),
gix_repo,
graph,
target_commit_id: git2_to_gix_object_id(target.sha),
upstream_tree_id: git2_to_gix_object_id(upstream_tree_id),
upstream_commits,
inmemory_repo,
})
}
}

impl IsCommitIntegrated<'_> {
pub(crate) fn is_integrated(&self, commit: &git2::Commit) -> Result<bool> {
if self.target_commit_id == commit.id() {
impl IsCommitIntegrated<'_, '_, '_> {
pub(crate) fn is_integrated(&mut self, commit: &git2::Commit) -> Result<bool> {
if self.target_commit_id == git2_to_gix_object_id(commit.id()) {
// could not be integrated if heads are the same.
return Ok(false);
}
Expand All @@ -986,44 +1016,54 @@ impl IsCommitIntegrated<'_> {
return Ok(false);
}

if self.upstream_commits.contains(&commit.id()) {
if self.upstream_commits.binary_search(&commit.id()).is_ok() {
return Ok(true);
}

let merge_base_id = self.repo.merge_base(self.target_commit_id, commit.id())?;
if merge_base_id.eq(&commit.id()) {
let merge_base_id = self.gix_repo.merge_base_with_graph(
self.target_commit_id,
git2_to_gix_object_id(commit.id()),
self.graph,
)?;
if gix_to_git2_oid(merge_base_id).eq(&commit.id()) {
// if merge branch is the same as branch head and there are upstream commits
// then it's integrated
return Ok(true);
}

let merge_base = self.repo.find_commit(merge_base_id)?;
let merge_base_tree = merge_base.tree()?;
let upstream = self.repo.find_commit(self.remote_head_id)?;
let upstream_tree = upstream.tree()?;

if merge_base_tree.id() == upstream_tree.id() {
let merge_base_tree_id = self.gix_repo.find_commit(merge_base_id)?.tree_id()?;
if merge_base_tree_id == self.upstream_tree_id {
// if merge base is the same as upstream tree, then it's integrated
return Ok(true);
}

// try to merge our tree into the upstream tree
let mut merge_index = self
.repo
.merge_trees(&merge_base_tree, &commit.tree()?, &upstream_tree, None)
let mut merge_options = self.gix_repo.tree_merge_options()?;
let conflict_kind = gix::merge::tree::UnresolvedConflict::Renames;
merge_options.fail_on_conflict = Some(conflict_kind);
let mut merge_output = self
.gix_repo
.merge_trees(
merge_base_tree_id,
git2_to_gix_object_id(commit.tree_id()),
self.upstream_tree_id,
Default::default(),
merge_options,
)
.context("failed to merge trees")?;

if merge_index.has_conflicts() {
if merge_output.has_unresolved_conflicts(conflict_kind) {
return Ok(false);
}

let merge_tree_oid = merge_index
.write_tree_to(&self.inmemory_repo)
.context("failed to write tree")?;
let merge_tree_id = merge_output
.tree
.write(|tree| self.gix_repo.write(tree))
.map_err(|err| anyhow!("failed to write tree: {err}"))?;

// if the merge_tree is the same as the new_target_tree and there are no files (uncommitted changes)
// then the vbranch is fully merged
Ok(merge_tree_oid == upstream_tree.id())
Ok(merge_tree_id == self.upstream_tree_id)
}
}

Expand Down
5 changes: 1 addition & 4 deletions crates/gitbutler-diff/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,9 @@ pub fn trees(
false => 0,
};
diff_opts
.recurse_untracked_dirs(true)
.include_untracked(true)
.show_binary(true)
.ignore_submodules(true)
.context_lines(context_lines)
.show_untracked_content(true);
.context_lines(context_lines);

let diff = repo.diff_tree_to_tree(Some(old_tree), Some(new_tree), Some(&mut diff_opts))?;
hunks_by_filepath(None, &diff)
Expand Down
Loading
Loading