From e8f2db300735bccd0bf20bd4f9a7779e01292519 Mon Sep 17 00:00:00 2001 From: Benjamin Saunders Date: Sat, 12 Nov 2022 11:19:03 -0800 Subject: [PATCH] local_backend: use ContentHash rather than hashing protos Insulates identifiers from the unstable serialized form. --- lib/src/backend.rs | 130 +++++++++++++++++++++++++++------------ lib/src/content_hash.rs | 6 ++ lib/src/local_backend.rs | 30 +++++---- lib/src/repo_path.rs | 8 ++- 4 files changed, 115 insertions(+), 59 deletions(-) diff --git a/lib/src/backend.rs b/lib/src/backend.rs index b3a323adad..49534cadd4 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -20,6 +20,7 @@ use std::vec::Vec; use thiserror::Error; +use crate::content_hash::ContentHash; use crate::repo_path::{RepoPath, RepoPathComponent}; content_hash! { @@ -59,8 +60,10 @@ impl CommitId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ChangeId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ChangeId(Vec); +} impl Debug for ChangeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -94,8 +97,10 @@ impl ChangeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct TreeId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct TreeId(Vec); +} impl Debug for TreeId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -129,8 +134,10 @@ impl TreeId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct FileId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct FileId(Vec); +} impl Debug for FileId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -160,8 +167,10 @@ impl FileId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct SymlinkId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct SymlinkId(Vec); +} impl Debug for SymlinkId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -191,8 +200,10 @@ impl SymlinkId { } } -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] -pub struct ConflictId(Vec); +content_hash! { + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] + pub struct ConflictId(Vec); +} impl Debug for ConflictId { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { @@ -256,39 +267,47 @@ impl Timestamp { } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct Signature { - pub name: String, - pub email: String, - pub timestamp: Timestamp, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct Signature { + pub name: String, + pub email: String, + pub timestamp: Timestamp, + } } -#[derive(Debug, Clone)] -pub struct Commit { - pub parents: Vec, - pub predecessors: Vec, - pub root_tree: TreeId, - pub change_id: ChangeId, - pub description: String, - pub author: Signature, - pub committer: Signature, +content_hash! { + #[derive(Debug, Clone)] + pub struct Commit { + pub parents: Vec, + pub predecessors: Vec, + pub root_tree: TreeId, + pub change_id: ChangeId, + pub description: String, + pub author: Signature, + pub committer: Signature, + } } -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct ConflictPart { - // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be - // useful e.g. after rebasing this conflict? - pub value: TreeValue, +content_hash! { + #[derive(Debug, PartialEq, Eq, Clone)] + pub struct ConflictPart { + // TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be + // useful e.g. after rebasing this conflict? + pub value: TreeValue, + } } -#[derive(Default, Debug, PartialEq, Eq, Clone)] -pub struct Conflict { - // A conflict is represented by a list of positive and negative states that need to be applied. - // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], - // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the - // same as non-conflict A. - pub removes: Vec, - pub adds: Vec, +content_hash! { + #[derive(Default, Debug, PartialEq, Eq, Clone)] + pub struct Conflict { + // A conflict is represented by a list of positive and negative states that need to be applied. + // In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C], + // remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the + // same as non-conflict A. + pub removes: Vec, + pub adds: Vec, + } } #[derive(Debug, Error, PartialEq, Eq)] @@ -310,6 +329,35 @@ pub enum TreeValue { Conflict(ConflictId), } +impl ContentHash for TreeValue { + fn hash(&self, state: &mut impl digest::Update) { + use TreeValue::*; + match *self { + Normal { ref id, executable } => { + state.update(&0u32.to_le_bytes()); + id.hash(state); + executable.hash(state); + } + Symlink(ref id) => { + state.update(&1u32.to_le_bytes()); + id.hash(state); + } + Tree(ref id) => { + state.update(&2u32.to_le_bytes()); + id.hash(state); + } + GitSubmodule(ref id) => { + state.update(&3u32.to_le_bytes()); + id.hash(state); + } + Conflict(ref id) => { + state.update(&4u32.to_le_bytes()); + id.hash(state); + } + } + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct TreeEntry<'a> { name: &'a RepoPathComponent, @@ -344,9 +392,11 @@ impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> { } } -#[derive(Default, Debug, Clone)] -pub struct Tree { - entries: BTreeMap, +content_hash! { + #[derive(Default, Debug, Clone)] + pub struct Tree { + entries: BTreeMap, + } } impl Tree { diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index 4a79426dd9..a2eae7512b 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -15,6 +15,12 @@ impl ContentHash for () { fn hash(&self, _: &mut impl digest::Update) {} } +impl ContentHash for bool { + fn hash(&self, state: &mut impl digest::Update) { + u8::from(*self).hash(state); + } +} + impl ContentHash for u8 { fn hash(&self, state: &mut impl digest::Update) { state.update(&[*self]); diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index 73b3159a9b..353bf0721f 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -27,6 +27,7 @@ use crate::backend::{ ConflictId, ConflictPart, FileId, MillisSinceEpoch, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; +use crate::content_hash::ContentHash; use crate::file_util::persist_content_addressed_temp_file; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -72,7 +73,7 @@ impl LocalBackend { pub fn load(store_path: &Path) -> Self { let root_commit_id = CommitId::from_bytes(&[0; 64]); - let empty_tree_id = TreeId::from_hex("786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce"); + let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310"); LocalBackend { path: store_path.to_path_buf(), root_commit_id, @@ -192,12 +193,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = tree_to_proto(tree); - let mut proto_bytes: Vec = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - temp_file.as_file().write_all(&proto_bytes)?; - - let id = TreeId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = TreeId::new(hash(tree).to_vec()); persist_content_addressed_temp_file(temp_file, self.tree_path(&id))?; Ok(id) @@ -215,12 +213,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = conflict_to_proto(conflict); - let mut proto_bytes: Vec = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; - - temp_file.as_file().write_all(&proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - let id = ConflictId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = ConflictId::new(hash(conflict).to_vec()); persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))?; Ok(id) @@ -242,12 +237,9 @@ impl Backend for LocalBackend { let temp_file = NamedTempFile::new_in(&self.path)?; let proto = commit_to_proto(commit); - let mut proto_bytes: Vec = Vec::new(); - proto.write_to_writer(&mut proto_bytes)?; - - temp_file.as_file().write_all(&proto_bytes)?; + proto.write_to_writer(&mut temp_file.as_file())?; - let id = CommitId::new(Blake2b512::digest(&proto_bytes).to_vec()); + let id = CommitId::new(hash(commit).to_vec()); persist_content_addressed_temp_file(temp_file, self.commit_path(&id))?; Ok(id) @@ -412,3 +404,9 @@ fn conflict_part_to_proto(part: &ConflictPart) -> crate::protos::store::conflict proto.content = MessageField::some(tree_value_to_proto(&part.value)); proto } + +fn hash(x: &impl ContentHash) -> digest::Output { + let mut hasher = Blake2b512::default(); + x.hash(&mut hasher); + hasher.finalize() +} diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index 81332fedf1..81cf2ba589 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -20,9 +20,11 @@ use thiserror::Error; use crate::file_util; -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] -pub struct RepoPathComponent { - value: String, +content_hash! { + #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] + pub struct RepoPathComponent { + value: String, + } } impl RepoPathComponent {