From ca5294af0971da372c5d5a5c7a569810dd53de93 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Sep 2024 08:31:41 +0200 Subject: [PATCH] feat: add `InMemoryPassThrough` implementation. An implementation of `Header`, `Write` and `Find`, that can optionally write everything to an in-memory store, and if enabled, also read objects back from there. That way it can present a consistent view to objects from two locations. --- Cargo.lock | 1 + gix-odb/Cargo.toml | 1 + gix-odb/src/lib.rs | 3 + gix-odb/src/memory.rs | 258 ++++++++++++++++++++++++++++++++++++ gix-odb/tests/odb/memory.rs | 115 ++++++++++++++++ gix-odb/tests/odb/mod.rs | 1 + 6 files changed, 379 insertions(+) create mode 100644 gix-odb/src/memory.rs create mode 100644 gix-odb/tests/odb/memory.rs diff --git a/Cargo.lock b/Cargo.lock index c4c7d061430..fadad33adf5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2115,6 +2115,7 @@ dependencies = [ "gix-features 0.38.2", "gix-fs 0.11.3", "gix-hash 0.14.2", + "gix-hashtable 0.5.2", "gix-object 0.44.0", "gix-pack", "gix-path 0.10.10", diff --git a/gix-odb/Cargo.toml b/gix-odb/Cargo.toml index ce87448814c..bad6f16033b 100644 --- a/gix-odb/Cargo.toml +++ b/gix-odb/Cargo.toml @@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"] [dependencies] gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] } +gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" } gix-hash = { version = "^0.14.2", path = "../gix-hash" } gix-date = { version = "^0.9.0", path = "../gix-date" } gix-path = { version = "^0.10.10", path = "../gix-path" } diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs index 40d7c2aea90..aaedc74a175 100644 --- a/gix-odb/src/lib.rs +++ b/gix-odb/src/lib.rs @@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink { } } +/// +pub mod memory; + mod sink; /// diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs new file mode 100644 
index 00000000000..5fdbd2abe3e --- /dev/null +++ b/gix-odb/src/memory.rs @@ -0,0 +1,258 @@ +use crate::find::Header; +use crate::Cache; +use gix_object::Data; +use std::cell::RefCell; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; +use std::sync::Arc; + +/// An object database to read from any implementation but write to memory. +/// Previously written objects can be returned from memory upon query, which makes the view of objects consistent. +/// In-Memory objects can be disabled by [taking out its storage](Proxy::take_object_memory). From there in-memory +/// objects can also be persisted one by one. +/// +/// It's possible to turn off the memory by removing it from the instance. +pub struct Proxy { + /// The actual odb implementation + inner: T, + /// The kind of hash to produce when writing new objects. + object_hash: gix_hash::Kind, + /// The storage for in-memory objects. + /// If `None`, the proxy will always read from and write-through to `inner`. + memory: Option>, +} + +/// Lifecycle +impl Proxy { + /// Create a new instance using `odb` as actual object provider, with an empty in-memory store for + /// objects that are to be written. + /// Use `object_hash` to determine the kind of hash to produce when writing new objects. + pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy { + Proxy { + inner: odb, + object_hash, + memory: Some(Default::default()), + } + } + + /// Turn ourselves into our inner object database, while deallocating objects stored in memory. + pub fn into_inner(self) -> T { + self.inner + } + + /// Strip object memory off this instance, which means that writes will go through to the inner object database + /// right away. + /// This mode makes the proxy fully transparent. + pub fn with_write_passthrough(mut self) -> Self { + self.memory.take(); + self + } +} + +impl Proxy>>> { + /// No op, as we are containing an arc handle already. 
+ pub fn into_arc(self) -> std::io::Result>>>> { + Ok(self) + } +} + +impl Proxy>>> { + /// Create an entirely new instance, but with the in-memory objects moving between them. + pub fn into_arc(self) -> std::io::Result>>>> { + Ok(Proxy { + inner: self.inner.into_arc()?, + object_hash: self.object_hash, + memory: self.memory, + }) + } +} + +impl From for Proxy { + fn from(odb: crate::Handle) -> Self { + let object_hash = odb.store.object_hash; + Proxy::new(odb, object_hash) + } +} + +/// Memory Access +impl Proxy { + /// Take the memory storage itself, along with all objects stored in it so far, and return it. + /// + /// The instance will remain in a state where it won't be able to store objects in memory at all; + /// they will now be stored in the underlying object database. + /// This mode makes the proxy fully transparent. + /// + /// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or return the storage + /// using [`set_object_memory()`](Self::set_object_memory()). + pub fn take_object_memory(&mut self) -> Option { + self.memory.take().map(RefCell::into_inner) + } + + /// Set the object storage to contain only `new` objects, and return whichever objects were there previously. + pub fn set_object_memory(&mut self, new: Storage) -> Option { + let previous = self.take_object_memory(); + self.memory = Some(RefCell::new(new)); + previous + } + + /// If objects aren't written to memory yet, this will happen after the call. + /// + /// Otherwise, no change will be performed. + pub fn enable_object_memory(&mut self) -> &mut Self { + if self.memory.is_none() { + self.memory = Some(Default::default()); + } + self + } + + /// Reset the internal storage to be empty, and return the previous storage, with all objects + /// it contained. + /// + /// Note that this does nothing if this instance didn't contain object memory in the first place. + /// In that case, set it explicitly. 
+ pub fn reset_object_memory(&self) -> Option { + self.memory.as_ref().map(|m| std::mem::take(&mut *m.borrow_mut())) + } + + /// Return the number of objects currently stored in memory. + pub fn num_objects_in_memory(&self) -> usize { + self.memory.as_ref().map_or(0, |m| m.borrow().len()) + } +} + +impl Clone for Proxy +where + T: Clone, +{ + fn clone(&self) -> Self { + Proxy { + inner: self.inner.clone(), + object_hash: self.object_hash, + memory: self.memory.clone(), + } + } +} + +impl gix_object::Find for Proxy +where + T: gix_object::Find, +{ + fn try_find<'a>( + &self, + id: &gix_hash::oid, + buffer: &'a mut Vec, + ) -> Result>, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + buffer.clear(); + buffer.extend_from_slice(data); + return Ok(Some(Data { + kind: *kind, + data: &*buffer, + })); + } + } + self.inner.try_find(id, buffer) + } +} + +impl gix_object::Exists for Proxy +where + T: gix_object::Exists, +{ + fn exists(&self, id: &gix_hash::oid) -> bool { + self.memory.as_ref().map_or(false, |map| map.borrow().contains_key(id)) || self.inner.exists(id) + } +} + +impl crate::Header for Proxy +where + T: crate::Header, +{ + fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + return Ok(Some(Header::Loose { + kind: *kind, + size: data.len() as u64, + })); + } + } + self.inner.try_header(id) + } +} + +impl gix_object::FindHeader for Proxy +where + T: gix_object::FindHeader, +{ + fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { + if let Some(map) = self.memory.as_ref() { + let map = map.borrow(); + if let Some((kind, data)) = map.get(id) { + return Ok(Some(gix_object::Header { + kind: *kind, + size: data.len() as u64, + })); + } + } + self.inner.try_header(id) + } +} + +impl crate::Write for Proxy +where 
+ T: crate::Write, +{ + fn write_stream( + &self, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_stream(kind, size, from); + }; + + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + + let id = gix_object::compute_hash(self.object_hash, kind, &buf); + map.borrow_mut().insert(id, (kind, buf)); + Ok(id) + } +} + +impl Deref for Proxy { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for Proxy { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +/// A mapping between an object id and all data corresponding to an object, acting like a `HashMap`. +#[derive(Default, Debug, Clone, Eq, PartialEq)] +pub struct Storage(gix_hashtable::HashMap)>); + +impl Deref for Storage { + type Target = gix_hashtable::HashMap)>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Storage { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/gix-odb/tests/odb/memory.rs b/gix-odb/tests/odb/memory.rs new file mode 100644 index 00000000000..d0d8ddb2bfd --- /dev/null +++ b/gix-odb/tests/odb/memory.rs @@ -0,0 +1,115 @@ +use crate::odb::hex_to_id; +use gix_object::{tree, Exists, FindExt}; +use gix_odb::Write; +use gix_testtools::tempfile::TempDir; + +#[test] +fn without_memory() -> crate::Result { + use gix_odb::HeaderExt; + let (mut odb, _tmp) = db_rw()?; + let mut buf = Vec::new(); + let mem = odb.take_object_memory().expect("it starts out with memory set"); + assert_eq!(mem.len(), 0, "no object is stored initially"); + let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c"); + let tree = odb.find_tree(&existing, &mut buf).expect("present and valid"); + assert_eq!(tree.entries.len(), 1); + odb.header(existing).expect("header can be found just the same"); + assert!(odb.exists(&existing)); + + let mut tree = tree.to_owned(); + 
tree.entries.push(tree::Entry { + mode: tree::EntryKind::Blob.into(), + filename: "z-for-sorting_another-file-with-same-content".into(), + oid: existing, + }); + let new_tree_id = odb.write(&tree)?; + assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c")); + let actual = odb.header(new_tree_id).expect("header of new objects can be found"); + assert_eq!(actual.kind(), gix_object::Kind::Tree); + assert_eq!(actual.size(), 104); + + let new_tree = odb + .find_tree(&new_tree_id, &mut buf) + .expect("new tree is also available as object") + .to_owned(); + assert_eq!(new_tree, tree); + assert!(!odb.exists(&gix_hash::Kind::Sha1.null())); + + Ok(()) +} + +#[test] +fn with_memory() -> crate::Result { + use gix_object::FindHeader; + let mut odb = db()?; + assert_eq!( + (*odb).iter()?.count(), + 6, + "let's be sure we didn't accidentally write anything" + ); + let mut buf = Vec::new(); + let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c"); + let tree = odb.find_tree(&existing, &mut buf).expect("present and valid"); + assert!(odb.exists(&existing)); + assert_eq!(tree.entries.len(), 1); + odb.try_header(&existing)?.expect("header can be found just the same"); + assert_eq!( + odb.num_objects_in_memory(), + 0, + "nothing is stored when fetching objects - it's not an object cache" + ); + + let mut tree = tree.to_owned(); + tree.entries.push(tree::Entry { + mode: tree::EntryKind::Blob.into(), + filename: "z-for-sorting_another-file-with-same-content".into(), + oid: existing, + }); + let new_tree_id = odb.write(&tree)?; + assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c")); + let actual = odb + .try_header(&new_tree_id)? 
+ .expect("header of new objects can be found"); + assert_eq!(actual.kind, gix_object::Kind::Tree); + assert_eq!(actual.size, 104); + + let new_tree = odb + .find_tree(&new_tree_id, &mut buf) + .expect("new tree is also available as object") + .to_owned(); + assert_eq!(new_tree, tree); + + let mem = odb.reset_object_memory().expect("memory is still available"); + assert_eq!(mem.len(), 1, "one new object was just written"); + + assert_eq!( + odb.try_header(&new_tree_id)?, + None, + "without memory, the object can't be found anymore" + ); + + let prev_mem = odb.set_object_memory(mem).expect("reset means it's just cleared"); + assert_eq!(prev_mem.len(), 0, "nothing was stored after the reset"); + + assert_eq!(odb.num_objects_in_memory(), 1, "we put all previous objects back"); + + let odb2 = odb.clone(); + assert_eq!(odb2.num_objects_in_memory(), 1, "it also clones the object memory"); + + assert!(!odb.exists(&gix_hash::Kind::Sha1.null())); + + Ok(()) +} + +fn db() -> crate::Result> { + let odb = gix_odb::at( + gix_testtools::scripted_fixture_read_only_standalone("repo_with_loose_objects.sh")?.join(".git/objects"), + )?; + Ok(gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1)) +} + +fn db_rw() -> crate::Result<(gix_odb::memory::Proxy, TempDir)> { + let tmp = gix_testtools::scripted_fixture_writable_standalone("repo_with_loose_objects.sh")?; + let odb = gix_odb::at(tmp.path().join(".git/objects"))?; + Ok((gix_odb::memory::Proxy::new(odb, gix_hash::Kind::Sha1), tmp)) +} diff --git a/gix-odb/tests/odb/mod.rs b/gix-odb/tests/odb/mod.rs index 6e4b1506bae..353bb78ee66 100644 --- a/gix-odb/tests/odb/mod.rs +++ b/gix-odb/tests/odb/mod.rs @@ -18,6 +18,7 @@ fn db_small_packs() -> gix_odb::Handle { pub mod alternate; pub mod find; pub mod header; +pub mod memory; pub mod regression; pub mod sink; pub mod store;