From d5565daf9ece760af99e6b6c7c711440b95cd359 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 13 Dec 2021 10:17:51 +0800 Subject: [PATCH] refactor (#266) --- git-odb/src/store/general/find.rs | 38 ++ git-odb/src/store/general/handle.rs | 156 ++++++ git-odb/src/store/general/init.rs | 29 ++ git-odb/src/store/general/load_indices.rs | 123 +++++ git-odb/src/store/general/metrics.rs | 52 ++ git-odb/src/store/general/mod.rs | 582 +--------------------- git-odb/src/store/general/store.rs | 167 +++++++ 7 files changed, 571 insertions(+), 576 deletions(-) create mode 100644 git-odb/src/store/general/find.rs create mode 100644 git-odb/src/store/general/handle.rs create mode 100644 git-odb/src/store/general/init.rs create mode 100644 git-odb/src/store/general/load_indices.rs create mode 100644 git-odb/src/store/general/metrics.rs create mode 100644 git-odb/src/store/general/store.rs diff --git a/git-odb/src/store/general/find.rs b/git-odb/src/store/general/find.rs new file mode 100644 index 00000000000..9b9e4d52c73 --- /dev/null +++ b/git-odb/src/store/general/find.rs @@ -0,0 +1,38 @@ +use git_hash::oid; +use git_object::Data; +use git_pack::cache::DecodeEntry; +use git_pack::data::entry::Location; +use git_pack::index::Entry; +use std::ops::Deref; + +impl crate::pack::Find for super::Handle +where + S: Deref + Clone, +{ + type Error = crate::compound::find::Error; + + fn contains(&self, id: impl AsRef) -> bool { + todo!("contains") + } + + fn try_find_cached<'a>( + &self, + id: impl AsRef, + buffer: &'a mut Vec, + pack_cache: &mut impl DecodeEntry, + ) -> Result, Option)>, Self::Error> { + todo!("try find cached") + } + + fn location_by_oid(&self, id: impl AsRef, buf: &mut Vec) -> Option { + todo!("location by oid") + } + + fn index_iter_by_pack_id(&self, pack_id: u32) -> Option + '_>> { + todo!("index iter by pack id") + } + + fn entry_by_location(&self, location: &Location) -> Option> { + todo!("entry by location") + } +} diff --git 
a/git-odb/src/store/general/handle.rs b/git-odb/src/store/general/handle.rs new file mode 100644 index 00000000000..fc7958d1793 --- /dev/null +++ b/git-odb/src/store/general/handle.rs @@ -0,0 +1,156 @@ +use crate::general::store; +use git_features::threading::OwnShared; +use std::ops::Deref; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +pub(crate) mod multi_index { + // TODO: replace this one with an actual implementation of a multi-pack index. + pub type File = (); +} + +pub enum SingleOrMultiIndex { + Single { + index: Arc, + data: Option>, + }, + Multi { + index: Arc, + data: Vec>>, + }, +} + +pub struct IndexLookup { + pub(crate) file: SingleOrMultiIndex, + pub(crate) id: store::IndexId, +} + +pub struct IndexForObjectInPack { + /// The internal identifier of the pack itself, which either is referred to by an index or a multi-pack index. + pack_id: store::PackId, + /// The index of the object within the pack + object_index_in_pack: u32, +} + +pub(crate) mod index_lookup { + use crate::general::{handle, store}; + use git_hash::oid; + use std::sync::Arc; + + impl handle::IndexLookup { + /// See if the oid is contained in this index, and return its full id for lookup possibly alongside its data file if already + /// loaded. + /// If it is not loaded, ask it to be loaded and put it into the returned mutable option for safe-keeping. 
+ fn lookup( + &mut self, + object_id: &oid, + ) -> Option<(handle::IndexForObjectInPack, &mut Option>)> { + let id = self.id; + match &mut self.file { + handle::SingleOrMultiIndex::Single { index, data } => { + index.lookup(object_id).map(|object_index_in_pack| { + ( + handle::IndexForObjectInPack { + pack_id: store::PackId { + index: id, + multipack_index: None, + }, + object_index_in_pack, + }, + data, + ) + }) + } + handle::SingleOrMultiIndex::Multi { index, data } => { + todo!("find respective pack and return it as &mut Option<>") + } + } + } + } +} + +pub(crate) enum Mode { + DeletedPacksAreInaccessible, + /// This mode signals that we should not unload packs even after they went missing. + KeepDeletedPacksAvailable, +} + +/// Handle registration +impl super::Store { + pub(crate) fn register_handle(&self) -> Mode { + self.num_handles_unstable.fetch_add(1, Ordering::Relaxed); + Mode::DeletedPacksAreInaccessible + } + pub(crate) fn remove_handle(&self, mode: Mode) { + match mode { + Mode::KeepDeletedPacksAvailable => { + let _lock = self.path.lock(); + self.num_handles_stable.fetch_sub(1, Ordering::SeqCst) + } + Mode::DeletedPacksAreInaccessible => self.num_handles_unstable.fetch_sub(1, Ordering::Relaxed), + }; + } + pub(crate) fn upgrade_handle(&self, mode: Mode) -> Mode { + if let Mode::DeletedPacksAreInaccessible = mode { + let _lock = self.path.lock(); + self.num_handles_stable.fetch_add(1, Ordering::SeqCst); + self.num_handles_unstable.fetch_sub(1, Ordering::SeqCst); + } + Mode::KeepDeletedPacksAvailable + } +} + +/// Handle creation +impl super::Store { + pub fn to_handle( + self: &OwnShared, + refresh_mode: crate::RefreshMode, + ) -> super::Handle> { + let token = self.register_handle(); + super::Handle { + store: self.clone(), + refresh_mode, + token: Some(token), + } + } +} + +impl super::Handle +where + S: Deref + Clone, +{ + /// Call once if pack ids are stored and later used for lookup, meaning they should always remain mapped and not be unloaded + 
/// even if they disappear from disk. + /// This must be called if there is a chance that git maintenance is happening while a pack is created. + pub fn prevent_pack_unload(&mut self) { + self.token = self.token.take().map(|token| self.store.upgrade_handle(token)); + } + + pub fn store(&self) -> &S::Target { + &*self.store + } +} + +impl Drop for super::Handle +where + S: Deref + Clone, +{ + fn drop(&mut self) { + if let Some(token) = self.token.take() { + self.store.remove_handle(token) + } + } +} + +impl Clone for super::Handle +where + S: Deref + Clone, +{ + fn clone(&self) -> Self { + super::Handle { + store: self.store.clone(), + refresh_mode: self.refresh_mode, + token: self.store.register_handle().into(), + } + } +} diff --git a/git-odb/src/store/general/init.rs b/git-odb/src/store/general/init.rs new file mode 100644 index 00000000000..7941da2d3e8 --- /dev/null +++ b/git-odb/src/store/general/init.rs @@ -0,0 +1,29 @@ +use crate::general::store; +use crate::general::store::{MutableIndexAndPack, SlotMapIndex}; +use arc_swap::ArcSwap; +use git_features::threading::OwnShared; +use std::iter::FromIterator; +use std::ops::Deref; +use std::path::PathBuf; +use std::sync::atomic::AtomicUsize; +use std::sync::Arc; + +impl super::Store { + pub fn at(objects_dir: impl Into) -> std::io::Result { + let objects_dir = objects_dir.into(); + if !objects_dir.is_dir() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, // TODO: use NotADirectory when stabilized + format!("'{}' wasn't a directory", objects_dir.display()), + )); + } + Ok(super::Store { + path: parking_lot::Mutex::new(objects_dir), + files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(256)), // TODO: figure this out from the amount of files currently present + index: ArcSwap::new(Arc::new(SlotMapIndex::default())), + num_handles_stable: Default::default(), + num_handles_unstable: Default::default(), + num_disk_state_consolidation: Default::default(), + }) + } +} diff --git 
a/git-odb/src/store/general/load_indices.rs b/git-odb/src/store/general/load_indices.rs new file mode 100644 index 00000000000..960e1d64823 --- /dev/null +++ b/git-odb/src/store/general/load_indices.rs @@ -0,0 +1,123 @@ +use crate::general::{handle, store}; +use std::path::PathBuf; + +use crate::general::store::StateId; +use crate::RefreshMode; +use std::sync::atomic::Ordering; +use std::sync::Arc; + +pub(crate) enum Outcome { + /// Drop all data and fully replace it with `indices`. + /// This happens if we have witnessed a generational change invalidating all of our ids and causing currently loaded + /// indices and maps to be dropped. + Replace { + indices: Vec, // should probably be SmallVec to get around most allocations + loose_dbs: Arc>, + marker: store::SlotIndexMarker, // use to show where the caller left off last time + }, + /// Despite all values being full copies, indices are still compatible to what was before. This also means + /// the caller can continue searching the added indices and loose-dbs. + /// Or in other words, new indices were only added to the known list, and what was seen before is known not to have changed. + /// Besides that, the full internal state can be replaced as with `Replace`. + ReplaceStable { + indices: Vec, // should probably be SmallVec to get around most allocations + loose_dbs: Arc>, + marker: store::SlotIndexMarker, // use to show where the caller left off last time + }, + /// No new indices to look at, caller should give up + NoMoreIndices, +} + +impl super::Store { + pub(crate) fn load_next_indices( + &self, + refresh_mode: RefreshMode, + marker: Option, + ) -> std::io::Result { + let index = self.index.load(); + let state_id = index.state_id(); + if index.loose_dbs.is_empty() { + // TODO: figure out what kind of refreshes we need. This one loads in the initial slot map, but I think this cost is paid + // in full during instantiation. 
+ return self.consolidate_with_disk_state(state_id); + } + + Ok(match marker { + Some(marker) => { + if marker.generation != index.generation { + self.collect_replace_outcome(false /*stable*/) + } else if marker.state_id == state_id { + // Nothing changed in the mean time, try to load another index… + + // …and if that didn't yield anything new consider refreshing our disk state. + match refresh_mode { + RefreshMode::Never => Outcome::NoMoreIndices, + RefreshMode::AfterAllIndicesLoaded => return self.consolidate_with_disk_state(state_id), + } + } else { + self.collect_replace_outcome(true /*stable*/) + } + } + None => self.collect_replace_outcome(false /*stable*/), + }) + } + + /// refresh and possibly clear out our existing data structures, causing all pack ids to be invalidated. + fn consolidate_with_disk_state(&self, seen: StateId) -> std::io::Result { + let objects_directory = self.path.lock(); + if seen != self.index.load().state_id() { + todo!("return …") + } + self.num_disk_state_consolidation.fetch_add(1, Ordering::Relaxed); + let mut db_paths = crate::alternate::resolve(&*objects_directory) + .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; + // These are in addition to our objects directory + db_paths.insert(0, objects_directory.clone()); + todo!() + } + + /// If there is no handle with stable pack ids requirements, unload them. + /// This property also relates to us pruning our internal state/doing internal maintenance which affects ids, too. + /// + /// Note that this must be called with a lock to the relevant state held to assure these values don't change while + /// we are working. 
+ fn may_unload_packs(&mut self, guard: &parking_lot::MutexGuard<'_, PathBuf>) -> bool { + self.num_handles_stable.load(Ordering::SeqCst) == 0 + } + + fn collect_replace_outcome(&self, is_stable: bool) -> Outcome { + let index = self.index.load(); + let indices = index + .slot_indices + .iter() + .map(|idx| (*idx, &self.files[*idx])) + .filter_map(|(id, file)| { + let lookup = match (&**file.files.load()).as_ref()? { + store::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { + index: bundle.index.loaded()?.clone(), + data: bundle.data.loaded().cloned(), + }, + store::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { + index: multi.multi_index.loaded()?.clone(), + data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), + }, + }; + handle::IndexLookup { file: lookup, id }.into() + }) + .collect(); + + if is_stable { + Outcome::ReplaceStable { + indices, + loose_dbs: Arc::clone(&index.loose_dbs), + marker: index.marker(), + } + } else { + Outcome::Replace { + indices, + loose_dbs: Arc::clone(&index.loose_dbs), + marker: index.marker(), + } + } + } +} diff --git a/git-odb/src/store/general/metrics.rs b/git-odb/src/store/general/metrics.rs new file mode 100644 index 00000000000..b4880c443e9 --- /dev/null +++ b/git-odb/src/store/general/metrics.rs @@ -0,0 +1,52 @@ +use crate::general::store; +use crate::general::store::IndexAndPacks; +use std::sync::atomic::Ordering; + +impl super::Store { + pub fn metrics(&self) -> store::Metrics { + let mut open_packs = 0; + let mut open_indices = 0; + let mut known_packs = 0; + let mut known_indices = 0; + let mut unused_slots = 0; + + for f in self.index.load().slot_indices.iter().map(|idx| &self.files[*idx]) { + match &**f.files.load() { + Some(IndexAndPacks::Index(bundle)) => { + if bundle.index.is_loaded() { + open_indices += 1; + } + known_indices += 1; + if bundle.data.is_loaded() { + open_packs += 1; + } + known_packs += 1; + } + Some(IndexAndPacks::MultiIndex(multi)) => { + if 
multi.multi_index.is_loaded() { + open_indices += 1; + } + known_indices += 1; + for pack in multi.data.iter() { + if pack.is_loaded() { + open_packs += 1; + } + known_packs += 1; + } + } + None => unused_slots += 1, + } + } + + store::Metrics { + num_handles: self.num_handles_unstable.load(Ordering::Relaxed) + + self.num_handles_stable.load(Ordering::Relaxed), + num_refreshes: self.num_disk_state_consolidation.load(Ordering::Relaxed), + open_packs, + open_indices, + known_indices, + known_packs, + unused_slots, + } + } +} diff --git a/git-odb/src/store/general/mod.rs b/git-odb/src/store/general/mod.rs index d0f9028d5ec..63846aec8ca 100644 --- a/git-odb/src/store/general/mod.rs +++ b/git-odb/src/store/general/mod.rs @@ -39,584 +39,14 @@ pub struct Store { pub(crate) num_disk_state_consolidation: AtomicUsize, } -mod find { - use git_hash::oid; - use git_object::Data; - use git_pack::cache::DecodeEntry; - use git_pack::data::entry::Location; - use git_pack::index::Entry; - use std::ops::Deref; +mod find; - impl crate::pack::Find for super::Handle - where - S: Deref + Clone, - { - type Error = crate::compound::find::Error; +mod init; - fn contains(&self, id: impl AsRef) -> bool { - todo!("contains") - } +pub mod store; - fn try_find_cached<'a>( - &self, - id: impl AsRef, - buffer: &'a mut Vec, - pack_cache: &mut impl DecodeEntry, - ) -> Result, Option)>, Self::Error> { - todo!("try find cached") - } +pub mod handle; - fn location_by_oid(&self, id: impl AsRef, buf: &mut Vec) -> Option { - todo!("location by oid") - } +pub mod load_indices; - fn index_iter_by_pack_id(&self, pack_id: u32) -> Option + '_>> { - todo!("index iter by pack id") - } - - fn entry_by_location(&self, location: &Location) -> Option> { - todo!("entry by location") - } - } -} - -mod init { - use crate::general::store; - use crate::general::store::{MutableIndexAndPack, SlotMapIndex}; - use arc_swap::ArcSwap; - use git_features::threading::OwnShared; - use std::iter::FromIterator; - use 
std::ops::Deref; - use std::path::PathBuf; - use std::sync::atomic::AtomicUsize; - use std::sync::Arc; - - impl super::Store { - pub fn at(objects_dir: impl Into) -> std::io::Result { - let objects_dir = objects_dir.into(); - if !objects_dir.is_dir() { - return Err(std::io::Error::new( - std::io::ErrorKind::Other, // TODO: use NotADirectory when stabilized - format!("'{}' wasn't a directory", objects_dir.display()), - )); - } - Ok(super::Store { - path: parking_lot::Mutex::new(objects_dir), - files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(256)), // TODO: figure this out from the amount of files currently present - index: ArcSwap::new(Arc::new(SlotMapIndex::default())), - num_handles_stable: Default::default(), - num_handles_unstable: Default::default(), - num_disk_state_consolidation: Default::default(), - }) - } - } -} - -pub mod store { - use arc_swap::ArcSwap; - use git_features::hash; - use std::ops::BitXor; - use std::path::{Path, PathBuf}; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; - - /// An id to refer to an index file or a multipack index file - pub type IndexId = usize; - pub(crate) type StateId = u32; - - /// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea - /// of whether stored `PackId`s are still usable. - #[derive(Default)] - pub struct SlotIndexMarker { - /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible. - /// This value changes once the internal representation is compacted, something that may happen only if there is no handle - /// requiring stable pack indices. - pub(crate) generation: u8, - /// A unique id identifying the index state as well as all loose databases we have last observed. - /// If it changes in any way, the value is different. 
- pub(crate) state_id: StateId, - } - - /// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not. - pub struct PackId { - /// Note that if `multipack_index = None`, this index is corresponding to the index id. - /// So a pack is always identified by its corresponding index. - /// If it is a multipack index, this is the id / offset of the pack in the `multipack_index`. - pub(crate) index: IndexId, - pub(crate) multipack_index: Option, - } - - /// An index that changes only if the packs directory changes and its contents is re-read. - #[derive(Default)] - pub struct SlotMapIndex { - /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet. - pub(crate) slot_indices: Vec, - /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is this objects - /// directory loose file database. All other entries are the loose stores of alternates. - /// It's in an Arc to be shared to Handles, but not to be shared across SlotMapIndices - pub(crate) loose_dbs: Arc>, - - /// A static value that doesn't ever change for a particular clone of this index. - pub(crate) generation: u8, - /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded - /// in parallel. - /// Shared across SlotMapIndex instances of the same generation. - pub(crate) next_index_to_load: Arc, - /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed. - /// If a load failed, there will also be an increment. - /// Shared across SlotMapIndex instances of the same generation. - pub(crate) loaded_indices: Arc, - } - - impl SlotMapIndex { - pub(crate) fn state_id(self: &Arc) -> StateId { - // We let the loaded indices take part despite not being part of our own snapshot. 
- // This is to account for indices being loaded in parallel without actually changing the snapshot itself. - let mut hash = hash::crc32(&(Arc::as_ptr(&self.loose_dbs) as usize).to_be_bytes()); - hash = hash::crc32_update(hash, &(Arc::as_ptr(self) as usize).to_be_bytes()); - hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes()) - } - - pub(crate) fn marker(self: &Arc) -> SlotIndexMarker { - SlotIndexMarker { - generation: self.generation, - state_id: self.state_id(), - } - } - } - - #[derive(Clone)] - pub(crate) struct OnDiskFile { - /// The last known path of the file - path: Arc, - state: OnDiskFileState, - } - - #[derive(Clone)] - pub(crate) enum OnDiskFileState { - /// The file is on disk and can be loaded from there. - Unloaded, - Loaded(T), - /// The file was loaded, but appeared to be missing on disk after reconciling our state with what's on disk. - /// As there were handles that required pack-id stability we had to keep the item to allow finding it on later - /// lookups. - Garbage(T), - /// File is missing on disk and could not be loaded when we tried or turned missing after reconciling our state. - Missing, - } - - impl OnDiskFile { - /// Return true if we hold a memory map of the file already. - pub fn is_loaded(&self) -> bool { - matches!(self.state, OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_)) - } - - pub fn loaded(&self) -> Option<&T> { - use OnDiskFileState::*; - match &self.state { - Loaded(v) | Garbage(v) => Some(v), - Unloaded | Missing => None, - } - } - - /// We do it like this as we first have to check for a loaded interior in read-only mode, and then upgrade - /// when we know that loading is necessary. This also works around borrow check, which is a nice coincidence. 
- pub fn do_load(&mut self, load: impl FnOnce(&Path) -> std::io::Result) -> std::io::Result> { - use OnDiskFileState::*; - match &mut self.state { - Loaded(_) | Garbage(_) => unreachable!("BUG: check before calling this"), - Missing => Ok(None), - Unloaded => match load(&self.path) { - Ok(v) => { - self.state = OnDiskFileState::Loaded(v); - match &self.state { - Loaded(v) => Ok(Some(v)), - _ => unreachable!(), - } - } - Err(err) if err.kind() == std::io::ErrorKind::NotFound => { - self.state = OnDiskFileState::Missing; - Ok(None) - } - Err(err) => Err(err), - }, - } - } - } - - #[derive(Clone)] - pub(crate) struct IndexFileBundle { - pub index: OnDiskFile>, - pub data: OnDiskFile>, - } - - #[derive(Clone)] - pub(crate) struct MultiIndexFileBundle { - pub multi_index: OnDiskFile>, - pub data: Vec>>, - } - - #[derive(Clone)] - pub(crate) enum IndexAndPacks { - Index(IndexFileBundle), - /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be multiple overall. - MultiIndex(MultiIndexFileBundle), - } - - #[derive(Default)] - pub(crate) struct MutableIndexAndPack { - pub(crate) files: ArcSwap>, - pub(crate) write: parking_lot::Mutex<()>, - } - - /// A snapshot about resource usage. - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub struct Metrics { - pub num_handles: usize, - pub num_refreshes: usize, - pub open_indices: usize, - pub known_indices: usize, - pub open_packs: usize, - pub known_packs: usize, - pub unused_slots: usize, - } -} - -pub mod handle { - use crate::general::store; - use git_features::threading::OwnShared; - use std::ops::Deref; - use std::sync::atomic::Ordering; - use std::sync::Arc; - - pub(crate) mod multi_index { - // TODO: replace this one with an actual implementation of a multi-pack index. 
- pub type File = (); - } - - pub enum SingleOrMultiIndex { - Single { - index: Arc, - data: Option>, - }, - Multi { - index: Arc, - data: Vec>>, - }, - } - - pub struct IndexLookup { - pub(crate) file: SingleOrMultiIndex, - pub(crate) id: store::IndexId, - } - - pub struct IndexForObjectInPack { - /// The internal identifier of the pack itself, which either is referred to by an index or a multi-pack index. - pack_id: store::PackId, - /// The index of the object within the pack - object_index_in_pack: u32, - } - - pub(crate) mod index_lookup { - use crate::general::{handle, store}; - use git_hash::oid; - use std::sync::Arc; - - impl handle::IndexLookup { - /// See if the oid is contained in this index, and return its full id for lookup possibly alongside its data file if already - /// loaded. - /// If it is not loaded, ask it to be loaded and put it into the returned mutable option for safe-keeping. - fn lookup( - &mut self, - object_id: &oid, - ) -> Option<(handle::IndexForObjectInPack, &mut Option>)> { - let id = self.id; - match &mut self.file { - handle::SingleOrMultiIndex::Single { index, data } => { - index.lookup(object_id).map(|object_index_in_pack| { - ( - handle::IndexForObjectInPack { - pack_id: store::PackId { - index: id, - multipack_index: None, - }, - object_index_in_pack, - }, - data, - ) - }) - } - handle::SingleOrMultiIndex::Multi { index, data } => { - todo!("find respective pack and return it as &mut Option<>") - } - } - } - } - } - - pub(crate) enum Mode { - DeletedPacksAreInaccessible, - /// This mode signals that we should not unload packs even after they went missing. 
- KeepDeletedPacksAvailable, - } - - /// Handle registration - impl super::Store { - pub(crate) fn register_handle(&self) -> Mode { - self.num_handles_unstable.fetch_add(1, Ordering::Relaxed); - Mode::DeletedPacksAreInaccessible - } - pub(crate) fn remove_handle(&self, mode: Mode) { - match mode { - Mode::KeepDeletedPacksAvailable => { - let _lock = self.path.lock(); - self.num_handles_stable.fetch_sub(1, Ordering::SeqCst) - } - Mode::DeletedPacksAreInaccessible => self.num_handles_unstable.fetch_sub(1, Ordering::Relaxed), - }; - } - pub(crate) fn upgrade_handle(&self, mode: Mode) -> Mode { - if let Mode::DeletedPacksAreInaccessible = mode { - let _lock = self.path.lock(); - self.num_handles_stable.fetch_add(1, Ordering::SeqCst); - self.num_handles_unstable.fetch_sub(1, Ordering::SeqCst); - } - Mode::KeepDeletedPacksAvailable - } - } - - /// Handle creation - impl super::Store { - pub fn to_handle( - self: &OwnShared, - refresh_mode: crate::RefreshMode, - ) -> super::Handle> { - let token = self.register_handle(); - super::Handle { - store: self.clone(), - refresh_mode, - token: Some(token), - } - } - } - - impl super::Handle - where - S: Deref + Clone, - { - /// Call once if pack ids are stored and later used for lookup, meaning they should always remain mapped and not be unloaded - /// even if they disappear from disk. - /// This must be called if there is a chance that git maintenance is happening while a pack is created. 
- pub fn prevent_pack_unload(&mut self) { - self.token = self.token.take().map(|token| self.store.upgrade_handle(token)); - } - - pub fn store(&self) -> &S::Target { - &*self.store - } - } - - impl Drop for super::Handle - where - S: Deref + Clone, - { - fn drop(&mut self) { - if let Some(token) = self.token.take() { - self.store.remove_handle(token) - } - } - } - - impl Clone for super::Handle - where - S: Deref + Clone, - { - fn clone(&self) -> Self { - super::Handle { - store: self.store.clone(), - refresh_mode: self.refresh_mode, - token: self.store.register_handle().into(), - } - } - } -} - -pub mod load_indices { - use crate::general::{handle, store}; - use std::path::PathBuf; - - use crate::general::store::StateId; - use crate::RefreshMode; - use std::sync::atomic::Ordering; - use std::sync::Arc; - - pub(crate) enum Outcome { - /// Drop all data and fully replace it with `indices`. - /// This happens if we have witnessed a generational change invalidating all of our ids and causing currently loaded - /// indices and maps to be dropped. - Replace { - indices: Vec, // should probably be SmallVec to get around most allocations - loose_dbs: Arc>, - marker: store::SlotIndexMarker, // use to show where the caller left off last time - }, - /// Despite all values being full copies, indices are still compatible to what was before. This also means - /// the caller can continue searching the added indices and loose-dbs. - /// Besides that, the full internal state can be replaced as with `Replace`. 
- ReplaceStable { - indices: Vec, // should probably be SmallVec to get around most allocations - loose_dbs: Arc>, - marker: store::SlotIndexMarker, // use to show where the caller left off last time - }, - /// No new indices to look at, caller should give up - NoMoreIndices, - } - - impl super::Store { - pub(crate) fn load_next_indices( - &self, - refresh_mode: RefreshMode, - marker: Option, - ) -> std::io::Result { - let index = self.index.load(); - let state_id = index.state_id(); - if index.loose_dbs.is_empty() { - // TODO: figure out what kind of refreshes we need. This one loads in the initial slot map, but I think this cost is paid - // in full during instantiation. - return self.consolidate_with_disk_state(state_id); - } - - Ok(match marker { - Some(marker) => { - if marker.generation != index.generation { - self.collect_replace_outcome(false /*stable*/) - } else if marker.state_id == state_id { - // Nothing changed in the mean time, try to load another index… - - // …and if that didn't yield anything new consider refreshing our disk state. - match refresh_mode { - RefreshMode::Never => Outcome::NoMoreIndices, - RefreshMode::AfterAllIndicesLoaded => return self.consolidate_with_disk_state(state_id), - } - } else { - self.collect_replace_outcome(true /*stable*/) - } - } - None => self.collect_replace_outcome(false /*stable*/), - }) - } - - /// refresh and possibly clear out our existing data structures, causing all pack ids to be invalidated. 
- fn consolidate_with_disk_state(&self, seen: StateId) -> std::io::Result { - let objects_directory = self.path.lock(); - if seen != self.index.load().state_id() { - todo!("return …") - } - self.num_disk_state_consolidation.fetch_add(1, Ordering::Relaxed); - let mut db_paths = crate::alternate::resolve(&*objects_directory) - .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; - // These are in addition to our objects directory - db_paths.insert(0, objects_directory.clone()); - todo!() - } - - /// If there is no handle with stable pack ids requirements, unload them. - /// This property also relates to us pruning our internal state/doing internal maintenance which affects ids, too. - /// - /// Note that this must be called with a lock to the relevant state held to assure these values don't change while - /// we are working. - fn may_unload_packs(&mut self, guard: &parking_lot::MutexGuard<'_, PathBuf>) -> bool { - self.num_handles_stable.load(Ordering::SeqCst) == 0 - } - - fn collect_replace_outcome(&self, is_stable: bool) -> Outcome { - let index = self.index.load(); - let indices = index - .slot_indices - .iter() - .map(|idx| (*idx, &self.files[*idx])) - .filter_map(|(id, file)| { - let lookup = match (&**file.files.load()).as_ref()? 
{ - store::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { - index: bundle.index.loaded()?.clone(), - data: bundle.data.loaded().cloned(), - }, - store::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { - index: multi.multi_index.loaded()?.clone(), - data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), - }, - }; - handle::IndexLookup { file: lookup, id }.into() - }) - .collect(); - - if is_stable { - Outcome::ReplaceStable { - indices, - loose_dbs: Arc::clone(&index.loose_dbs), - marker: index.marker(), - } - } else { - Outcome::Replace { - indices, - loose_dbs: Arc::clone(&index.loose_dbs), - marker: index.marker(), - } - } - } - } -} - -mod metrics { - use crate::general::store; - use crate::general::store::IndexAndPacks; - use std::sync::atomic::Ordering; - - impl super::Store { - pub fn metrics(&self) -> store::Metrics { - let mut open_packs = 0; - let mut open_indices = 0; - let mut known_packs = 0; - let mut known_indices = 0; - let mut unused_slots = 0; - - for f in self.index.load().slot_indices.iter().map(|idx| &self.files[*idx]) { - match &**f.files.load() { - Some(IndexAndPacks::Index(bundle)) => { - if bundle.index.is_loaded() { - open_indices += 1; - } - known_indices += 1; - if bundle.data.is_loaded() { - open_packs += 1; - } - known_packs += 1; - } - Some(IndexAndPacks::MultiIndex(multi)) => { - if multi.multi_index.is_loaded() { - open_indices += 1; - } - known_indices += 1; - for pack in multi.data.iter() { - if pack.is_loaded() { - open_packs += 1; - } - known_packs += 1; - } - } - None => unused_slots += 1, - } - } - - store::Metrics { - num_handles: self.num_handles_unstable.load(Ordering::Relaxed) - + self.num_handles_stable.load(Ordering::Relaxed), - num_refreshes: self.num_disk_state_consolidation.load(Ordering::Relaxed), - open_packs, - open_indices, - known_indices, - known_packs, - unused_slots, - } - } - } -} +mod metrics; diff --git a/git-odb/src/store/general/store.rs 
b/git-odb/src/store/general/store.rs new file mode 100644 index 00000000000..428676a9dde --- /dev/null +++ b/git-odb/src/store/general/store.rs @@ -0,0 +1,167 @@ +use arc_swap::ArcSwap; +use git_features::hash; +use std::ops::BitXor; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; + +/// An id to refer to an index file or a multipack index file. +pub type IndexId = usize; +pub(crate) type StateId = u32; + +/// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea +/// of whether stored `PackId`s are still usable. +#[derive(Default)] +pub struct SlotIndexMarker { + /// The generation of the `SlotMapIndex` this marker was created from, see `SlotMapIndex::marker()`. Indices of different generations are completely incompatible. + /// This value changes once the internal representation is compacted, something that may happen only if there is no handle + /// requiring stable pack indices. + pub(crate) generation: u8, + /// A unique id identifying the index state as well as all loose databases we have last observed. + /// If it changes in any way, the value is different. + pub(crate) state_id: StateId, +} + +/// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not. +pub struct PackId { + /// Note that if `multipack_index = None`, this index corresponds to the index id. + /// So a pack is always identified by its corresponding index. + /// If it is a multipack index, this is the id / offset of the pack in the `multipack_index`. + pub(crate) index: IndexId, + pub(crate) multipack_index: Option, +} + +/// An index that changes only if the packs directory changes and its contents are re-read. +#[derive(Default)] +pub struct SlotMapIndex { + /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet. 
+ pub(crate) slot_indices: Vec, + /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is the loose file database + /// of this objects directory. All other entries are the loose stores of alternates. + /// It's in an Arc to be shared with Handles, but not to be shared across SlotMapIndices. + pub(crate) loose_dbs: Arc>, + + /// A static value that doesn't ever change for a particular clone of this index. + pub(crate) generation: u8, + /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded + /// in parallel. + /// Shared across SlotMapIndex instances of the same generation. + pub(crate) next_index_to_load: Arc, + /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed. + /// If a load failed, there will also be an increment. + /// Shared across SlotMapIndex instances of the same generation. + pub(crate) loaded_indices: Arc, +} + +impl SlotMapIndex { + pub(crate) fn state_id(self: &Arc) -> StateId { + // The id is a CRC32 over the address of `loose_dbs`, the address of this instance and the current value of `loaded_indices`. We let the loaded indices take part despite not being part of our own snapshot. + // This is to account for indices being loaded in parallel without actually changing the snapshot itself. 
+ let mut hash = hash::crc32(&(Arc::as_ptr(&self.loose_dbs) as usize).to_be_bytes()); + hash = hash::crc32_update(hash, &(Arc::as_ptr(self) as usize).to_be_bytes()); + hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes()) + } + + pub(crate) fn marker(self: &Arc) -> SlotIndexMarker { + SlotIndexMarker { + generation: self.generation, + state_id: self.state_id(), + } + } +} + +#[derive(Clone)] +pub(crate) struct OnDiskFile { + /// The last known path of the file. + path: Arc, + state: OnDiskFileState, +} + +#[derive(Clone)] +pub(crate) enum OnDiskFileState { + /// The file is on disk and can be loaded from there. 
+ Unloaded, + Loaded(T), + /// The file was loaded, but appeared to be missing on disk after reconciling our state with what's on disk. + /// As there were handles that required pack-id stability we had to keep the item to allow finding it on later + /// lookups. + Garbage(T), + /// File is missing on disk and could not be loaded when we tried or turned missing after reconciling our state. + Missing, +} + +impl OnDiskFile { + /// Return true if we hold a memory map of the file already, i.e. if the state is `Loaded` or `Garbage`. + pub fn is_loaded(&self) -> bool { + matches!(self.state, OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_)) + } + + pub fn loaded(&self) -> Option<&T> { + use OnDiskFileState::*; + match &self.state { + Loaded(v) | Garbage(v) => Some(v), + Unloaded | Missing => None, + } + } + + /// Call `load` with our path if the state is `Unloaded` and keep the result as `Loaded`. Returns `Ok(None)` if the state is `Missing` or if `load` fails with `NotFound` (which turns the state into `Missing`), passes other errors on, and panics if the state is already `Loaded` or `Garbage`. + /// We do it like this as we first have to check for a loaded interior in read-only mode, and then upgrade when we know that loading is necessary. This also works around the borrow checker, which is a nice coincidence. 
+ pub fn do_load(&mut self, load: impl FnOnce(&Path) -> std::io::Result) -> std::io::Result> { + use OnDiskFileState::*; + match &mut self.state { + Loaded(_) | Garbage(_) => unreachable!("BUG: check before calling this"), + Missing => Ok(None), + Unloaded => match load(&self.path) { + Ok(v) => { + self.state = OnDiskFileState::Loaded(v); + match &self.state { + Loaded(v) => Ok(Some(v)), + _ => unreachable!(), + } + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + self.state = OnDiskFileState::Missing; + Ok(None) + } + Err(err) => Err(err), + }, + } + } +} + +#[derive(Clone)] +pub(crate) struct IndexFileBundle { + pub index: OnDiskFile>, + pub data: OnDiskFile>, +} + +#[derive(Clone)] +pub(crate) struct MultiIndexFileBundle { + pub multi_index: OnDiskFile>, + pub data: Vec>>, +} + +#[derive(Clone)] +pub(crate) enum IndexAndPacks { + Index(IndexFileBundle), + /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be 
multiple overall. + MultiIndex(MultiIndexFileBundle), +} + +#[derive(Default)] +pub(crate) struct MutableIndexAndPack { + pub(crate) files: ArcSwap>, + pub(crate) write: parking_lot::Mutex<()>, +} + +/// A snapshot of resource usage. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Metrics { + pub num_handles: usize, + pub num_refreshes: usize, + pub open_indices: usize, + pub known_indices: usize, + pub open_packs: usize, + pub known_packs: usize, + pub unused_slots: usize, +}