Skip to content

Commit

Permalink
only load multi-pack indices if allowed (#279)
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Dec 20, 2021
1 parent 5e085ec commit b22e146
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 11 deletions.
6 changes: 4 additions & 2 deletions git-odb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ pub struct Store {

/// The amount of times we re-read the disk state to consolidate our in-memory representation.
pub(crate) num_disk_state_consolidation: AtomicUsize,
/// If true, we are allowed to use multi-pack indices.
pub use_multi_pack_index: bool,
}

impl Store {
Expand All @@ -120,8 +122,8 @@ impl Store {
}

/// Create a new cached handle to the object store with support for additional options.
pub fn at_opts(objects_dir: impl Into<PathBuf>, slots: store::init::Slots) -> std::io::Result<Handle> {
let handle = OwnShared::new(Store::at_opts(objects_dir, slots)?).to_handle();
pub fn at_opts(objects_dir: impl Into<PathBuf>, options: store::init::Options) -> std::io::Result<Handle> {
let handle = OwnShared::new(Store::at_opts(objects_dir, options)?).to_handle();
Ok(Cache::from(handle))
}

Expand Down
40 changes: 34 additions & 6 deletions git-odb/src/store_impls/dynamic/init.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,30 @@
use std::{iter::FromIterator, path::PathBuf, sync::Arc};

use crate::Store;
use arc_swap::ArcSwap;

use crate::store::types::{MutableIndexAndPack, SlotMapIndex};

/// Options for use in [`Store::at_opts()`].
///
/// All fields are public so that callers outside of this module can adjust them, typically by starting
/// from [`Options::default()`] and overriding individual fields with struct-update syntax, e.g.
/// `Options { use_multi_pack_index: false, ..Default::default() }`.
#[derive(Copy, Clone, Debug)]
pub struct Options {
    /// How to obtain a size for the slot map.
    pub slots: Slots,
    /// If true, we are allowed to use multi-pack indices.
    pub use_multi_pack_index: bool,
}

impl Default for Options {
fn default() -> Self {
Options {
slots: Default::default(),
use_multi_pack_index: true,
}
}
}

/// Configures the amount of slots in the index slotmap, which is fixed throughout the existence of the store.
#[derive(Copy, Clone, Debug)]
pub enum Slots {
/// The amount of slots to use, that is the total amount of indices we can hold at a time.
/// Using this has the advantage of avoiding an initial directory listing of the repository, and is recommended
Expand All @@ -30,15 +50,21 @@ impl Default for Slots {
}
}

impl super::Store {
impl Store {
/// Open the store at `objects_dir` (containing loose objects and `packs/`), which must only be a directory for
/// the store to be created without any additional work being done.
/// `slots` defines how many multi-pack-indices as well as indices we can know about at a time, which includes
/// the allowance for all additional object databases coming in via `alternates` as well.
/// Note that the `slots` isn't used for packs, these are included with their multi-index or index respectively.
/// For example, in a repository with 250m objects and geometric packing one would expect 27 index/pack pairs,
/// or a single multi-pack index.
pub fn at_opts(objects_dir: impl Into<PathBuf>, slots: Slots) -> std::io::Result<Self> {
pub fn at_opts(
objects_dir: impl Into<PathBuf>,
Options {
slots,
use_multi_pack_index,
}: Options,
) -> std::io::Result<Self> {
let objects_dir = objects_dir.into();
if !objects_dir.is_dir() {
return Err(std::io::Error::new(
Expand All @@ -52,9 +78,10 @@ impl super::Store {
let mut db_paths = crate::alternate::resolve(&objects_dir)
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?;
db_paths.insert(0, objects_dir.clone());
let num_slots = super::Store::collect_indices_and_mtime_sorted_by_size(db_paths, None)
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?
.len();
let num_slots =
super::Store::collect_indices_and_mtime_sorted_by_size(db_paths, None, use_multi_pack_index)
.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?
.len();

((num_slots as f32 * multiplier) as usize).max(minimum)
}
Expand All @@ -65,11 +92,12 @@ impl super::Store {
"Cannot use more than 1^15 slots",
));
}
Ok(super::Store {
Ok(Store {
write: Default::default(),
path: objects_dir,
files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(slot_count)),
index: ArcSwap::new(Arc::new(SlotMapIndex::default())),
use_multi_pack_index,
num_handles_stable: Default::default(),
num_handles_unstable: Default::default(),
num_disk_state_consolidation: Default::default(),
Expand Down
11 changes: 8 additions & 3 deletions git-odb/src/store_impls/dynamic/load_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,11 @@ impl super::Store {
Arc::clone(&index.loose_dbs)
};

let indices_by_modification_time =
Self::collect_indices_and_mtime_sorted_by_size(db_paths, index.slot_indices.len().into())?;
let indices_by_modification_time = Self::collect_indices_and_mtime_sorted_by_size(
db_paths,
index.slot_indices.len().into(),
self.use_multi_pack_index,
)?;
let mut idx_by_index_path: BTreeMap<_, _> = index
.slot_indices
.iter()
Expand Down Expand Up @@ -383,6 +386,7 @@ impl super::Store {
pub(crate) fn collect_indices_and_mtime_sorted_by_size(
db_paths: Vec<PathBuf>,
initial_capacity: Option<usize>,
use_multi_pack_index: bool,
) -> Result<Vec<(PathBuf, SystemTime, u64)>, Error> {
let mut indices_by_modification_time = Vec::with_capacity(initial_capacity.unwrap_or_default());
for db_path in db_paths {
Expand All @@ -399,7 +403,8 @@ impl super::Store {
.filter(|(_, md)| md.file_type().is_file())
.filter(|(p, _)| {
let ext = p.extension();
ext == Some(OsStr::new("idx")) || (ext.is_none() && is_multipack_index(p))
ext == Some(OsStr::new("idx"))
|| (use_multi_pack_index && ext.is_none() && is_multipack_index(p))
})
.map(|(p, md)| md.modified().map_err(Error::from).map(|mtime| (p, mtime, md.len())))
.collect::<Result<Vec<_>, _>>()?,
Expand Down

0 comments on commit b22e146

Please sign in to comment.