write pack-ids and offsets (#279)
Byron committed Dec 31, 2021
1 parent 565a7ae commit bfc8069
Showing 3 changed files with 43 additions and 5 deletions.
6 changes: 3 additions & 3 deletions git-pack/src/index/write/encode.rs
@@ -1,5 +1,8 @@
use std::{cmp::Ordering, collections::VecDeque, io};

pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
pub(crate) const HIGH_BIT: u32 = 0x8000_0000;

use byteorder::{BigEndian, WriteBytesExt};
use git_features::{
hash,
@@ -34,9 +37,6 @@ pub(crate) fn write_to(
out.write_all(V2_SIGNATURE)?;
out.write_u32::<BigEndian>(kind as u32)?;

const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
const HIGH_BIT: u32 = 0x8000_0000;

let needs_64bit_offsets =
entries_sorted_by_oid.back().expect("at least one pack entry").offset > LARGE_OFFSET_THRESHOLD;
progress.init(Some(4), progress::steps());
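For context on the two constants promoted to module scope here: a v2 pack index (and the multi-pack-index below) stores each object's pack offset in a 4-byte field, so only offsets up to `LARGE_OFFSET_THRESHOLD` (2^31 - 1) fit directly; anything larger is redirected into a separate table of 64-bit offsets, with `HIGH_BIT` marking the redirect. A minimal sketch of that encoding step, using a hypothetical `encode_offset` helper rather than the crate's actual API:

// Sketch only: the constants mirror the ones introduced above, while
// `encode_offset` is a hypothetical helper and not part of git-pack.
const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
const HIGH_BIT: u32 = 0x8000_0000;

/// Squeeze a pack offset into 32 bits: small offsets are stored directly,
/// large ones store the index of their slot in the 64-bit offsets table,
/// flagged with the high bit.
fn encode_offset(pack_offset: u64, num_large_offsets: &mut u32) -> u32 {
    if pack_offset > LARGE_OFFSET_THRESHOLD {
        let encoded = *num_large_offsets | HIGH_BIT;
        *num_large_offsets += 1;
        encoded
    } else {
        pack_offset as u32
    }
}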
30 changes: 30 additions & 0 deletions git-pack/src/multi_index/chunk.rs
@@ -173,11 +173,41 @@ pub mod lookup {

/// Information about the offsets table.
pub mod offsets {
use crate::multi_index;
use byteorder::{BigEndian, WriteBytesExt};
use std::ops::Range;

/// The id uniquely identifying the offsets table.
pub const ID: git_chunk::Id = *b"OOFF";

/// Return the number of bytes needed to store the offset data for `entries` objects.
pub fn storage_size(entries: usize) -> u64 {
(entries * (4 /* pack-id */ + 4 /* pack offset */)) as u64
}

/// Returns the number of entries that need a u64 offset.
pub(crate) fn write(
sorted_entries: &[multi_index::write::Entry],
mut out: impl std::io::Write,
) -> std::io::Result<u32> {
use crate::index::write::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};
let mut num_large_offsets = 0u32;

for entry in sorted_entries {
out.write_u32::<BigEndian>(entry.pack_index)?;

let offset = if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
let res = num_large_offsets | HIGH_BIT;
num_large_offsets += 1;
res
} else {
entry.pack_offset as u32
};
out.write_u32::<BigEndian>(offset)?;
}
Ok(num_large_offsets)
}

/// Returns true if the `offset` range seems to match the size required for `num_objects`.
pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
let entry_size = 4 /* pack-id */ + 4 /* pack-offset */;
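On the reading side (not part of this change), the convention applies in reverse: each `OOFF` entry is a 4-byte pack id followed by a 4-byte offset, and an offset with its high bit set is an index into the optional chunk of 8-byte large offsets (`LOFF` in Git's multi-pack-index format). A hedged sketch of how such a field could be interpreted, using a hypothetical `decode_offset` helper with `HIGH_BIT` mirrored from `index::write::encode`:

// Sketch only: `decode_offset` is a hypothetical reader-side helper, not part
// of git-pack.
const HIGH_BIT: u32 = 0x8000_0000;

/// The two ways the 4-byte offset field of an `OOFF` entry can be read.
enum Offset {
    /// The pack offset itself; it fits into 31 bits.
    Direct(u64),
    /// An index into the table of 64-bit offsets.
    LargeOffsetIndex(u32),
}

fn decode_offset(stored: u32) -> Offset {
    if stored & HIGH_BIT != 0 {
        Offset::LargeOffsetIndex(stored & !HIGH_BIT)
    } else {
        Offset::Direct(u64::from(stored))
    }
}

Note that `write` above always emits exactly `storage_size(sorted_entries.len())` bytes, 8 per entry, which appears to be the relationship `is_valid` checks against the chunk's byte range.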
12 changes: 10 additions & 2 deletions git-pack/src/multi_index/write.rs
@@ -26,8 +26,8 @@ pub use error::Error;
/// An entry suitable for sorting and writing
pub(crate) struct Entry {
pub(crate) id: git_hash::ObjectId,
pack_index: u32,
pack_offset: crate::data::Offset,
pub(crate) pack_index: u32,
pub(crate) pack_offset: crate::data::Offset,
/// Used for sorting in case of duplicates
index_mtime: SystemTime,
}
@@ -120,6 +120,10 @@ impl multi_index::File {
multi_index::chunk::lookup::ID,
multi_index::chunk::lookup::storage_size(entries.len(), object_hash),
);
cf.plan_chunk(
multi_index::chunk::offsets::ID,
multi_index::chunk::offsets::storage_size(entries.len()),
);

let bytes_written = Self::write_header(
&mut out,
@@ -128,13 +132,17 @@
object_hash,
)?;
let mut chunk_write = cf.into_write(&mut out, bytes_written)?;
let mut num_large_offsets = None;
while let Some(chunk_to_write) = chunk_write.next_chunk() {
match chunk_to_write {
multi_index::chunk::index_names::ID => {
multi_index::chunk::index_names::write(&index_filenames_sorted, &mut chunk_write)?
}
multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write)?,
multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write)?,
multi_index::chunk::offsets::ID => {
num_large_offsets = multi_index::chunk::offsets::write(&entries, &mut chunk_write)?.into();
}
unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(&unknown)),
}
}
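The surrounding flow in write.rs follows a plan-then-write pattern: every chunk is first declared with its id and exact byte size, so a table of contents can be emitted before any chunk body, and the bodies are then written in the planned order by dispatching on the id handed out by the chunk writer. The sketch below shows only that shape; it is a simplified stand-in rather than the actual `git_chunk` API, and the chunk ids other than `OOFF` are taken from Git's multi-pack-index format, not from this diff.

// Sketch only: a minimal stand-in for the plan-then-write flow. The real
// `git_chunk` crate additionally records byte offsets and validates that each
// chunk body matches its planned size.
use std::io::{self, Write};

type ChunkId = [u8; 4];

#[derive(Default)]
struct ChunkPlan {
    /// (chunk id, planned size in bytes), in the order the chunks are written.
    chunks: Vec<(ChunkId, u64)>,
}

impl ChunkPlan {
    fn plan_chunk(&mut self, id: ChunkId, size: u64) {
        self.chunks.push((id, size));
    }
}

fn write_chunks(plan: &ChunkPlan, out: &mut impl Write) -> io::Result<()> {
    // The real writer emits the table of contents first, using the planned
    // sizes; here we only dispatch on each chunk id, mirroring the match in
    // the diff above.
    for (id, _planned_size) in &plan.chunks {
        match id {
            b"OIDF" => { /* fanout::write(&entries, out)? */ }
            b"OIDL" => { /* lookup::write(&entries, out)? */ }
            b"OOFF" => { /* offsets::write(&entries, out)? */ }
            unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(unknown)),
        }
    }
    out.flush()
}

fn main() -> io::Result<()> {
    let mut plan = ChunkPlan::default();
    // Ids other than OOFF follow Git's multi-pack-index format and are an
    // assumption here, not something shown in this commit.
    plan.plan_chunk(*b"OIDF", 256 * 4); // fanout: 256 big-endian u32 values
    plan.plan_chunk(*b"OIDL", 0); // size would come from lookup::storage_size(..)
    plan.plan_chunk(*b"OOFF", 0); // size would come from offsets::storage_size(..)
    write_chunks(&plan, &mut io::sink())
}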
