Skip to content

Commit

Permalink
feat!: Use [u8;4] as chunk id (#279)
Browse files Browse the repository at this point in the history
This allows removing the additional string that was used to describe the ids,
which are usually ASCII anyway.
  • Loading branch information
Byron committed Dec 20, 2021
1 parent a363de9 commit 9d9f2ee
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 25 deletions.
12 changes: 6 additions & 6 deletions git-chunk/src/file/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod error {
display("Sentinel value encountered while still processing chunks.")
}
MissingSentinelValue { actual: crate::Kind } {
display("Sentinel value wasn't found, saw {:#016x}", actual)
display("Sentinel value wasn't found, saw {:?}", std::str::from_utf8(actual.as_ref()).unwrap_or("<non-ascii>"))
}
ChunkSizeOutOfBounds { offset: crate::file::Offset, file_length: u64 } {
display("The chunk offset {} went past the file of length {} - was it truncated?", offset, file_length)
Expand All @@ -22,7 +22,7 @@ mod error {
display("All chunk offsets must be incrementing.")
}
DuplicateChunk(kind: crate::Kind) {
display("The chunk of kind {:#016x} was encountered more than once", kind)
display("The chunk of kind {:?} was encountered more than once", std::str::from_utf8(kind.as_ref()).unwrap_or("<non-ascii>"))
}
TocTooSmall { actual: usize, expected: usize } {
display("The table of contents would be {} bytes, but got only {}", expected, actual)
Expand Down Expand Up @@ -58,7 +58,7 @@ impl file::Index {

for _ in 0..num_chunks {
let (kind, offset) = toc_entry.split_at(4);
let kind = be_u32(kind);
let kind = to_kind(kind);
if kind == crate::SENTINEL {
return Err(Error::EarlySentinelValue);
}
Expand Down Expand Up @@ -93,7 +93,7 @@ impl file::Index {
})
}

let sentinel = be_u32(&toc_entry[..4]);
let sentinel = to_kind(&toc_entry[..4]);
if sentinel != crate::SENTINEL {
return Err(Error::MissingSentinelValue { actual: sentinel });
}
Expand All @@ -102,8 +102,8 @@ impl file::Index {
}
}

fn be_u32(data: &[u8]) -> u32 {
u32::from_be_bytes(data[..4].try_into().unwrap())
fn to_kind(data: &[u8]) -> crate::Kind {
data[..4].try_into().unwrap()
}

fn be_u64(data: &[u8]) -> u64 {
Expand Down
22 changes: 6 additions & 16 deletions git-chunk/src/file/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@ pub mod offset_by_kind {
#[derive(Debug)]
pub struct Error {
pub kind: crate::Kind,
pub name: &'static str,
}

impl Display for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Chunk named {:?} (id = {}) was not found in chunk file index",
self.name, self.kind
"Chunk named {:?} was not found in chunk file index",
std::str::from_utf8(&self.kind).unwrap_or("<non-ascii>")
)
}
}
Expand Down Expand Up @@ -61,25 +60,16 @@ impl Index {
pub const EMPTY_SIZE: usize = Index::ENTRY_SIZE;

/// Find a chunk of `kind` and return its offset into the data if found
pub fn offset_by_kind(
&self,
kind: crate::Kind,
name: &'static str,
) -> Result<Range<crate::file::Offset>, offset_by_kind::Error> {
pub fn offset_by_kind(&self, kind: crate::Kind) -> Result<Range<crate::file::Offset>, offset_by_kind::Error> {
self.chunks
.iter()
.find_map(|c| (c.kind == kind).then(|| c.offset.clone()))
.ok_or(offset_by_kind::Error { kind, name })
.ok_or(offset_by_kind::Error { kind })
}

/// Find a chunk of `kind` and return its data slice based on its offset.
pub fn data_by_kind<'a>(
&self,
data: &'a [u8],
kind: crate::Kind,
name: &'static str,
) -> Result<&'a [u8], data_by_kind::Error> {
let offset = self.offset_by_kind(kind, name)?;
pub fn data_by_kind<'a>(&self, data: &'a [u8], kind: crate::Kind) -> Result<&'a [u8], data_by_kind::Error> {
let offset = self.offset_by_kind(kind)?;
Ok(&data[crate::into_usize_range(offset).ok_or(data_by_kind::Error::FileTooLarge)?])
}

Expand Down
6 changes: 3 additions & 3 deletions git-chunk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
use std::convert::TryInto;
use std::ops::Range;

/// An identifier to describe the kind of chunk, unique within a chunk file.
pub type Kind = u32;
/// An identifier to describe the kind of chunk, unique within a chunk file, typically in ASCII.
pub type Kind = [u8; 4];

/// A special value denoting the end of the chunk file table of contents.
pub const SENTINEL: Kind = 0;
pub const SENTINEL: Kind = [0u8; 4];

/// Turn a u64 Range into a usize range safely, to make chunk ranges useful in memory-mapped files.
pub fn into_usize_range(Range { start, end }: Range<file::Offset>) -> Option<Range<usize>> {
Expand Down

0 comments on commit 9d9f2ee

Please sign in to comment.