Skip to content

Commit

Permalink
Switch metadata HashMap to a deterministically ordered BTreeMap
Browse files Browse the repository at this point in the history
  • Loading branch information
caesay committed Nov 1, 2024
1 parent 495e30f commit c0b3279
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 50 deletions.
5 changes: 4 additions & 1 deletion bitar/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ fn main() {
std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()).join("chunk_dictionary.rs");

if !output_file.exists() {
prost_build::compile_protos(&["proto/chunk_dictionary.proto"], &["proto/"]).unwrap();
prost_build::Config::new()
.btree_map(["."])
.compile_protos(&["proto/chunk_dictionary.proto"], &["proto/"])
.unwrap();

let content = format!(
"// THIS FILE IS AUTOMATICALLY GENERATED\n// EDIT ../proto/chunk_dictionary.proto INSTEAD\n\n{}",
Expand Down
8 changes: 4 additions & 4 deletions bitar/src/api/compress.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::default::Default;
use std::error;
use std::fmt;
Expand Down Expand Up @@ -39,7 +39,7 @@ pub struct CreateArchiveOptions {
pub compression: Option<Compression>,

/// Custom string/bytes key-value pair metadata to be stored in the archive header
pub metadata: HashMap<String, Vec<u8>>,
pub metadata: BTreeMap<String, Vec<u8>>,
}

impl Default for CreateArchiveOptions {
Expand All @@ -58,7 +58,7 @@ impl Default for CreateArchiveOptions {
algorithm: CompressionAlgorithm::Brotli,
level: 6,
}),
metadata: HashMap::new(),
metadata: BTreeMap::new(),
}
}
}
Expand Down Expand Up @@ -251,7 +251,7 @@ pub async fn create_archive<R: AsyncRead + Unpin + Send, W: AsyncWrite + Unpin>(
source_total_size: source_length as u64,
chunker_params: Some(chunker_params),
chunk_compression: Some(options.compression.into()),
metadata: options.clone().metadata.into(),
metadata: options.metadata.clone(),
};

let header_buf = crate::header::build(&file_header, None).expect("Failed to create header");
Expand Down
14 changes: 8 additions & 6 deletions bitar/src/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
};
use blake2::{Blake2b512, Digest};
use futures_util::{stream::Stream, StreamExt};
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::{
convert::TryInto,
fmt,
Expand Down Expand Up @@ -87,7 +87,7 @@ pub struct Archive<R> {
source_checksum: HashSum,
chunker_config: chunker::Config,
chunk_hash_length: usize,
metadata: HashMap<String, Vec<u8>>,
metadata: BTreeMap<String, Vec<u8>>,
}

impl<R> Archive<R> {
Expand Down Expand Up @@ -250,12 +250,14 @@ impl<R> Archive<R> {
&self.created_by_app_version
}
/// Get the custom key-value pair metadata stored in the archive header.
pub fn metadata(&self) -> &HashMap<String, Vec<u8>> {
&self.metadata
pub fn metadata_iter(&self) -> impl Iterator<Item = (&str, &[u8])> {
self.metadata
.iter()
.map(|(k, v)| (k.as_str(), v.as_slice()))
}
/// Get a specific metadata value stored in the archive header, or None if it is not present.
pub fn metadata_value(&self, key: &str) -> Option<&Vec<u8>> {
self.metadata.get(key)
pub fn metadata_value(&self, key: &str) -> Option<&[u8]> {
self.metadata.get(key).map(|v| v.as_slice())
}
/// Iterate chunks as ordered in source.
pub fn iter_source_chunks(&self) -> impl Iterator<Item = (u64, &ChunkDescriptor)> {
Expand Down
4 changes: 2 additions & 2 deletions bitar/src/chunk_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ pub struct ChunkDictionary {
#[prost(message, repeated, tag = "7")]
pub chunk_descriptors: ::prost::alloc::vec::Vec<ChunkDescriptor>,
/// Custom key-value-pair metadata to store with the dictionary
#[prost(map = "string, bytes", tag = "8")]
pub metadata: ::std::collections::HashMap<
#[prost(btree_map = "string, bytes", tag = "8")]
pub metadata: ::prost::alloc::collections::BTreeMap<
::prost::alloc::string::String,
::prost::alloc::vec::Vec<u8>,
>,
Expand Down
28 changes: 14 additions & 14 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use clap::error::ErrorKind;
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use log::LevelFilter;
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::collections::HashMap;
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use std::time::Duration;
Expand Down Expand Up @@ -63,15 +62,16 @@ where
.long("metadata-file")
.num_args(2) // Expect exactly 2 values (key and path) each time
.action(clap::ArgAction::Append) // Append to the list of values
.value_names(&["KEY", "PATH"])
.value_names(["KEY", "PATH"])
.value_parser(value_parser!(OsString))
.help("Custom metadata key-value pair where the value is a file contents"),
)
.arg(
Arg::new("metadata-value")
.long("metadata-value")
.num_args(2) // Expect exactly 2 values (key and value) each time
.action(clap::ArgAction::Append) // Append to the list of values
.value_names(&["NAME", "VALUE"])
.value_names(["KEY", "VALUE"])
.help("Custom metadata key-value pair where the value is a provided string"),
),
);
Expand Down Expand Up @@ -183,22 +183,22 @@ where
let chunker_config = parse_chunker_config(&mut cmd, matches)?;
let compression = parse_compression(&mut cmd, matches)?;

let mut metadata_files: HashMap<String, PathBuf> = HashMap::new();
if let Some(values) = matches.get_many::<String>("metadata-file") {
let mut metadata_files: Vec<(String, PathBuf)> = Vec::new();
if let Some(values) = matches.get_many::<OsString>("metadata-file") {
let values: Vec<_> = values.collect();
for pair in values.chunks_exact(2) {
if let [key, value] = *pair {
metadata_files.insert(key.to_owned(), PathBuf::from(value));
metadata_files.push((key.to_string_lossy().to_string(), PathBuf::from(value)));
}
}
}

let mut metadata_strings: HashMap<String, String> = HashMap::new();
let mut metadata_strings: Vec<(String, String)> = Vec::new();
if let Some(values) = matches.get_many::<String>("metadata-value") {
let values: Vec<_> = values.collect();
for pair in values.chunks_exact(2) {
if let [key, value] = *pair {
metadata_strings.insert(key.to_owned(), value.to_owned());
metadata_strings.push((key.to_owned(), value.to_owned()));
}
}
}
Expand Down Expand Up @@ -611,8 +611,8 @@ mod tests {
Compression::try_new(bitar::CompressionAlgorithm::Brotli, 6).unwrap()
),
num_chunk_buffers: get_num_chunk_buffers(),
metadata_files: std::collections::HashMap::new(),
metadata_strings: std::collections::HashMap::new(),
metadata_files: Vec::new(),
metadata_strings: Vec::new(),
})
);
}
Expand Down Expand Up @@ -640,8 +640,8 @@ mod tests {
Compression::try_new(bitar::CompressionAlgorithm::Brotli, 6).unwrap()
),
num_chunk_buffers: get_num_chunk_buffers(),
metadata_files: std::collections::HashMap::new(),
metadata_strings: std::collections::HashMap::new(),
metadata_files: Vec::new(),
metadata_strings: Vec::new(),
})
);
}
Expand Down Expand Up @@ -691,8 +691,8 @@ mod tests {
Compression::try_new(bitar::CompressionAlgorithm::Brotli, 2).unwrap()
),
num_chunk_buffers: get_num_chunk_buffers(),
metadata_files: std::collections::HashMap::new(),
metadata_strings: std::collections::HashMap::new(),
metadata_files: Vec::new(),
metadata_strings: Vec::new(),
})
);
}
Expand Down
14 changes: 7 additions & 7 deletions src/compress_cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use futures_util::{future, StreamExt};
use log::*;
use std::io::Write;
use std::path::PathBuf;
use std::{collections::HashMap, io::IsTerminal};
use std::{collections::BTreeMap, collections::HashMap, io::IsTerminal};
use tokio::{
fs::{File, OpenOptions},
io::{AsyncRead, AsyncWriteExt},
Expand Down Expand Up @@ -152,8 +152,8 @@ pub struct Options {
pub chunker_config: chunker::Config,
pub compression: Option<Compression>,
pub num_chunk_buffers: usize,
pub metadata_files: HashMap<String, PathBuf>,
pub metadata_strings: HashMap<String, String>,
pub metadata_files: Vec<(String, PathBuf)>,
pub metadata_strings: Vec<(String, String)>,
}

pub async fn compress_cmd(opts: Options) -> Result<()> {
Expand Down Expand Up @@ -228,15 +228,15 @@ pub async fn compress_cmd(opts: Options) -> Result<()> {
};

// Construct custom metadata map (BTreeMap for deterministic key ordering)
let mut metadata = HashMap::new();
let mut metadata = BTreeMap::new();
for (key, value) in opts.metadata_strings {
metadata.insert(key, value.into());
}
for (key, path) in opts.metadata_files {
let content = std::fs::read(&path)
.context(format!("Failed to read metadata file {}", path.display()))?;
metadata.insert(key, content);
}
for (key, value) in opts.metadata_strings {
metadata.insert(key, value.into());
}

// Build the final archive
let file_header = dict::ChunkDictionary {
Expand Down
27 changes: 11 additions & 16 deletions src/info_cmd.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use anyhow::{bail, Result};
use log::*;
use std::collections::HashMap;
use std::io::Write;
use tokio::fs::File;

Expand Down Expand Up @@ -61,19 +60,6 @@ pub fn print_chunker_config(config: &chunker::Config) {
}
}

pub fn print_metadata_overview(metadata: &HashMap<String, Vec<u8>>) {
if metadata.is_empty() {
info!(" Metadata: None");
} else {
let display = metadata
.iter()
.map(|(key, value)| format!("{}({})", key, value.len()))
.collect::<Vec<String>>()
.join(", ");
info!(" Metadata: {}", display);
}
}

pub fn print_archive<R>(archive: &Archive<R>) {
info!("Archive: ");
info!(" Built with version: {}", archive.built_with_version());
Expand All @@ -82,7 +68,16 @@ pub fn print_archive<R>(archive: &Archive<R>) {
human_size!(archive.compressed_size() + archive.header_size() as u64)
);

print_metadata_overview(archive.metadata());
let mut metadata = archive.metadata_iter().peekable();
if metadata.peek().is_none() {
info!(" Metadata: None");
} else {
let display = metadata
.map(|(key, value)| format!("{}({})", key, value.len()))
.collect::<Vec<String>>()
.join(", ");
info!(" Metadata: {}", display);
}

info!(" Header checksum: {}", archive.header_checksum());
info!(" Chunk hash length: {} bytes", archive.chunk_hash_length());
Expand Down Expand Up @@ -128,7 +123,7 @@ where
if let Some(key) = metadata_key {
let archive = Archive::try_init(reader).await?;
if let Some(value) = archive.metadata_value(key.as_str()) {
std::io::stdout().write_all(&value)?;
std::io::stdout().write_all(value)?;
} else {
bail!("Metadata key not found: {}", key);
}
Expand Down

0 comments on commit c0b3279

Please sign in to comment.