Skip to content

Commit

Permalink
added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Aug 3, 2024
1 parent 6141c5c commit c99b0f9
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 104 deletions.
136 changes: 38 additions & 98 deletions src/complevel_estimator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
/// Getting the parameters correct means that the resulting diff between the deflate stream
/// and the predicted deflate stream will be as small as possible.
use crate::{
hash_algorithm::{
HashAlgorithm, HashImplementation, LibdeflateRotatingHash4, MiniZHash, RandomVectorHash,
ZlibNGHash, ZlibRotatingHash,
},
hash_chain::{DictionaryAddPolicy, HashChain, MAX_UPDATE_HASH_BATCH},
hash_algorithm::HashAlgorithm,
hash_chain::DictionaryAddPolicy,
hash_chain_holder::{new_hash_chain_holder, HashChainHolderTrait},
preflate_constants,
preflate_input::PreflateInput,
preflate_parameter_estimator::PreflateStrategy,
preflate_parse_config::{FAST_PREFLATE_PARSER_SETTINGS, SLOW_PREFLATE_PARSER_SETTINGS},
preflate_token::{BlockType, PreflateToken, PreflateTokenBlock, PreflateTokenReference},
skip_length_estimator::estimate_skip_length,
token_predictor::TokenPredictorParameters,
};

#[derive(Default)]
Expand All @@ -37,62 +37,10 @@ pub struct CompLevelInfo {
pub max_chain: u32,
}

/// vtable for invoking the hash chain functions on specific implementation
/// of hash algorithm
///
/// The trait itself is not generic over the hash implementation, so it can be
/// used as a boxed trait object (`Box<dyn HashChainInvoke>`), which is how
/// `CandidateInfo` stores its hash chain.
trait HashChainInvoke {
/// Updates the hash chain from `input` for the given `len`, applying
/// `add_policy`.
///
/// The `HashChainHolder` implementation in this file forwards to
/// `update_hash_with_policy::<true>` on the wrapped hash chain.
fn invoke_update_hash(
&mut self,
len: u32,
input: &PreflateInput,
add_policy: DictionaryAddPolicy,
);

/// Returns the wrapped hash chain's `match_depth` result for `token`,
/// given `window_size` and the current `input`.
fn invoke_match_depth(
&mut self,
token: PreflateTokenReference,
window_size: u32,
input: &PreflateInput,
) -> u32;
}

/// holds the hashchain for a specific hash algorithm
///
/// `H` selects the hash implementation; the stored chain has the
/// implementation's associated `HashChainType`.
struct HashChainHolder<H: HashImplementation> {
/// Hash chain produced by `H::new_hash_chain()` (see `HashChainHolder::new`).
hash_chain: H::HashChainType,
}

impl<H: HashImplementation + 'static> HashChainHolder<H> {
    /// Creates a holder around a new hash chain obtained from `hash` and
    /// returns it type-erased as a boxed `HashChainInvoke` trait object.
    fn new(hash: H) -> Box<dyn HashChainInvoke> {
        let hash_chain: H::HashChainType = hash.new_hash_chain();
        Box::new(Self { hash_chain })
    }
}

impl<H: HashImplementation> HashChainInvoke for HashChainHolder<H> {
    /// Forwards to the wrapped chain's `update_hash_with_policy`, with its
    /// const generic flag set to `true`.
    fn invoke_update_hash(
        &mut self,
        len: u32,
        input: &PreflateInput,
        add_policy: DictionaryAddPolicy,
    ) {
        let chain = &mut self.hash_chain;
        chain.update_hash_with_policy::<true>(len, input, add_policy)
    }

    /// Forwards to the wrapped chain's `match_depth`, passing `token` by
    /// reference.
    fn invoke_match_depth(
        &mut self,
        token: PreflateTokenReference,
        window_size: u32,
        input: &PreflateInput,
    ) -> u32 {
        let Self { hash_chain } = self;
        hash_chain.match_depth(&token, window_size, input)
    }
}

struct CandidateInfo {
hash_algorithm: HashAlgorithm,
add_policy: DictionaryAddPolicy,
hash_chain: Box<dyn HashChainInvoke>,
hash_chain: Box<dyn HashChainHolderTrait>,

longest_dist_at_hop_0: u32,
longest_dist_at_hop_1_plus: u32,
Expand All @@ -103,24 +51,29 @@ impl CandidateInfo {
fn new(
add_policy: DictionaryAddPolicy,
hash_algorithm: HashAlgorithm,
input: &PreflateInput,
window_bits: u32,
) -> Self {
CandidateInfo {
let params = TokenPredictorParameters {
hash_algorithm,
add_policy,
matches_to_start_detected: false,
very_far_matches_detected: false,
window_bits,
strategy: PreflateStrategy::Default,
nice_length: 0,
max_token_count: 0,
zlib_compatible: false,
max_dist_3_matches: 0,
good_length: 0,
max_lazy: 0,
max_chain: 0,
min_len: 0,
};

Self {
add_policy,
hash_algorithm,
hash_chain: match hash_algorithm {
HashAlgorithm::Zlib {
hash_mask,
hash_shift,
} => HashChainHolder::new(ZlibRotatingHash {
hash_shift,
hash_mask,
}),
HashAlgorithm::MiniZFast => HashChainHolder::new(MiniZHash {}),
HashAlgorithm::Libdeflate4 => HashChainHolder::new(LibdeflateRotatingHash4 {}),
HashAlgorithm::ZlibNG => HashChainHolder::new(ZlibNGHash {}),
HashAlgorithm::RandomVector => HashChainHolder::new(RandomVectorHash {}),
},
hash_chain: new_hash_chain_holder(&params),
longest_dist_at_hop_0: 0,
longest_dist_at_hop_1_plus: 0,
max_chain_found: 0,
Expand All @@ -133,9 +86,7 @@ impl CandidateInfo {
window_size: u32,
input: &PreflateInput,
) -> bool {
let mdepth = self
.hash_chain
.invoke_match_depth(token, window_size, input);
let mdepth = self.hash_chain.match_depth(token, window_size, input);

// remove element if the match was impossible due to matching the
// hash depth or because in fast mode we can't match partial words
Expand Down Expand Up @@ -228,7 +179,7 @@ impl<'a> CompLevelEstimatorState<'a> {
candidates.push(Box::new(CandidateInfo::new(
add_policy,
HashAlgorithm::MiniZFast,
&input,
wbits,
)));

for (hash_shift, hash_mask) in [(5, 32767), (4, 2047)] {
Expand All @@ -238,22 +189,22 @@ impl<'a> CompLevelEstimatorState<'a> {
hash_mask,
hash_shift,
},
&input,
wbits,
)));
}

// Libdeflate4 candidate
candidates.push(Box::new(CandidateInfo::new(
add_policy,
HashAlgorithm::Libdeflate4,
&input,
wbits,
)));

// ZlibNG candidate
candidates.push(Box::new(CandidateInfo::new(
add_policy,
HashAlgorithm::ZlibNG,
&input,
wbits,
)));

CompLevelEstimatorState {
Expand All @@ -269,25 +220,14 @@ impl<'a> CompLevelEstimatorState<'a> {
}
}

fn update_hash(&mut self, mut length: u32, override_add_policy: bool) {
while length > 0 {
let batch_len = std::cmp::min(length, MAX_UPDATE_HASH_BATCH);

for i in &mut self.candidates {
i.hash_chain.invoke_update_hash(
batch_len,
&self.input,
if override_add_policy {
DictionaryAddPolicy::AddAll
} else {
i.add_policy
},
);
}

self.input.advance(batch_len);
length -= batch_len;
fn update_hash(&mut self, length: u32, override_add_policy: bool) {
for i in &mut self.candidates {
let mut inputc = self.input.clone();
i.hash_chain
.update_hash_with_depth(length, &mut inputc, override_add_policy);
}

self.input.advance(length);
}

fn check_match(&mut self, token: PreflateTokenReference) {
Expand Down
6 changes: 2 additions & 4 deletions src/hash_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,11 @@ impl HashChainNormalizeLibflate4 {
// Important: total_shift starts at -8 since 0 indicates the end of the hash chain
// so this means that all valid values will be >= 8, otherwise the very first hash
// offset would be zero and so it would get missed
let mut c = HashChainNormalizeLibflate4 {
HashChainNormalizeLibflate4 {
total_shift: -8,
hash_table: HashTable::default_boxed(),
hash_table_3: HashTable::default_boxed(),
};

c
}
}
}

Expand Down
41 changes: 39 additions & 2 deletions src/hash_chain_holder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,22 @@ pub fn new_hash_chain_holder(params: &TokenPredictorParameters) -> Box<dyn HashC
/// trait that is not dependent on the HashImplementation so it can
/// be used in a boxed type by the TokenPredictor
pub trait HashChainHolderTrait {
fn update_hash(&mut self, length: u32, input: &mut PreflateInput, override_policy: bool);

/// updates the hash dictionary for a given length of matches.
///
/// If this is a literal, then the update policy is to add all the bytes to the dictionary.
fn update_hash(&mut self, length: u32, input: &mut PreflateInput, is_literal: bool);

/// updates the hash dictionary for a given length of matches, and also updates the depth
/// map of the hash chain.
///
/// If this is a literal, then the update policy is to add all the bytes to the dictionary.
fn update_hash_with_depth(&mut self, length: u32, input: &mut PreflateInput, is_literal: bool);

/// searches the hash chain for a given match, returns the longest result found if any
///
/// prev_len is the length of the previous match. We won't match anything shorter than that.
/// offset is the offset from the current position in the input (can be 0 for current or 1 for lazy matches)
/// max_depth is the maximum number of hops we will take in the hash chain
fn match_token(
&self,
prev_len: u32,
Expand Down Expand Up @@ -123,6 +137,29 @@ impl<H: HashImplementation> HashChainHolderTrait for HashChainHolder<H> {
}
}

/// Updates the hash dictionary for `length` input positions while also
/// maintaining the chain depth information (the `::<true>` variant of
/// `update_hash_with_policy`), advancing `input` as it goes.
///
/// When `is_literal` is set, every position is added (`AddAll`); otherwise
/// the add policy from `self.params` is used.
fn update_hash_with_depth(
    &mut self,
    length: u32,
    input: &mut PreflateInput,
    is_literal: bool,
) {
    // The policy does not change between batches, so pick it once up front.
    let policy = if is_literal {
        DictionaryAddPolicy::AddAll
    } else {
        self.params.add_policy
    };

    // Feed the input in batches of at most MAX_UPDATE_HASH_BATCH.
    let mut remaining = length;
    while remaining > 0 {
        let step = remaining.min(MAX_UPDATE_HASH_BATCH);

        self.hash
            .update_hash_with_policy::<true>(step, input, policy);
        input.advance(step);

        remaining -= step;
    }
}

fn match_depth(
&self,
token: PreflateTokenReference,
Expand Down
2 changes: 2 additions & 0 deletions src/preflate_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt in the project root for license information.
* This software incorporates material from third parties. See NOTICE.txt for details.
*--------------------------------------------------------------------------------------------*/

#[derive(Clone)]
pub struct PreflateInput<'a> {
data: &'a [u8],
pos: i32,
Expand Down

0 comments on commit c99b0f9

Please sign in to comment.