From 929d5cde3b6ad02e72c16d275d32044458c77221 Mon Sep 17 00:00:00 2001 From: Kristof Date: Fri, 13 Sep 2024 19:29:28 +0200 Subject: [PATCH 1/2] add new crc code --- src/hash_algorithm.rs | 65 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/src/hash_algorithm.rs b/src/hash_algorithm.rs index 0fa6357..080f2d7 100644 --- a/src/hash_algorithm.rs +++ b/src/hash_algorithm.rs @@ -172,7 +172,14 @@ impl HashImplementation for Crc32cHash { type HashChainType = HashChainNormalize; fn get_hash(&self, b: &[u8]) -> u16 { - crc32fast::hash(&b[0..4]) as u16 + assert!(b.len() >= 4); + + let mut crc = CRC32C_TABLE[b[0] as usize]; + crc = (crc >> 8) ^ CRC32C_TABLE[((crc ^ u32::from(b[1])) & 0xFF) as usize]; + crc = (crc >> 8) ^ CRC32C_TABLE[((crc ^ u32::from(b[2])) & 0xFF) as usize]; + crc = (crc >> 8) ^ CRC32C_TABLE[((crc ^ u32::from(b[3])) & 0xFF) as usize]; + + crc as u16 } fn num_hash_bytes() -> usize { @@ -184,11 +191,65 @@ impl HashImplementation for Crc32cHash { } } +static CRC32C_TABLE : [u32;256] = [ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, + 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, + 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, + 0x5E133C24, 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, + 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0x9A879FA0, + 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, + 0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, + 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, + 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, 0x30E349B1, 0xC288CAB2, + 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, + 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, + 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, + 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, + 0x67DAFA54, 0x95B17957, 0xCBA24573, 0x39C9C670, 0x2A993584, + 
0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, + 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F, + 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, + 0x0F36E6F7, 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, + 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, 0xEB1FCBAD, + 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, + 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E, 0x90A324FA, + 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, + 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, + 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, + 0x563C5F93, 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, + 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, 0x92A8FC17, + 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, + 0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, + 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, + 0x97BAA1BA, 0x84EA524E, 0x7681D14D, 0x2892ED69, 0xDAF96E6A, + 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, + 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, + 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, + 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, + 0x1E6DCDEE, 0xEC064EED, 0xC38D26C4, 0x31E6A5C7, 0x22B65633, + 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, + 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8, + 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, + 0x07198540, 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, + 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, + 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, + 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6, 0x88D28022, + 0x7AB90321, 
0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, + 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, + 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, + 0xAD7D5351]; + /// This vector uses a lookup into a table for random values #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] pub struct RandomVectorHash {} -const RANDOM_VECTOR: [u16; 768] = [ +static RANDOM_VECTOR: [u16; 768] = [ 0x499d, 0x3dc2, 0x2d07, 0x705b, 0x7a76, 0x3469, 0x59db, 0x0c58, 0x2b72, 0x412d, 0x1246, 0x2095, 0x1c1c, 0x4726, 0x5f45, 0x2c4e, 0x7b1b, 0x1e70, 0x2743, 0x554f, 0x1334, 0x5328, 0x78c1, 0x41cc, 0x4b2c, 0x62a5, 0x1d93, 0x4aa4, 0x64c8, 0x65f0, 0x194d, 0x1ac0, 0x3f96, 0x41df, 0x4389, 0x065b, From 2dbec984b4d6d7943a4a956b71f9c20a0e304ac2 Mon Sep 17 00:00:00 2001 From: Kristof Date: Sat, 14 Sep 2024 16:05:51 +0200 Subject: [PATCH 2/2] finalized work --- src/add_policy_estimator.rs | 62 ++++++++++++-- src/complevel_estimator.rs | 1 + src/depth_estimator.rs | 37 +++++++-- src/hash_algorithm.rs | 123 ++++++++++++++++------------ src/preflate_parameter_estimator.rs | 14 ++-- src/preflate_token.rs | 11 ++- 6 files changed, 172 insertions(+), 76 deletions(-) diff --git a/src/add_policy_estimator.rs b/src/add_policy_estimator.rs index 1d37d07..dc91b01 100644 --- a/src/add_policy_estimator.rs +++ b/src/add_policy_estimator.rs @@ -23,6 +23,16 @@ pub enum DictionaryAddPolicy { /// This policy is used by MiniZ in fastest mode. It adds all substrings of a match to the dictionary except /// literals that are 4 bytes away from the end of the block. AddFirstExcept4kBoundary, + + /// This policy is used by fast mode in zlibng, it is the same + /// as AddFirst(0) but it also add the last character for the + /// last match in the 32k window. 
+ /// + /// This is due to the fact that + /// each time the dictionary is reset, it explicitly adds the + /// last character to the dictionary which ends up being the + /// last character of the previous match. + AddFirstWith32KBoundary, } impl DictionaryAddPolicy { @@ -59,11 +69,29 @@ impl DictionaryAddPolicy { update_fn(input, pos, 1); } } + DictionaryAddPolicy::AddFirstWith32KBoundary => { + update_fn(input, pos, 1); + if is_at_32k_boundary(length, pos) { + update_fn(&input[length as usize - 1..], pos + length - 1, 1); + } + } } } } } +/// Check if the match is crossing the 32k boundary, as happens +/// in zlibng. +fn is_at_32k_boundary(length: u32, pos: u32) -> bool { + length > 1 + && (((pos) & 0x7fff) <= (32768 - 0x106)) + && (((pos + length) & 0x7fff) >= (32768 - 0x106)) +} + +/// When adding matches to the dictionary, some of the fast variants +/// only add smaller strings in their entirety (i.e. a substring starting +/// at each position). This function is designed to measure this +/// and determine the policy that should be used. 
pub fn estimate_add_policy(token_blocks: &[PreflateTokenBlock]) -> DictionaryAddPolicy { const WINDOW_MASK: usize = 0x7fff; @@ -78,9 +106,16 @@ pub fn estimate_add_policy(token_blocks: &[PreflateTokenBlock]) -> DictionaryAdd // tracks the maximum length that we've seen that was added to the dictionary if the last match was also added let mut max_length_last_add = 0; + + // same as previous, but tracks if we are inside the 32k boundary + let mut last_outside_32k_seen = false; + let mut current_offset: u32 = 0; const LAST_ADDED: u16 = 0x8000; + const LAST_32K: u16 = 0x4000; + + const MASK: u16 = 0x0fff; let mut min_len = u32::MAX; @@ -113,7 +148,7 @@ pub fn estimate_add_policy(token_blocks: &[PreflateTokenBlock]) -> DictionaryAdd let previous_match = current_window[(current_offset - r.dist()) as usize & WINDOW_MASK]; - let match_length = u32::from(previous_match & !LAST_ADDED); + let match_length = u32::from(previous_match & MASK); max_length = std::cmp::max(max_length, match_length); if (previous_match & LAST_ADDED) == 0 { @@ -121,12 +156,23 @@ pub fn estimate_add_policy(token_blocks: &[PreflateTokenBlock]) -> DictionaryAdd std::cmp::max(max_length_last_add, match_length); } + if match_length != 0 && (previous_match & LAST_32K) == 0 { + last_outside_32k_seen = true; + } + + let last = LAST_ADDED + | if is_at_32k_boundary(r.len(), current_offset) { + LAST_32K + } else { + 0 + }; + current_window[current_offset as usize & WINDOW_MASK] = 0; current_offset += 1; for i in 1..r.len() { current_window[current_offset as usize & WINDOW_MASK] = - r.len() as u16 | if i == r.len() - 1 { LAST_ADDED } else { 0 }; + r.len() as u16 | if i == r.len() - 1 { last } else { 0 }; current_offset += 1; } } @@ -138,6 +184,8 @@ pub fn estimate_add_policy(token_blocks: &[PreflateTokenBlock]) -> DictionaryAdd if max_length == 0 && block_4k { DictionaryAddPolicy::AddFirstExcept4kBoundary + } else if !last_outside_32k_seen { + DictionaryAddPolicy::AddFirstWith32KBoundary } else if 
max_length_last_add < max_length { DictionaryAddPolicy::AddFirstAndLast(max_length_last_add as u16) } else if max_length < 258 { @@ -180,10 +228,10 @@ fn verify_zlib_level_recognition() { #[test] fn verify_zlibng_level_recognition() { let levels = [ - DictionaryAddPolicy::AddFirstAndLast(0), // 1 quick - DictionaryAddPolicy::AddFirstAndLast(4), // 2 fast - DictionaryAddPolicy::AddFirstAndLast(96), // 3 medium - DictionaryAddPolicy::AddFirstAndLast(191), // 4 medium + DictionaryAddPolicy::AddFirstWith32KBoundary, // 1 quick + DictionaryAddPolicy::AddFirstAndLast(4), // 2 fast + DictionaryAddPolicy::AddFirstAndLast(96), // 3 medium + DictionaryAddPolicy::AddFirstAndLast(191), // 4 medium ]; for i in 1..=4 { @@ -198,7 +246,7 @@ fn verify_zlibng_level_recognition() { /// libflate always adds all matches to the dictionary #[test] -fn verify_libflate_level_recognition() { +fn verify_libdeflate_level_recognition() { for i in 1..=9 { let v = crate::process::read_file(&format!("compressed_libdeflate_level{}.deflate", i)); diff --git a/src/complevel_estimator.rs b/src/complevel_estimator.rs index 3187e05..93e32d3 100644 --- a/src/complevel_estimator.rs +++ b/src/complevel_estimator.rs @@ -262,6 +262,7 @@ impl<'a> CompLevelEstimatorState<'a> { match self.add_policy { DictionaryAddPolicy::AddFirst(_) | DictionaryAddPolicy::AddFirstAndLast(_) + | DictionaryAddPolicy::AddFirstWith32KBoundary | DictionaryAddPolicy::AddFirstExcept4kBoundary => { for config in &ZLIB_PREFLATE_PARSER_SETTINGS { if candidate.max_chain_found() < config.max_chain { diff --git a/src/depth_estimator.rs b/src/depth_estimator.rs index 65de3ec..6afd1c3 100644 --- a/src/depth_estimator.rs +++ b/src/depth_estimator.rs @@ -38,6 +38,9 @@ pub struct HashTableDepthEstimatorImpl { /// hash function used to calculate the hash hash: H, + + /// the dictionary add policy used to update the hash + add_policy: DictionaryAddPolicy, } impl HashTableDepthEstimatorImpl { @@ -45,7 +48,13 @@ impl HashTableDepthEstimatorImpl { 
/// is only valid if this was part of the same hash chain #[inline] fn get_node_depth(&self, node: u16, expected_hash: u16) -> i32 { - debug_assert_eq!(self.chain_depth_hash_verify[node as usize], expected_hash); + debug_assert_eq!( + self.chain_depth_hash_verify[node as usize], + expected_hash, + "hash chain imcomplete {:?} {:?}", + self.hash.algorithm(), + self.add_policy + ); self.chain_depth[node as usize] } @@ -80,6 +89,7 @@ impl HashTableDepthEstimatorImpl { impl HashTableDepthEstimator for HashTableDepthEstimatorImpl { fn update_hash(&mut self, add_policy: DictionaryAddPolicy, input: &PreflateInput, length: u32) { + self.add_policy = add_policy; add_policy.update_hash( input.cur_chars(0), input.pos(), @@ -113,17 +123,17 @@ impl HashTableDepthEstimator for HashTableDepthEstimatorI } } -/// this algorithm is wierd because it uses the first candidate of the 3 byte match, -/// but then continues with the next 4 bytes. This is used by libflate. +/// Libdeflate is a bit special because it uses the first candidate of the 3 byte match, +/// but then continues with the next 4 bytes. 
#[derive(DefaultBoxed)] -struct HashTableDepthEstimatorLibflate { +struct HashTableDepthEstimatorLibdeflate { length4: HashTableDepthEstimatorImpl, head3: [u32; 65536], } const LIB_DEFLATE3_HASH: LibdeflateHash3Secondary = LibdeflateHash3Secondary {}; -impl HashTableDepthEstimatorLibflate { +impl HashTableDepthEstimatorLibdeflate { fn internal_update_hash3(&mut self, chars: &[u8], pos: u32, length: u32) { debug_assert!(length as usize <= chars.len()); if length as usize + 3 - 1 >= chars.len() { @@ -139,7 +149,7 @@ impl HashTableDepthEstimatorLibflate { } } -impl HashTableDepthEstimator for HashTableDepthEstimatorLibflate { +impl HashTableDepthEstimator for HashTableDepthEstimatorLibdeflate { fn update_hash(&mut self, add_policy: DictionaryAddPolicy, input: &PreflateInput, length: u32) { add_policy.update_hash( input.cur_chars(0), @@ -184,7 +194,7 @@ pub fn new_depth_estimator(hash_algorithm: HashAlgorithm) -> Box HashTableDepthEstimatorImpl::box_new(MiniZHash {}), - HashAlgorithm::Libdeflate4 => HashTableDepthEstimatorLibflate::default_boxed(), + HashAlgorithm::Libdeflate4 => HashTableDepthEstimatorLibdeflate::default_boxed(), HashAlgorithm::Libdeflate4Fast => HashTableDepthEstimatorImpl::box_new(LibdeflateHash4 {}), HashAlgorithm::ZlibNG => HashTableDepthEstimatorImpl::box_new(ZlibNGHash {}), @@ -207,7 +217,10 @@ fn verify_max_chain_length() { #[rustfmt::skip] let levels = [ - ("compressed_zlibng_level1.deflate", HashAlgorithm::ZlibNG, DictionaryAddPolicy::AddFirstAndLast(0), 23), + ("compressed_zlibng_level1.deflate", HashAlgorithm::Crc32cHash, DictionaryAddPolicy::AddFirstWith32KBoundary, 0), + ("compressed_zlibng_level2.deflate", HashAlgorithm::Crc32cHash, DictionaryAddPolicy::AddFirstAndLast(4), 3), + ("compressed_zlibng_level3.deflate", HashAlgorithm::Crc32cHash, DictionaryAddPolicy::AddFirstAndLast(96), 5), + ("compressed_zlibng_level4.deflate", HashAlgorithm::Crc32cHash, DictionaryAddPolicy::AddFirstAndLast(191), 23), 
("compressed_libdeflate_level1.deflate", HashAlgorithm::Libdeflate4Fast, DictionaryAddPolicy::AddAll, 1), ("compressed_libdeflate_level2.deflate", HashAlgorithm::Libdeflate4, DictionaryAddPolicy::AddAll, 6), ("compressed_libdeflate_level3.deflate", HashAlgorithm::Libdeflate4, DictionaryAddPolicy::AddAll, 12), @@ -235,6 +248,14 @@ fn verify_max_chain_length() { let parsed = parse_deflate(&compressed_data, 0).unwrap(); + let add_policy_estimator = crate::add_policy_estimator::estimate_add_policy(&parsed.blocks); + + assert_eq!( + add_policy_estimator, level.2, + "add policy for file {} is incorrect (should be {:?})", + level.0, level.2 + ); + let mut estimator = new_depth_estimator(level.1); let mut input = PreflateInput::new(&parsed.plain_text); diff --git a/src/hash_algorithm.rs b/src/hash_algorithm.rs index 080f2d7..536c811 100644 --- a/src/hash_algorithm.rs +++ b/src/hash_algorithm.rs @@ -25,6 +25,7 @@ pub trait HashImplementation: Default + Copy + Clone { fn get_hash(&self, b: &[u8]) -> u16; fn num_hash_bytes() -> usize; fn new_hash_chain(self) -> Self::HashChainType; + fn algorithm(&self) -> HashAlgorithm; } #[derive(Default, Debug, Copy, Clone, Eq, PartialEq)] @@ -50,6 +51,13 @@ impl HashImplementation for ZlibRotatingHash { fn new_hash_chain(self) -> Self::HashChainType { HashChainNormalize::::new(self) } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::Zlib { + hash_mask: self.hash_mask, + hash_shift: self.hash_shift, + } + } } #[derive(Default, Copy, Clone)] @@ -74,6 +82,10 @@ impl HashImplementation for MiniZHash { fn new_hash_chain(self) -> Self::HashChainType { crate::hash_chain::HashChainNormalize::::new(self) } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::MiniZFast + } } /// Fast version of Libflate hash that doesn't use a secondary 3 @@ -97,6 +109,10 @@ impl HashImplementation for LibdeflateHash4Fast { fn new_hash_chain(self) -> Self::HashChainType { HashChainNormalize::::new(self) } + + fn algorithm(&self) -> HashAlgorithm { 
+ HashAlgorithm::Libdeflate4Fast + } } #[derive(Default, Copy, Clone)] @@ -118,6 +134,10 @@ impl HashImplementation for LibdeflateHash4 { fn new_hash_chain(self) -> Self::HashChainType { crate::hash_chain::HashChainNormalizeLibflate4::new() } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::Libdeflate4 + } } /// This is the 3 byte version of the libdeflate hash algorithm, which is used @@ -142,6 +162,10 @@ impl HashImplementation for LibdeflateHash3Secondary { fn new_hash_chain(self) -> Self::HashChainType { unimplemented!(); } + + fn algorithm(&self) -> HashAlgorithm { + unimplemented!("shoudln't get called on secondary hash"); + } } #[derive(Default, Copy, Clone)] @@ -163,6 +187,10 @@ impl HashImplementation for ZlibNGHash { fn new_hash_chain(self) -> Self::HashChainType { crate::hash_chain::HashChainNormalize::::new(self) } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::ZlibNG + } } #[derive(Default, Copy, Clone)] @@ -189,61 +217,46 @@ impl HashImplementation for Crc32cHash { fn new_hash_chain(self) -> Self::HashChainType { crate::hash_chain::HashChainNormalize::::new(self) } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::Crc32cHash + } } -static CRC32C_TABLE : [u32;256] = [ - 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, - 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, - 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, - 0x5E133C24, 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, - 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, 0x9A879FA0, - 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, - 0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, - 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, - 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, - 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, 0x30E349B1, 0xC288CAB2, - 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, - 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, - 0x7DA08661, 
0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, - 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, - 0x67DAFA54, 0x95B17957, 0xCBA24573, 0x39C9C670, 0x2A993584, - 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, - 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, - 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F, - 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, - 0x0F36E6F7, 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, - 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, 0xEB1FCBAD, - 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, - 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E, 0x90A324FA, - 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, - 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, - 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, - 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, - 0x563C5F93, 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, - 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, 0x92A8FC17, - 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, - 0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, - 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, - 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, - 0x97BAA1BA, 0x84EA524E, 0x7681D14D, 0x2892ED69, 0xDAF96E6A, - 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, - 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, - 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, - 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, - 0x1E6DCDEE, 0xEC064EED, 0xC38D26C4, 0x31E6A5C7, 0x22B65633, - 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, - 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, - 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8, - 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, - 0x07198540, 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, - 0x9E902E7B, 0x6CFBAD78, 
0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, - 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, - 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6, 0x88D28022, - 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, - 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, - 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, - 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, - 0xAD7D5351]; +static CRC32C_TABLE: [u32; 256] = [ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 
0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351, +]; /// This vector uses a lookup into a table for random values #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] @@ -332,4 +345,8 @@ impl HashImplementation for RandomVectorHash { fn new_hash_chain(self) -> Self::HashChainType { Self::HashChainType::new(self) } + + fn algorithm(&self) -> HashAlgorithm { + HashAlgorithm::RandomVector + } } diff --git a/src/preflate_parameter_estimator.rs 
b/src/preflate_parameter_estimator.rs index a7a7187..acb8ec4 100644 --- a/src/preflate_parameter_estimator.rs +++ b/src/preflate_parameter_estimator.rs @@ -81,11 +81,12 @@ impl PreflateParameters { let max_chain = decoder.decode_value(16); let min_len = decoder.decode_value(16); - let add_policy = match decoder.decode_value(2) { + let add_policy = match decoder.decode_value(3) { 0 => DictionaryAddPolicy::AddAll, 1 => DictionaryAddPolicy::AddFirst(decoder.decode_value(8)), 2 => DictionaryAddPolicy::AddFirstAndLast(decoder.decode_value(8)), 3 => DictionaryAddPolicy::AddFirstExcept4kBoundary, + 4 => DictionaryAddPolicy::AddFirstWith32KBoundary, _ => panic!("invalid add policy"), }; @@ -216,17 +217,20 @@ impl PreflateParameters { encoder.encode_value(u16::try_from(self.predictor.min_len).unwrap(), 16); match self.predictor.add_policy { - DictionaryAddPolicy::AddAll => encoder.encode_value(0, 2), + DictionaryAddPolicy::AddAll => encoder.encode_value(0, 3), DictionaryAddPolicy::AddFirst(v) => { - encoder.encode_value(1, 2); + encoder.encode_value(1, 3); encoder.encode_value(v as u16, 8); } DictionaryAddPolicy::AddFirstAndLast(v) => { - encoder.encode_value(2, 2); + encoder.encode_value(2, 3); encoder.encode_value(v as u16, 8); } DictionaryAddPolicy::AddFirstExcept4kBoundary => { - encoder.encode_value(3, 2); + encoder.encode_value(3, 3); + } + DictionaryAddPolicy::AddFirstWith32KBoundary => { + encoder.encode_value(4, 3); } } } diff --git a/src/preflate_token.rs b/src/preflate_token.rs index d6cc96b..dbdef2f 100644 --- a/src/preflate_token.rs +++ b/src/preflate_token.rs @@ -112,13 +112,18 @@ impl PreflateTokenBlock { pub fn add_literal(&mut self, lit: u8) { self.tokens.push(PreflateToken::Literal(lit)); - self.freq.literal_codes[lit as usize] += 1; + if self.block_type == BlockType::DynamicHuff { + self.freq.literal_codes[lit as usize] += 1; + } } pub fn add_reference(&mut self, len: u32, dist: u32, irregular258: bool) { self.tokens 
.push(PreflateToken::new_reference(len, dist, irregular258)); - self.freq.literal_codes[NONLEN_CODE_COUNT + quantize_length(len)] += 1; - self.freq.distance_codes[quantize_distance(dist)] += 1; + + if self.block_type == BlockType::DynamicHuff { + self.freq.literal_codes[NONLEN_CODE_COUNT + quantize_length(len)] += 1; + self.freq.distance_codes[quantize_distance(dist)] += 1; + } } }