Skip to content

Commit

Permalink
Huffman weights may only use FSE compression with a maximum accuracy log of 6 bits
Browse files Browse the repository at this point in the history
  • Loading branch information
KillingSpark committed Oct 17, 2024
1 parent c4fabda commit 1cf903f
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 8 deletions.
8 changes: 4 additions & 4 deletions src/fse/fse_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ impl State {
}
}

pub fn build_table_from_data(data: &[u8], avoid_0_numbit: bool) -> FSETable {
pub fn build_table_from_data(data: &[u8], max_log: usize, avoid_0_numbit: bool) -> FSETable {
let mut counts = [0; 256];
for x in data {
counts[*x as usize] += 1;
}
build_table_from_counts(&counts, avoid_0_numbit)
build_table_from_counts(&counts, max_log, avoid_0_numbit)
}

fn build_table_from_counts(counts: &[usize], avoid_0_numbit: bool) -> FSETable {
fn build_table_from_counts(counts: &[usize], max_log: usize, avoid_0_numbit: bool) -> FSETable {
let mut probs = [0; 256];
let mut min_count = 0;
for (idx, count) in counts.iter().copied().enumerate() {
Expand All @@ -224,7 +224,7 @@ fn build_table_from_counts(counts: &[usize], avoid_0_numbit: bool) -> FSETable {
assert!(sum > 0);
let sum = sum as usize;
let acc_log = (sum.ilog2() as u8 + 1).max(5);
assert!(acc_log < 22); // TODO implement logic to decrease some counts until this fits
assert!(acc_log < max_log as u8); // TODO implement logic to decrease some counts until this fits

// just raise the maximum probability as much as possible
// TODO is this optimal?
Expand Down
2 changes: 1 addition & 1 deletion src/fse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ pub fn round_trip(data: &[u8]) {
return;
}

let mut encoder: FSEEncoder = FSEEncoder::new(fse_encoder::build_table_from_data(data, false));
let mut encoder: FSEEncoder = FSEEncoder::new(fse_encoder::build_table_from_data(data, 22, false));
let mut dec_table = FSETable::new(255);

let encoded = encoder.encode(data);
Expand Down
2 changes: 1 addition & 1 deletion src/huff0/huff0_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ impl HuffmanTable {
//fse decompress weights
let bytes_used_by_fse_header = self
.fse_table
.build_decoder(fse_stream, /*TODO find actual max*/ 100)?;
.build_decoder(fse_stream, 6)?;

if bytes_used_by_fse_header > header as usize {
return Err(err::FSETableUsedTooManyBytes {
Expand Down
3 changes: 1 addition & 2 deletions src/huff0/huff0_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,10 @@ impl HuffmanEncoder {
// TODO strategy for determining this?
let weights = self.weights();
let weights = &weights[..weights.len() - 1]; // dont encode last weight

if weights.len() > 16 {
// TODO share output vec between encoders
// TODO assert that no 0 num_bit states are generated here
let mut encoder = FSEEncoder::new(fse_encoder::build_table_from_data(&weights, true));
let mut encoder = FSEEncoder::new(fse_encoder::build_table_from_data(&weights, 6, true));
let encoded = encoder.encode_interleaved(&weights);
assert!(encoded.len() < 128);
self.writer.write_bits(encoded.len() as u8, 8);
Expand Down

0 comments on commit 1cf903f

Please sign in to comment.