Skip to content

Commit

Permalink
Minor refactoring. Introducing a codec type enum. (#1477)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored Aug 24, 2022
1 parent 513f682 commit 8bbb22e
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 156 deletions.
12 changes: 6 additions & 6 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ extern crate test;
#[cfg(test)]
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
use fastfield_codecs::linear::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::*;

Expand Down Expand Up @@ -64,7 +64,7 @@ mod tests {
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<MultiLinearInterpolFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearInterpolFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
Expand All @@ -79,7 +79,7 @@ mod tests {
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
bench_get::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
b, &data,
);
}
Expand Down
9 changes: 6 additions & 3 deletions fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

/// Depending on the field type, a different
/// fast field is required.
Expand Down Expand Up @@ -99,8 +101,9 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
pub struct BitpackedFastFieldSerializer {}

impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
const NAME: &'static str = "Bitpacked";
const ID: u8 = 1;
/// The CODEC_TYPE is an enum value used for serialization.
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;

/// Serializes data with the BitpackedFastFieldSerializer.
///
/// The serializer in fact encodes the values by bitpacking
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::linearinterpol::{get_calculated_value, get_slope};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::linear::{get_calculated_value, get_slope};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

const CHUNK_SIZE: u64 = 512;

Expand Down Expand Up @@ -179,11 +181,10 @@ impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
}

/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct MultiLinearInterpolFastFieldSerializer {}
pub struct BlockwiseLinearInterpolFastFieldSerializer {}

impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
const NAME: &'static str = "MultiLinearInterpol";
const ID: u8 = 3;
impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinearInterpol;
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down Expand Up @@ -359,7 +360,7 @@ mod tests {

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
BlockwiseLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
>(data, name)
}
Expand Down
91 changes: 69 additions & 22 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ extern crate more_asserts;
use std::io;
use std::io::Write;

use common::BinarySerializable;
use ownedbytes::OwnedBytes;

pub mod bitpacked;
pub mod linearinterpol;
pub mod multilinearinterpol;
pub mod blockwise_linear;
pub mod linear;

pub trait FastFieldCodecReader: Sized {
/// reads the metadata and returns the CodecReader
Expand All @@ -19,13 +20,50 @@ pub trait FastFieldCodecReader: Sized {
fn max_value(&self) -> u64;
}

/// Identifies which fast field codec was used to encode a column.
///
/// The enum is `#[repr(u8)]` and every variant has an explicit discriminant.
/// That discriminant is the code returned by `to_code`, accepted by
/// `from_code`, and written by the `BinarySerializable` implementation, so
/// the numeric values must not be changed or reused once data has been written.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
#[repr(u8)]
pub enum FastFieldCodecType {
/// Code `1`; the codec type declared by `BitpackedFastFieldSerializer`.
Bitpacked = 1,
/// Code `2`; the codec type declared by `LinearInterpolFastFieldSerializer`.
LinearInterpol = 2,
/// Code `3`; the codec type declared by `BlockwiseLinearInterpolFastFieldSerializer`.
BlockwiseLinearInterpol = 3,
/// Code `4`.
Gcd = 4,
}

impl BinarySerializable for FastFieldCodecType {
    /// Serializes the codec type as the `u8` code returned by `to_code`.
    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
        self.to_code().serialize(wrt)
    }

    /// Deserializes a `u8` code and maps it back to a codec type.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading the code, and returns an
    /// error of kind `io::ErrorKind::InvalidData` when the code does not
    /// correspond to any known codec type.
    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let code = u8::deserialize(reader)?;
        Self::from_code(code).ok_or_else(|| {
            // The message has to go through `format!`: a bare string literal
            // is not interpolated and would print `{code}` verbatim.
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Unknown code `{code}`."),
            )
        })
    }
}

impl FastFieldCodecType {
    /// Returns the `u8` code of this codec type, i.e. its enum discriminant.
    pub fn to_code(self) -> u8 {
        self as u8
    }

    /// Returns the codec type whose code equals `code`, or `None` when no
    /// variant uses that code.
    pub fn from_code(code: u8) -> Option<Self> {
        // Every variant of the enum; the lookup compares discriminants so the
        // mapping cannot drift from `to_code`.
        let known_types = [
            Self::Bitpacked,
            Self::LinearInterpol,
            Self::BlockwiseLinearInterpol,
            Self::Gcd,
        ];
        known_types
            .iter()
            .copied()
            .find(|candidate| candidate.to_code() == code)
    }
}

/// The FastFieldCodecSerializer trait must be implemented by every
/// fast field compression variant, so that the best one can be chosen.
pub trait FastFieldCodecSerializer {
/// A codec needs to provide a unique codec type, which is
/// used for debugging and de/serialization.
const NAME: &'static str;
const ID: u8;
const CODEC_TYPE: FastFieldCodecType;

/// Check if the Codec is able to compress the data
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess) -> bool;
Expand Down Expand Up @@ -128,10 +166,10 @@ mod tests {
use proptest::proptest;

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
use crate::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
use crate::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use crate::linear::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand All @@ -151,8 +189,8 @@ mod tests {
let val = reader.get_u64(doc as u64);
if val != *orig_val {
panic!(
"val {:?} does not match orig_val {:?}, in data set {}, data {:?}",
val, orig_val, name, data
"val {val:?} does not match orig_val {orig_val:?}, in data set {name}, data \
{data:?}",
);
}
}
Expand All @@ -163,14 +201,14 @@ mod tests {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

Expand All @@ -193,19 +231,15 @@ mod tests {
}

fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
let codec_name = S::NAME;
for (data, data_set_name) in get_codec_test_data_sets() {
let (estimate, actual) =
crate::tests::create_and_validate::<S, R>(&data, data_set_name);
let codec_name = format!("{:?}", S::CODEC_TYPE);
for (data, dataset_name) in get_codec_test_data_sets() {
let (estimate, actual) = crate::tests::create_and_validate::<S, R>(&data, dataset_name);
let result = if estimate == f32::MAX {
"Disabled".to_string()
} else {
format!("Estimate {:?} Actual {:?} ", estimate, actual)
format!("Estimate `{estimate}` Actual `{actual}`")
};
println!(
"Codec {}, DataSet {}, {}",
codec_name, data_set_name, result
);
println!("Codec {codec_name}, DataSet {dataset_name}, {result}");
}
}
#[test]
Expand All @@ -218,7 +252,8 @@ mod tests {
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
test_codec::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
);
}

use super::*;
Expand All @@ -231,7 +266,7 @@ mod tests {
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation =
MultiLinearInterpolFastFieldSerializer::estimate(&data);
BlockwiseLinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

Expand Down Expand Up @@ -262,4 +297,16 @@ mod tests {
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}

/// Checks that `from_code` and `to_code` are inverse of each other over the
/// whole `u8` range, and that exactly four codes map to a codec type.
#[test]
fn test_fast_field_codec_type_to_code() {
    // Gather every code that resolves to a codec type, with that type.
    let recognized: Vec<(u8, FastFieldCodecType)> = (0..=u8::MAX)
        .filter_map(|code| FastFieldCodecType::from_code(code).map(|codec_type| (code, codec_type)))
        .collect();
    for &(code, codec_type) in &recognized {
        assert_eq!(codec_type.to_code(), code);
    }
    assert_eq!(recognized.len(), 4);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use common::{BinarySerializable, FixedSize};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

/// Depending on the field type, a different
/// fast field is required.
Expand Down Expand Up @@ -133,8 +135,8 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}

impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
const NAME: &'static str = "LinearInterpol";
const ID: u8 = 2;
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::LinearInterpol;

/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down
18 changes: 9 additions & 9 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
use fastfield_codecs::blockwise_linear::BlockwiseLinearInterpolFastFieldSerializer;
use fastfield_codecs::linear::LinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

fn main() {
Expand All @@ -15,7 +15,7 @@ fn main() {
let mut results = vec![];
let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearInterpolFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
Expand All @@ -33,7 +33,7 @@ fn main() {
.unwrap();

table.add_row(Row::new(vec![Cell::new(data_set_name).style_spec("Bbb")]));
for (is_applicable, est, comp, name) in results {
for (is_applicable, est, comp, codec_type) in results {
let (est_cell, ratio_cell) = if !is_applicable {
("Codec Disabled".to_string(), "".to_string())
} else {
Expand All @@ -46,7 +46,7 @@ fn main() {
};

table.add_row(Row::new(vec![
Cell::new(name).style_spec("bFg"),
Cell::new(&format!("{codec_type:?}")).style_spec("bFg"),
Cell::new(&ratio_cell).style_spec(style),
Cell::new(&est_cell).style_spec(""),
]));
Expand Down Expand Up @@ -93,17 +93,17 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {

pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
data: &[u64],
) -> (bool, f32, f32, &'static str) {
) -> (bool, f32, f32, FastFieldCodecType) {
let is_applicable = S::is_applicable(&data);
if !is_applicable {
return (false, 0.0, 0.0, S::NAME);
return (false, 0.0, 0.0, S::CODEC_TYPE);
}
let estimation = S::estimate(&data);
let mut out = vec![];
S::serialize(&mut out, &data).unwrap();

let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
(true, estimation, actual_compression, S::NAME)
(true, estimation, actual_compression, S::CODEC_TYPE)
}

pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
Expand Down
Loading

0 comments on commit 8bbb22e

Please sign in to comment.