Skip to content

Commit

Permalink
Rename fastfield codecs (#1483)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored Aug 25, 2022
1 parent f908549 commit d8f66ba
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 111 deletions.
20 changes: 9 additions & 11 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@ extern crate test;

#[cfg(test)]
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use fastfield_codecs::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use fastfield_codecs::bitpacked::{BitpackedReader, BitpackedSerializer};
use fastfield_codecs::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
use fastfield_codecs::linear::{LinearReader, LinearSerializer};
use fastfield_codecs::*;

fn get_data() -> Vec<u64> {
Expand Down Expand Up @@ -52,32 +50,32 @@ mod tests {
#[bench]
fn bench_fastfield_bitpack_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BitpackedFastFieldSerializer>(b, &data);
bench_create::<BitpackedSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<LinearFastFieldSerializer>(b, &data);
bench_create::<LinearSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BlockwiseLinearFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(b, &data);
bench_get::<BitpackedSerializer, BitpackedReader>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<LinearFastFieldSerializer, LinearFastFieldReader>(b, &data);
bench_get::<LinearSerializer, LinearReader>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(b, &data);
bench_get::<BlockwiseLinearSerializer, BlockwiseLinearReader>(b, &data);
}
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);
Expand Down
24 changes: 11 additions & 13 deletions fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct BitpackedFastFieldReader {
pub struct BitpackedReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub min_value_u64: u64,
pub max_value_u64: u64,
}

impl FastFieldCodecReader for BitpackedFastFieldReader {
impl FastFieldCodecReader for BitpackedReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - 16;
Expand All @@ -28,7 +28,7 @@ impl FastFieldCodecReader for BitpackedFastFieldReader {
let max_value = min_value + amplitude;
let num_bits = compute_num_bits(amplitude);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(BitpackedFastFieldReader {
Ok(BitpackedReader {
data,
min_value_u64: min_value,
max_value_u64: max_value,
Expand All @@ -48,15 +48,15 @@ impl FastFieldCodecReader for BitpackedFastFieldReader {
self.max_value_u64
}
}
pub struct BitpackedFastFieldSerializerLegacy<'a, W: 'a + Write> {
pub struct BitpackedSerializerLegacy<'a, W: 'a + Write> {
bit_packer: BitPacker,
write: &'a mut W,
min_value: u64,
amplitude: u64,
num_bits: u8,
}

impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
impl<'a, W: Write> BitpackedSerializerLegacy<'a, W> {
/// Creates a new fast field serializer.
///
/// The serializer in fact encodes the values by bitpacking
Expand All @@ -69,12 +69,12 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
write: &'a mut W,
min_value: u64,
max_value: u64,
) -> io::Result<BitpackedFastFieldSerializerLegacy<'a, W>> {
) -> io::Result<BitpackedSerializerLegacy<'a, W>> {
assert!(min_value <= max_value);
let amplitude = max_value - min_value;
let num_bits = compute_num_bits(amplitude);
let bit_packer = BitPacker::new();
Ok(BitpackedFastFieldSerializerLegacy {
Ok(BitpackedSerializerLegacy {
bit_packer,
write,
min_value,
Expand All @@ -98,9 +98,9 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
}
}

pub struct BitpackedFastFieldSerializer {}
pub struct BitpackedSerializer {}

impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
impl FastFieldCodecSerializer for BitpackedSerializer {
/// The CODEC_TYPE is an enum value used for serialization.
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;

Expand All @@ -116,7 +116,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
) -> io::Result<()> {
let mut serializer = BitpackedFastFieldSerializerLegacy::open(
let mut serializer = BitpackedSerializerLegacy::open(
write,
fastfield_accessor.min_value(),
fastfield_accessor.max_value(),
Expand Down Expand Up @@ -146,9 +146,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) {
crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
data, name,
);
crate::tests::create_and_validate::<BitpackedSerializer, BitpackedReader>(data, name);
}

#[test]
Expand Down
19 changes: 9 additions & 10 deletions fastfield_codecs/src/blockwise_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const CHUNK_SIZE: u64 = 512;
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct BlockwiseLinearFastFieldReader {
pub struct BlockwiseLinearReader {
data: OwnedBytes,
pub footer: BlockwiseLinearFooter,
}
Expand Down Expand Up @@ -148,14 +148,14 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}

impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
impl FastFieldCodecReader for BlockwiseLinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
let footer_offset = bytes.len() - 4 - footer_len as usize;
let (data, mut footer) = bytes.split(footer_offset);
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
Ok(BlockwiseLinearFastFieldReader { data, footer })
Ok(BlockwiseLinearReader { data, footer })
}

#[inline]
Expand All @@ -180,10 +180,10 @@ impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
}
}

/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearFastFieldSerializer {}
/// Same as LinearSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearSerializer {}

impl FastFieldCodecSerializer for BlockwiseLinearFastFieldSerializer {
impl FastFieldCodecSerializer for BlockwiseLinearSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
/// Creates a new fast field serializer.
fn serialize(
Expand Down Expand Up @@ -359,10 +359,9 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
BlockwiseLinearFastFieldSerializer,
BlockwiseLinearFastFieldReader,
>(data, name)
crate::tests::create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(
data, name,
)
}

const HIGHEST_BIT: u64 = 1 << 63;
Expand Down
40 changes: 19 additions & 21 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,9 @@ mod tests {
use proptest::arbitrary::any;
use proptest::proptest;

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::blockwise_linear::{
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use crate::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use crate::bitpacked::{BitpackedReader, BitpackedSerializer};
use crate::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
use crate::linear::{LinearReader, LinearSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand Down Expand Up @@ -200,16 +198,16 @@ mod tests {
proptest! {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
create_and_validate::<LinearSerializer, LinearReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedSerializer, BitpackedReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
create_and_validate::<LinearSerializer, LinearReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedSerializer, BitpackedReader>(&data, "proptest bitpacked");
}

}
Expand Down Expand Up @@ -244,15 +242,15 @@ mod tests {
}
#[test]
fn test_codec_bitpacking() {
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
test_codec::<BitpackedSerializer, BitpackedReader>();
}
#[test]
fn test_codec_interpolation() {
test_codec::<LinearFastFieldSerializer, LinearFastFieldReader>();
test_codec::<LinearSerializer, LinearReader>();
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>();
test_codec::<BlockwiseLinearSerializer, BlockwiseLinearReader>();
}

use super::*;
Expand All @@ -261,24 +259,24 @@ mod tests {
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();

let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation = BlockwiseLinearFastFieldSerializer::estimate(&data);
let multi_linear_interpol_estimation = BlockwiseLinearSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, bitpacked_estimation);
}
#[test]
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];

let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.32);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
#[test]
Expand All @@ -288,10 +286,10 @@ mod tests {

// in this case the linear interpolation can't in fact be worse than bitpacking,
// but the estimator adds some threshold, which leads to estimated worse behavior
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.35);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
Expand Down
14 changes: 6 additions & 8 deletions fastfield_codecs/src/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct LinearFastFieldReader {
pub struct LinearReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub footer: LinearFooter,
Expand Down Expand Up @@ -59,7 +59,7 @@ impl FixedSize for LinearFooter {
const SIZE_IN_BYTES: usize = 56;
}

impl FastFieldCodecReader for LinearFastFieldReader {
impl FastFieldCodecReader for LinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
Expand All @@ -68,7 +68,7 @@ impl FastFieldCodecReader for LinearFastFieldReader {
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
let num_bits = compute_num_bits(footer.relative_max_value);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(LinearFastFieldReader {
Ok(LinearReader {
data,
bit_unpacker,
footer,
Expand All @@ -93,7 +93,7 @@ impl FastFieldCodecReader for LinearFastFieldReader {

/// Fastfield serializer, which tries to guess values by linear interpolation
/// and stores the difference bitpacked.
pub struct LinearFastFieldSerializer {}
pub struct LinearSerializer {}

#[inline]
pub(crate) fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
Expand Down Expand Up @@ -134,7 +134,7 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
}

impl FastFieldCodecSerializer for LinearFastFieldSerializer {
impl FastFieldCodecSerializer for LinearSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;

/// Creates a new fast field serializer.
Expand Down Expand Up @@ -260,9 +260,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(
data, name,
)
crate::tests::create_and_validate::<LinearSerializer, LinearReader>(data, name)
}

#[test]
Expand Down
12 changes: 5 additions & 7 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::blockwise_linear::BlockwiseLinearFastFieldSerializer;
use fastfield_codecs::linear::LinearFastFieldSerializer;
use fastfield_codecs::blockwise_linear::BlockwiseLinearSerializer;
use fastfield_codecs::linear::LinearSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

Expand All @@ -13,13 +13,11 @@ fn main() {

for (data, data_set_name) in get_codec_test_data_sets() {
let mut results = vec![];
let res = serialize_with_codec::<LinearFastFieldSerializer>(&data);
let res = serialize_with_codec::<LinearSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<BlockwiseLinearFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedSerializer>(&data);
results.push(res);

// let best_estimation_codec = results
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::io;
use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;

use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::serializer::BitpackedSerializerLegacy;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType, FastValue};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
Expand Down Expand Up @@ -171,7 +171,7 @@ impl MultiValuedFastFieldWriter {
}
{
// writing the values themselves.
let mut value_serializer: BitpackedFastFieldSerializerLegacy<'_, _>;
let mut value_serializer: BitpackedSerializerLegacy<'_, _>;
if let Some(mapping) = mapping_opt {
value_serializer = serializer.new_u64_fast_field_with_idx(
self.field,
Expand Down
Loading

0 comments on commit d8f66ba

Please sign in to comment.