Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename fastfield codecs #1483

Merged
merged 1 commit into from
Aug 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@ extern crate test;

#[cfg(test)]
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use fastfield_codecs::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use fastfield_codecs::bitpacked::{BitpackedReader, BitpackedSerializer};
use fastfield_codecs::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
use fastfield_codecs::linear::{LinearReader, LinearSerializer};
use fastfield_codecs::*;

fn get_data() -> Vec<u64> {
Expand Down Expand Up @@ -52,32 +50,32 @@ mod tests {
#[bench]
fn bench_fastfield_bitpack_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BitpackedFastFieldSerializer>(b, &data);
bench_create::<BitpackedSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<LinearFastFieldSerializer>(b, &data);
bench_create::<LinearSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BlockwiseLinearFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(b, &data);
bench_get::<BitpackedSerializer, BitpackedReader>(b, &data);
}
#[bench]
fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<LinearFastFieldSerializer, LinearFastFieldReader>(b, &data);
bench_get::<LinearSerializer, LinearReader>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(b, &data);
bench_get::<BlockwiseLinearSerializer, BlockwiseLinearReader>(b, &data);
}
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);
Expand Down
24 changes: 11 additions & 13 deletions fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct BitpackedFastFieldReader {
pub struct BitpackedReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub min_value_u64: u64,
pub max_value_u64: u64,
}

impl FastFieldCodecReader for BitpackedFastFieldReader {
impl FastFieldCodecReader for BitpackedReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - 16;
Expand All @@ -28,7 +28,7 @@ impl FastFieldCodecReader for BitpackedFastFieldReader {
let max_value = min_value + amplitude;
let num_bits = compute_num_bits(amplitude);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(BitpackedFastFieldReader {
Ok(BitpackedReader {
data,
min_value_u64: min_value,
max_value_u64: max_value,
Expand All @@ -48,15 +48,15 @@ impl FastFieldCodecReader for BitpackedFastFieldReader {
self.max_value_u64
}
}
pub struct BitpackedFastFieldSerializerLegacy<'a, W: 'a + Write> {
pub struct BitpackedSerializerLegacy<'a, W: 'a + Write> {
bit_packer: BitPacker,
write: &'a mut W,
min_value: u64,
amplitude: u64,
num_bits: u8,
}

impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
impl<'a, W: Write> BitpackedSerializerLegacy<'a, W> {
/// Creates a new fast field serializer.
///
/// The serializer in fact encodes the values by bitpacking
Expand All @@ -69,12 +69,12 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
write: &'a mut W,
min_value: u64,
max_value: u64,
) -> io::Result<BitpackedFastFieldSerializerLegacy<'a, W>> {
) -> io::Result<BitpackedSerializerLegacy<'a, W>> {
assert!(min_value <= max_value);
let amplitude = max_value - min_value;
let num_bits = compute_num_bits(amplitude);
let bit_packer = BitPacker::new();
Ok(BitpackedFastFieldSerializerLegacy {
Ok(BitpackedSerializerLegacy {
bit_packer,
write,
min_value,
Expand All @@ -98,9 +98,9 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
}
}

pub struct BitpackedFastFieldSerializer {}
pub struct BitpackedSerializer {}

impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
impl FastFieldCodecSerializer for BitpackedSerializer {
/// The CODEC_TYPE is an enum value used for serialization.
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;

Expand All @@ -116,7 +116,7 @@ impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
write: &mut impl Write,
fastfield_accessor: &dyn FastFieldDataAccess,
) -> io::Result<()> {
let mut serializer = BitpackedFastFieldSerializerLegacy::open(
let mut serializer = BitpackedSerializerLegacy::open(
write,
fastfield_accessor.min_value(),
fastfield_accessor.max_value(),
Expand Down Expand Up @@ -146,9 +146,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) {
crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
data, name,
);
crate::tests::create_and_validate::<BitpackedSerializer, BitpackedReader>(data, name);
}

#[test]
Expand Down
19 changes: 9 additions & 10 deletions fastfield_codecs/src/blockwise_linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const CHUNK_SIZE: u64 = 512;
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct BlockwiseLinearFastFieldReader {
pub struct BlockwiseLinearReader {
data: OwnedBytes,
pub footer: BlockwiseLinearFooter,
}
Expand Down Expand Up @@ -148,14 +148,14 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}

impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
impl FastFieldCodecReader for BlockwiseLinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
let footer_offset = bytes.len() - 4 - footer_len as usize;
let (data, mut footer) = bytes.split(footer_offset);
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
Ok(BlockwiseLinearFastFieldReader { data, footer })
Ok(BlockwiseLinearReader { data, footer })
}

#[inline]
Expand All @@ -180,10 +180,10 @@ impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
}
}

/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearFastFieldSerializer {}
/// Same as LinearSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearSerializer {}

impl FastFieldCodecSerializer for BlockwiseLinearFastFieldSerializer {
impl FastFieldCodecSerializer for BlockwiseLinearSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
/// Creates a new fast field serializer.
fn serialize(
Expand Down Expand Up @@ -359,10 +359,9 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
BlockwiseLinearFastFieldSerializer,
BlockwiseLinearFastFieldReader,
>(data, name)
crate::tests::create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(
data, name,
)
}

const HIGHEST_BIT: u64 = 1 << 63;
Expand Down
40 changes: 19 additions & 21 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,9 @@ mod tests {
use proptest::arbitrary::any;
use proptest::proptest;

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::blockwise_linear::{
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use crate::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use crate::bitpacked::{BitpackedReader, BitpackedSerializer};
use crate::blockwise_linear::{BlockwiseLinearReader, BlockwiseLinearSerializer};
use crate::linear::{LinearReader, LinearSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand Down Expand Up @@ -200,16 +198,16 @@ mod tests {
proptest! {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
create_and_validate::<LinearSerializer, LinearReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedSerializer, BitpackedReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
create_and_validate::<LinearSerializer, LinearReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearSerializer, BlockwiseLinearReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedSerializer, BitpackedReader>(&data, "proptest bitpacked");
}

}
Expand Down Expand Up @@ -244,15 +242,15 @@ mod tests {
}
#[test]
fn test_codec_bitpacking() {
test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
test_codec::<BitpackedSerializer, BitpackedReader>();
}
#[test]
fn test_codec_interpolation() {
test_codec::<LinearFastFieldSerializer, LinearFastFieldReader>();
test_codec::<LinearSerializer, LinearReader>();
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>();
test_codec::<BlockwiseLinearSerializer, BlockwiseLinearReader>();
}

use super::*;
Expand All @@ -261,24 +259,24 @@ mod tests {
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();

let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation = BlockwiseLinearFastFieldSerializer::estimate(&data);
let multi_linear_interpol_estimation = BlockwiseLinearSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, bitpacked_estimation);
}
#[test]
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];

let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.32);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
#[test]
Expand All @@ -288,10 +286,10 @@ mod tests {

// in this case the linear interpolation can't in fact be worse than bitpacking,
// but the estimator adds some threshold, which leads to estimated worse behavior
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.35);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
let bitpacked_estimation = BitpackedSerializer::estimate(&data);
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}
Expand Down
14 changes: 6 additions & 8 deletions fastfield_codecs/src/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct LinearFastFieldReader {
pub struct LinearReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub footer: LinearFooter,
Expand Down Expand Up @@ -59,7 +59,7 @@ impl FixedSize for LinearFooter {
const SIZE_IN_BYTES: usize = 56;
}

impl FastFieldCodecReader for LinearFastFieldReader {
impl FastFieldCodecReader for LinearReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
Expand All @@ -68,7 +68,7 @@ impl FastFieldCodecReader for LinearFastFieldReader {
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
let num_bits = compute_num_bits(footer.relative_max_value);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(LinearFastFieldReader {
Ok(LinearReader {
data,
bit_unpacker,
footer,
Expand All @@ -93,7 +93,7 @@ impl FastFieldCodecReader for LinearFastFieldReader {

/// Fastfield serializer, which tries to guess values by linear interpolation
/// and stores the difference bitpacked.
pub struct LinearFastFieldSerializer {}
pub struct LinearSerializer {}

#[inline]
pub(crate) fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
Expand Down Expand Up @@ -134,7 +134,7 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
}

impl FastFieldCodecSerializer for LinearFastFieldSerializer {
impl FastFieldCodecSerializer for LinearSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;

/// Creates a new fast field serializer.
Expand Down Expand Up @@ -260,9 +260,7 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(
data, name,
)
crate::tests::create_and_validate::<LinearSerializer, LinearReader>(data, name)
}

#[test]
Expand Down
12 changes: 5 additions & 7 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::blockwise_linear::BlockwiseLinearFastFieldSerializer;
use fastfield_codecs::linear::LinearFastFieldSerializer;
use fastfield_codecs::blockwise_linear::BlockwiseLinearSerializer;
use fastfield_codecs::linear::LinearSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

Expand All @@ -13,13 +13,11 @@ fn main() {

for (data, data_set_name) in get_codec_test_data_sets() {
let mut results = vec![];
let res = serialize_with_codec::<LinearFastFieldSerializer>(&data);
let res = serialize_with_codec::<LinearSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<BlockwiseLinearFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedSerializer>(&data);
results.push(res);

// let best_estimation_codec = results
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::io;
use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;

use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::serializer::BitpackedSerializerLegacy;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType, FastValue};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
Expand Down Expand Up @@ -171,7 +171,7 @@ impl MultiValuedFastFieldWriter {
}
{
// writing the values themselves.
let mut value_serializer: BitpackedFastFieldSerializerLegacy<'_, _>;
let mut value_serializer: BitpackedSerializerLegacy<'_, _>;
if let Some(mapping) = mapping_opt {
value_serializer = serializer.new_u64_fast_field_with_idx(
self.field,
Expand Down
Loading