Skip to content

Commit

Permalink
add missing docs for fastfield_codecs crate (#1613)
Browse files Browse the repository at this point in the history
closes #1603
  • Loading branch information
PSeitz authored Oct 11, 2022
1 parent 8b69aab commit 11d3409
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 2 deletions.
3 changes: 3 additions & 0 deletions fastfield_codecs/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use tantivy_bitpacker::minmax;

use crate::monotonic_mapping::StrictlyMonotonicFn;

/// `Column` provides columnar access on a field.
pub trait Column<T: PartialOrd = u64>: Send + Sync {
/// Return the value associated with the given idx.
///
Expand Down Expand Up @@ -59,6 +60,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
/// `.max_value()`.
fn max_value(&self) -> T;

/// The number of values in the column.
fn num_vals(&self) -> u64;

/// Returns an iterator over the data
Expand All @@ -67,6 +69,7 @@ pub trait Column<T: PartialOrd = u64>: Send + Sync {
}
}

/// VecColumn provides `Column` over a slice.
pub struct VecColumn<'a, T = u64> {
values: &'a [T],
min_value: T,
Expand Down
19 changes: 17 additions & 2 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#![warn(missing_docs)]
#![cfg_attr(all(feature = "unstable", test), feature(test))]

//! # `fastfield_codecs`
//!
//! - Columnar storage of data for tantivy [`Column`].
//! - Encode data in different codecs.
//! - Monotonically map values to u64/u128.

#[cfg(test)]
#[macro_use]
extern crate more_asserts;
Expand Down Expand Up @@ -44,9 +51,16 @@ pub use self::serialize::{

/// Codecs available for encoding data once it has been converted to u64 (via
/// [`MonotonicallyMappableToU64`]).
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum FastFieldCodecType {
    /// Bitpacks every value within the value range. The bit width is determined by the
    /// amplitude `column.max_value() - column.min_value()`.
    Bitpacked = 1,
    /// Linear interpolation: a line is drawn between the first and the last value, and each
    /// value is bitpacked as its offset from that line. The bit width is determined by the
    /// maximum deviation from the line.
    Linear = 2,
    /// Same as [`FastFieldCodecType::Linear`], but encodes in blocks of 512 elements.
    BlockwiseLinear = 3,
}

Expand All @@ -64,11 +78,11 @@ impl BinarySerializable for FastFieldCodecType {
}

impl FastFieldCodecType {
pub fn to_code(self) -> u8 {
pub(crate) fn to_code(self) -> u8 {
self as u8
}

pub fn from_code(code: u8) -> Option<Self> {
pub(crate) fn from_code(code: u8) -> Option<Self> {
match code {
1 => Some(Self::Bitpacked),
2 => Some(Self::Linear),
Expand Down Expand Up @@ -150,6 +164,7 @@ trait FastFieldCodec: 'static {
fn estimate(column: &dyn Column) -> Option<f32>;
}

/// The list of all available codecs for u64 convertible data.
pub const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [
FastFieldCodecType::Bitpacked,
FastFieldCodecType::BlockwiseLinear,
Expand Down
2 changes: 2 additions & 0 deletions fastfield_codecs/src/monotonic_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use fastdivide::DividerU64;

use crate::MonotonicallyMappableToU128;

/// Monotonically maps a value to the u64 value space.
/// Monotonic mapping enables `PartialOrd` on u64 space without conversion to original space.
pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy + Send + Sync {
/// Converts a value to u64.
///
Expand Down
2 changes: 2 additions & 0 deletions fastfield_codecs/src/monotonic_mapping_u128.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::net::Ipv6Addr;

/// Monotonically maps a value to the u128 value space.
/// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Send + Sync {
/// Converts a value to u128.
///
Expand Down
7 changes: 7 additions & 0 deletions fastfield_codecs/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ use crate::{
/// By design, after normalization, `min_value = 0` and `gcd = 1`.
#[derive(Clone, Copy, Debug)]
pub struct NormalizedHeader {
    /// Number of values held by the underlying column.
    pub num_vals: u64,
    /// Maximum value of the underlying column.
    pub max_value: u64,
}

Expand Down Expand Up @@ -137,6 +139,8 @@ impl BinarySerializable for Header {
}
}

/// Return estimated compression for given codec in the value range [0.0..1.0], where 1.0 means no
/// compression.
pub fn estimate<T: MonotonicallyMappableToU64>(
typed_column: impl Column<T>,
codec_type: FastFieldCodecType,
Expand All @@ -157,6 +161,7 @@ pub fn estimate<T: MonotonicallyMappableToU64>(
}
}

/// Serializes u128 values with the compact space codec.
pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
iter_gen: F,
num_vals: u64,
Expand All @@ -169,6 +174,7 @@ pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
Ok(())
}

/// Serializes the column with the codec with the best estimate on the data.
pub fn serialize<T: MonotonicallyMappableToU64>(
typed_column: impl Column<T>,
output: &mut impl io::Write,
Expand Down Expand Up @@ -239,6 +245,7 @@ fn serialize_given_codec(
Ok(())
}

/// Helper function to serialize a column (autodetect from all codecs) and then open it
pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
column: &[T],
) -> Arc<dyn Column<T>> {
Expand Down

0 comments on commit 11d3409

Please sign in to comment.