Skip to content

Commit

Permalink
refactor multivalue fastfield, refactor range query (#1749)
Browse files Browse the repository at this point in the history
Introduce MakeZero trait, remove make_zero from FastValue
Merge two multivalue fastfield implementations into one
prepare range query on fastfield for different types
  • Loading branch information
PSeitz authored Jan 5, 2023
1 parent 2080c37 commit 07a51eb
Show file tree
Hide file tree
Showing 11 changed files with 287 additions and 356 deletions.
50 changes: 31 additions & 19 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
//!
//!
//! Fields have to be declared as `FAST` in the schema.
//! Currently supported fields are: u64, i64, f64, bytes and text.
//! Currently supported fields are: u64, i64, f64, bytes, ip and text.
//!
//! Fast fields are stored with [different codecs](fastfield_codecs). The best codec is detected
//! automatically when serializing.
//!
//! Read access performance is comparable to that of an array lookup.

use std::net::Ipv6Addr;

use fastfield_codecs::MonotonicallyMappableToU64;

pub use self::alive_bitset::{intersect_alive_bitsets, write_alive_bitset, AliveBitSet};
Expand All @@ -28,7 +30,7 @@ pub use self::facet_reader::FacetReader;
pub(crate) use self::multivalued::{get_fastfield_codecs_for_multivalue, MultivalueStartIndex};
pub use self::multivalued::{
MultiValueIndex, MultiValueU128FastFieldWriter, MultiValuedFastFieldReader,
MultiValuedFastFieldWriter, MultiValuedU128FastFieldReader,
MultiValuedFastFieldWriter,
};
pub(crate) use self::readers::type_and_cardinality;
pub use self::readers::FastFieldReaders;
Expand All @@ -47,19 +49,40 @@ mod readers;
mod serializer;
mod writer;

/// Trait for types that can produce a placeholder "zero" value.
///
/// The resulting value is never read back; it only serves as a filler, e.g. as the fill
/// argument of `vec.resize()` before the buffer gets overwritten.
pub trait MakeZero {
/// Builds the placeholder value. This value is never used, so which value an
/// implementation returns does not really matter.
fn make_zero() -> Self;
}

/// Blanket implementation for every [`FastValue`].
///
/// The placeholder is whatever `from_u64` yields for the raw `u64` value `0`; it is not
/// necessarily the numeric zero of the target type.
impl<T> MakeZero for T
where
    T: FastValue,
{
    fn make_zero() -> Self {
        Self::from_u64(0u64)
    }
}

impl MakeZero for u128 {
    /// The placeholder for `u128` is plain numeric zero.
    fn make_zero() -> Self {
        0_u128
    }
}

impl MakeZero for Ipv6Addr {
    /// The placeholder is the unspecified address `::`, i.e. all 128 bits set to zero.
    fn make_zero() -> Self {
        Ipv6Addr::UNSPECIFIED
    }
}

/// Trait for types that are allowed for fast fields
/// (u64, i64, f64, bool and DateTime).
pub trait FastValue:
MonotonicallyMappableToU64 + Copy + Send + Sync + PartialOrd + 'static
{
/// Returns the `schema::Type` for this FastValue.
fn to_type() -> Type;

/// Build a default value. This default value is never used, so the value does not
/// really matter.
fn make_zero() -> Self {
Self::from_u64(0u64)
}
}

impl FastValue for u64 {
Expand Down Expand Up @@ -101,12 +124,6 @@ impl FastValue for DateTime {
fn to_type() -> Type {
Type::Date
}

fn make_zero() -> Self {
DateTime {
timestamp_micros: 0,
}
}
}

fn value_to_u64(value: &Value) -> crate::Result<u64> {
Expand Down Expand Up @@ -520,11 +537,6 @@ mod tests {
Ok(())
}

/// The placeholder `DateTime` must correspond to a timestamp of zero seconds.
#[test]
fn test_default_date() {
    let placeholder = DateTime::make_zero();
    assert_eq!(0, placeholder.into_timestamp_secs());
}

fn get_vals_for_docs(ff: &MultiValuedFastFieldReader<u64>, docs: Range<u32>) -> Vec<u64> {
let mut all = vec![];

Expand Down
2 changes: 1 addition & 1 deletion src/fastfield/multivalued/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod writer;
use fastfield_codecs::FastFieldCodecType;
pub use index::MultiValueIndex;

pub use self::reader::{MultiValuedFastFieldReader, MultiValuedU128FastFieldReader};
pub use self::reader::MultiValuedFastFieldReader;
pub(crate) use self::writer::MultivalueStartIndex;
pub use self::writer::{MultiValueU128FastFieldWriter, MultiValuedFastFieldWriter};

Expand Down
99 changes: 11 additions & 88 deletions src/fastfield/multivalued/reader.rs
Original file line number Diff line number Diff line change
@@ -1,107 +1,30 @@
use std::ops::{Range, RangeInclusive};
use std::sync::Arc;

use fastfield_codecs::{Column, MonotonicallyMappableToU128};
use fastfield_codecs::Column;

use super::MultiValueIndex;
use crate::fastfield::FastValue;
use crate::fastfield::MakeZero;
use crate::DocId;

/// Reader for a multivalued `u64` fast field.
/// Reader for a multivalued fast field.
///
/// The reader is implemented as two `u64` fast field.
/// The reader is implemented as two fast fields, one u64 fast field for the index and one for the
/// values.
///
/// The `vals_reader` will access the concatenated list of all
/// values for all reader.
/// The `idx_reader` associated, for each document, the index of its first value.
/// Stores the start position for each document.
/// The `vals_reader` will access the concatenated list of all values.
/// The `idx_reader` associates, for each document, the index of its first value.
#[derive(Clone)]
pub struct MultiValuedFastFieldReader<Item: FastValue> {
/// Index column: yields, for a document, the range of its values inside `vals_reader`.
idx_reader: MultiValueIndex,
/// Value column: gives access to the concatenated list of all values.
vals_reader: Arc<dyn Column<Item>>,
}

impl<Item: FastValue> MultiValuedFastFieldReader<Item> {
/// Builds a reader from the index column and the value column.
///
/// `idx_reader` is wrapped into a [`MultiValueIndex`]; `vals_reader` is stored as is.
pub(crate) fn open(
idx_reader: Arc<dyn Column<u64>>,
vals_reader: Arc<dyn Column<Item>>,
) -> MultiValuedFastFieldReader<Item> {
MultiValuedFastFieldReader {
idx_reader: MultiValueIndex::new(idx_reader),
vals_reader,
}
}

/// Reads the values located at the positions `range` of the value column into `vals`.
///
/// `vals` is resized to the length of `range` and then handed to the column's `get_range`,
/// starting at `range.start`.
#[inline]
fn get_vals_for_range(&self, range: Range<u32>, vals: &mut Vec<Item>) {
let len = (range.end - range.start) as usize;
// The fill value is only a placeholder; the slice is passed to `get_range` right after.
vals.resize(len, Item::make_zero());
self.vals_reader
.get_range(range.start as u64, &mut vals[..]);
}

/// Writes the values associated with the given `doc` into `vals`.
///
/// The value range of `doc` is looked up in the index reader first.
#[inline]
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<Item>) {
let range = self.idx_reader.range(doc);
self.get_vals_for_range(range, vals);
}

/// Returns the multivalue index.
pub fn get_index_reader(&self) -> &MultiValueIndex {
&self.idx_reader
}

/// Returns the minimum value for this fast field.
///
/// The min value does not take into account possibly
/// deleted documents, and should be considered as a lower bound
/// of the actual minimum value.
pub fn min_value(&self) -> Item {
self.vals_reader.min_value()
}

/// Returns the maximum value for this fast field.
///
/// The max value does not take into account possibly
/// deleted documents, and should be considered as an upper bound
/// of the actual maximum value.
pub fn max_value(&self) -> Item {
self.vals_reader.max_value()
}

/// Returns the number of values associated with the document `doc`.
#[inline]
pub fn num_vals(&self, doc: DocId) -> u32 {
self.idx_reader.num_vals_for_doc(doc)
}

/// Returns the overall number of values in this field.
#[inline]
pub fn total_num_vals(&self) -> u32 {
self.idx_reader.total_num_vals()
}
}

/// Reader for a multivalued `u128` fast field.
///
/// The reader is implemented as a `u64` fast field for the index and a `u128` fast field.
///
/// The `vals_reader` will access the concatenated list of all
/// values for all reader.
/// The `idx_reader` associated, for each document, the index of its first value.
#[derive(Clone)]
pub struct MultiValuedU128FastFieldReader<T: MonotonicallyMappableToU128> {
pub struct MultiValuedFastFieldReader<T> {
idx_reader: MultiValueIndex,
vals_reader: Arc<dyn Column<T>>,
}

impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
impl<T: PartialOrd + MakeZero + Clone> MultiValuedFastFieldReader<T> {
pub(crate) fn open(
idx_reader: Arc<dyn Column<u64>>,
vals_reader: Arc<dyn Column<T>>,
) -> MultiValuedU128FastFieldReader<T> {
) -> MultiValuedFastFieldReader<T> {
Self {
idx_reader: MultiValueIndex::new(idx_reader),
vals_reader,
Expand All @@ -122,7 +45,7 @@ impl<T: MonotonicallyMappableToU128> MultiValuedU128FastFieldReader<T> {
#[inline]
fn get_vals_for_range(&self, range: Range<u32>, vals: &mut Vec<T>) {
let len = (range.end - range.start) as usize;
vals.resize(len, T::from_u128(0));
vals.resize(len, T::make_zero());
self.vals_reader
.get_range(range.start as u64, &mut vals[..]);
}
Expand Down
23 changes: 6 additions & 17 deletions src/fastfield/readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@ use std::sync::Arc;

use fastfield_codecs::{open, open_u128, Column};

use super::multivalued::MultiValuedU128FastFieldReader;
use super::multivalued::MultiValuedFastFieldReader;
use crate::directory::{CompositeFile, FileSlice};
use crate::fastfield::{
BytesFastFieldReader, FastFieldNotAvailableError, FastValue, MultiValuedFastFieldReader,
};
use crate::fastfield::{BytesFastFieldReader, FastFieldNotAvailableError, FastValue};
use crate::schema::{Cardinality, Field, FieldType, Schema};
use crate::space_usage::PerFieldSpaceUsage;
use crate::{DateTime, TantivyError};
Expand Down Expand Up @@ -161,20 +159,14 @@ impl FastFieldReaders {
/// Returns the `ip` fast field reader associated with `field`.
///
/// If `field` is not a u128 fast field, this method returns an Error.
pub fn ip_addrs(
&self,
field: Field,
) -> crate::Result<MultiValuedU128FastFieldReader<Ipv6Addr>> {
pub fn ip_addrs(&self, field: Field) -> crate::Result<MultiValuedFastFieldReader<Ipv6Addr>> {
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
let idx_reader: Arc<dyn Column<u64>> = self.typed_fast_field_reader(field)?;

let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
let vals_reader = open_u128::<Ipv6Addr>(bytes)?;

Ok(MultiValuedU128FastFieldReader::open(
idx_reader,
vals_reader,
))
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
}

/// Returns the `u128` fast field reader associated with `field`.
Expand All @@ -189,17 +181,14 @@ impl FastFieldReaders {
/// Returns the `u128` multi-valued fast field reader associated with `field`.
///
/// If `field` is not a u128 multi-valued fast field, this method returns an Error.
pub fn u128s(&self, field: Field) -> crate::Result<MultiValuedU128FastFieldReader<u128>> {
pub fn u128s(&self, field: Field) -> crate::Result<MultiValuedFastFieldReader<u128>> {
self.check_type(field, FastType::U128, Cardinality::MultiValues)?;
let idx_reader: Arc<dyn Column<u64>> = self.typed_fast_field_reader(field)?;

let bytes = self.fast_field_data(field, 1)?.read_bytes()?;
let vals_reader = open_u128::<u128>(bytes)?;

Ok(MultiValuedU128FastFieldReader::open(
idx_reader,
vals_reader,
))
Ok(MultiValuedFastFieldReader::open(idx_reader, vals_reader))
}

/// Returns the `u64` fast field reader associated with `field`, regardless of whether
Expand Down
26 changes: 13 additions & 13 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::docset::{DocSet, TERMINATED};
use crate::error::DataCorruption;
use crate::fastfield::{
get_fastfield_codecs_for_multivalue, AliveBitSet, Column, CompositeFastFieldSerializer,
MultiValueIndex, MultiValuedFastFieldReader, MultiValuedU128FastFieldReader,
MultiValueIndex, MultiValuedFastFieldReader,
};
use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter};
use crate::indexer::doc_id_mapping::{expect_field_id_for_sort_field, SegmentDocIdMapping};
Expand Down Expand Up @@ -331,18 +331,18 @@ impl IndexMerger {
fast_field_serializer: &mut CompositeFastFieldSerializer,
doc_id_mapping: &SegmentDocIdMapping,
) -> crate::Result<()> {
let segment_and_ff_readers: Vec<(&SegmentReader, MultiValuedU128FastFieldReader<u128>)> =
self.readers
.iter()
.map(|segment_reader| {
let ff_reader: MultiValuedU128FastFieldReader<u128> =
segment_reader.fast_fields().u128s(field).expect(
"Failed to find index for multivalued field. This is a bug in \
tantivy, please report.",
);
(segment_reader, ff_reader)
})
.collect::<Vec<_>>();
let segment_and_ff_readers: Vec<(&SegmentReader, MultiValuedFastFieldReader<u128>)> = self
.readers
.iter()
.map(|segment_reader| {
let ff_reader: MultiValuedFastFieldReader<u128> =
segment_reader.fast_fields().u128s(field).expect(
"Failed to find index for multivalued field. This is a bug in tantivy, \
please report.",
);
(segment_reader, ff_reader)
})
.collect::<Vec<_>>();

Self::write_1_n_fast_field_idx_generic(
field,
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ use crate::time::{OffsetDateTime, PrimitiveDateTime, UtcOffset};
/// All constructors and conversions are provided as explicit
/// functions and not by implementing any `From`/`Into` traits
/// to prevent unintended usage.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Clone, Default, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct DateTime {
// Timestamp in microseconds.
pub(crate) timestamp_micros: i64,
Expand Down
1 change: 0 additions & 1 deletion src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ mod phrase_query;
mod query;
mod query_parser;
mod range_query;
mod range_query_ip_fastfield;
mod regex_query;
mod reqopt_scorer;
mod scorer;
Expand Down
Loading

0 comments on commit 07a51eb

Please sign in to comment.