Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Introduce an empty FilterCondition variant to support unknown fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Jul 27, 2021
1 parent b12738c commit 0518da7
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 56 deletions.
121 changes: 70 additions & 51 deletions milli/src/search/facet/filter_condition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::str::FromStr;

use either::Either;
use heed::types::DecodeIgnore;
use itertools::Itertools;
use log::debug;
use pest::error::{Error as PestError, ErrorVariant};
use pest::iterators::{Pair, Pairs};
Expand Down Expand Up @@ -54,6 +55,7 @@ pub enum FilterCondition {
Operator(FieldId, Operator),
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
Empty,
}

impl FilterCondition {
Expand Down Expand Up @@ -108,15 +110,15 @@ impl FilterCondition {
expression: &str,
) -> Result<FilterCondition> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
let lexed =
FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
}

fn from_pairs(
fim: &FieldsIdsMap,
ff: &HashSet<FieldId>,
ff: &HashSet<String>,
expression: Pairs<Rule>,
) -> Result<Self> {
PREC_CLIMBER.climb(
Expand Down Expand Up @@ -150,17 +152,22 @@ impl FilterCondition {
},
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
Empty => Empty,
}
}

fn between(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let (lresult, _) = pest_parse(items.next().unwrap());
let (rresult, _) = pest_parse(items.next().unwrap());
Expand All @@ -173,12 +180,16 @@ impl FilterCondition {

fn equal(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let value = items.next().unwrap();
let (result, svalue) = pest_parse(value);
Expand All @@ -189,12 +200,16 @@ impl FilterCondition {

fn greater_than(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value);
Expand All @@ -205,12 +220,16 @@ impl FilterCondition {

fn greater_than_or_equal(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value);
Expand All @@ -221,12 +240,16 @@ impl FilterCondition {

fn lower_than(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value);
Expand All @@ -237,12 +260,16 @@ impl FilterCondition {

fn lower_than_or_equal(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
item: Pair<Rule>,
) -> Result<FilterCondition> {
let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?;
let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};

let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value);
Expand Down Expand Up @@ -461,57 +488,43 @@ impl FilterCondition {
let rhs = rhs.evaluate(rtxn, index)?;
Ok(lhs & rhs)
}
Empty => Ok(RoaringBitmap::new()),
}
}
}

/// Retrieve the field id base on the pest value, returns an error is
/// the field does not exist or is not filterable.
/// Retrieve the field id base on the pest value.
///
/// Returns an error if the given value is not filterable.
///
/// Returns Ok(None) if the given value is filterable, but is not yet ascociated to a field_id.
///
/// The pest pair is simply a string associated with a span, a location to highlight in
/// the error message.
fn field_id(
fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>,
filterable_fields: &HashSet<String>,
items: &mut Pairs<Rule>,
) -> StdResult<FieldId, PestError<Rule>> {
) -> StdResult<Option<FieldId>, PestError<Rule>> {
// lexing ensures that we at least have a key
let key = items.next().unwrap();

let field_id = match fields_ids_map.id(key.as_str()) {
Some(field_id) => field_id,
None => {
return Err(PestError::new_from_span(
if !filterable_fields.contains(key.as_str()) {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
),
"attribute `{}` is not filterable, available filterable attributes are: {}",
key.as_str(),
filterable_fields
.iter()
.join(", "),
),
},
key.as_span(),
))
}
};

if !filterable_fields.contains(&field_id) {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` is not filterable, available filterable attributes are: {}",
key.as_str(),
filterable_fields
.iter()
.flat_map(|id| { fields_ids_map.name(*id) })
.collect::<Vec<_>>()
.join(", "),
),
},
key.as_span(),
));
}

Ok(field_id)
Ok(fields_ids_map.id(key.as_str()))
}

/// Tries to parse the pest pair into the type `T` specified, always returns
Expand Down Expand Up @@ -552,6 +565,9 @@ mod tests {

// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut map = index.fields_ids_map(&wtxn).unwrap();
map.insert("channel");
index.put_fields_ids_map(&mut wtxn, &map).unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset! { S("channel") });
builder.execute(|_, _| ()).unwrap();
Expand Down Expand Up @@ -581,6 +597,9 @@ mod tests {

// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut map = index.fields_ids_map(&wtxn).unwrap();
map.insert("timestamp");
index.put_fields_ids_map(&mut wtxn, &map).unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset! { "timestamp".into() });
builder.execute(|_, _| ()).unwrap();
Expand Down
7 changes: 4 additions & 3 deletions milli/src/update/index_documents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
documents_file,
} = output;

// The fields_ids_map is put back to the store now so the rest of the transaction sees an
// up to date field map.
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;

// We delete the documents that this document addition replaces. This way we are
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
Expand Down Expand Up @@ -596,9 +600,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {

debug!("Writing using the write method: {:?}", write_method);

// We write the fields ids map into the main database
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;

// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;

Expand Down
5 changes: 3 additions & 2 deletions milli/src/update/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,8 @@ mod tests {
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0])
// The faceted field id is 2u16
.prefix_iter(&rtxn, &[0, 2, 0])
.unwrap()
.count();
assert_eq!(count, 3);
Expand All @@ -700,7 +701,7 @@ mod tests {
let count = index
.facet_id_f64_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0])
.prefix_iter(&rtxn, &[0, 2, 0])
.unwrap()
.count();
assert_eq!(count, 4);
Expand Down

0 comments on commit 0518da7

Please sign in to comment.