Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Integration tests on filters and distinct #238

Merged
merged 3 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion milli/tests/assets/test_set.ndjson
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","":""}
{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","":""}
{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","":""}
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","":""}
{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","":""}
{"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","":""}
{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","":""}
Expand Down
72 changes: 72 additions & 0 deletions milli/tests/search/distinct.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use std::collections::HashSet;

use big_s::S;
use milli::update::Settings;
use milli::{Criterion, Search, SearchResult};
use Criterion::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

/// Generates a `#[test]` function named `$func` that checks search results when
/// `$distinct` is configured as the distinct field of the index.
///
/// * `$func` - name of the generated test function.
/// * `$distinct` - identifier used twice: stringified as the distinct field name given
///   to the settings builder, and as the field read on every expected document.
/// * `$criteria` - expression producing the criteria passed to
///   `search::setup_search_index_with_criteria` and `search::expected_order`.
///
/// The expected ids are those returned by `search::expected_order` (typos and optional
/// words enabled), keeping only the first document encountered for each value of
/// `$distinct`.
macro_rules! test_distinct {
    ($func:ident, $distinct:ident, $criteria:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
            let index = search::setup_search_index_with_criteria(&criteria);

            // Update the distinct attribute in its own write transaction, committed
            // before the read transaction used by the search is opened.
            let mut wtxn = index.write_txn().unwrap();
            let mut builder = Settings::new(&mut wtxn, &index, 0);
            builder.set_distinct_field(S(stringify!($distinct)));
            builder.execute(|_, _| ()).unwrap();
            wtxn.commit().unwrap();

            let rtxn = index.read_txn().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());
            search.authorize_typos(true);
            search.optional_words(true);

            let SearchResult { documents_ids, .. } = search.execute().unwrap();

            // `HashSet::insert` returns `true` only when the value was not already
            // present, so one call both records the value and tells us whether this
            // document is the first one carrying it.
            let mut distinct_values = HashSet::new();
            let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true)
                .into_iter()
                .filter_map(|d| {
                    if distinct_values.insert(d.$distinct.to_owned()) {
                        Some(d.id)
                    } else {
                        None
                    }
                })
                .collect();

            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(documents_ids, expected_external_ids);
        }
    };
}

// Generated tests: each criteria list is exercised twice, once with the string
// field `tag` as distinct field and once with the numeric field `asc_desc_rank`.

// Full five-criteria list.
test_distinct!(
distinct_string_default_criteria,
tag,
vec![Words, Typo, Proximity, Attribute, Exactness]
);
test_distinct!(
distinct_number_default_criteria,
asc_desc_rank,
vec![Words, Typo, Proximity, Attribute, Exactness]
);
// `Words` alone, then `Words` followed by exactly one other criterion.
test_distinct!(distinct_string_criterion_words, tag, vec![Words]);
test_distinct!(distinct_number_criterion_words, asc_desc_rank, vec![Words]);
test_distinct!(distinct_string_criterion_words_typo, tag, vec![Words, Typo]);
test_distinct!(distinct_number_criterion_words_typo, asc_desc_rank, vec![Words, Typo]);
test_distinct!(distinct_string_criterion_words_proximity, tag, vec![Words, Proximity]);
test_distinct!(distinct_number_criterion_words_proximity, asc_desc_rank, vec![Words, Proximity]);
test_distinct!(distinct_string_criterion_words_attribute, tag, vec![Words, Attribute]);
test_distinct!(distinct_number_criterion_words_attribute, asc_desc_rank, vec![Words, Attribute]);
test_distinct!(distinct_string_criterion_words_exactness, tag, vec![Words, Exactness]);
test_distinct!(distinct_number_criterion_words_exactness, asc_desc_rank, vec![Words, Exactness]);
78 changes: 78 additions & 0 deletions milli/tests/search/filters.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use either::{Either, Left, Right};
use milli::{Criterion, FilterCondition, Search, SearchResult};
use Criterion::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

/// Generates a `#[test]` function named `$func` that runs the test query with the
/// filter described by `$filter` and compares the result with a reference computed
/// from the raw dataset.
///
/// * `$func` - name of the generated test function.
/// * `$filter` - expression of type `Vec<Either<Vec<&str>, &str>>`. It is expanded
///   twice: once for `FilterCondition::from_array` and once for
///   `search::expected_filtered_ids`.
///
/// The expected ids are those of `search::expected_order` (typos and optional words
/// enabled) restricted to the ids returned by `search::expected_filtered_ids`.
macro_rules! test_filter {
    ($func:ident, $filter:expr) => {
        #[test]
        fn $func() {
            let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
            let index = search::setup_search_index_with_criteria(&criteria);
            let rtxn = index.read_txn().unwrap();

            // First `unwrap` handles the `Result`, second one the `Option` it wraps.
            let parsed = FilterCondition::from_array::<Vec<Either<Vec<&str>, &str>>, _, _, _>(
                &rtxn, &index, $filter,
            );
            let condition = parsed.unwrap().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());
            search.authorize_typos(true);
            search.optional_words(true);
            search.filter(condition);

            let SearchResult { documents_ids: internal_ids, .. } = search.execute().unwrap();

            // Reference result: the expected ordering, minus every document the
            // dataset-level filter evaluation rejects.
            let allowed_ids = search::expected_filtered_ids($filter);
            let expected_ids: Vec<_> = search::expected_order(&criteria, true, true)
                .into_iter()
                .map(|document| document.id)
                .filter(|id| allowed_ids.contains(id))
                .collect();

            let found_ids = search::internal_to_external_ids(&index, &internal_ids);
            assert_eq!(found_ids, expected_ids);
        }
    };
}

// Filter shape used below: every top-level entry must hold (AND); a `Right(f)` entry
// is a single condition, a `Left(vec![..])` entry holds when any of its conditions
// does (OR).

// Equality (`=`) on the string field `tag` and the numeric field `asc_desc_rank`.
test_filter!(eq_simple_string_filter, vec![Right("tag=red")]);
test_filter!(eq_simple_number_filter, vec![Right("asc_desc_rank=1")]);
test_filter!(eq_string_and_filter_return_empty, vec![Right("tag=red"), Right("tag=green")]);
test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]);
test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
test_filter!(
eq_complex_filter_2,
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]
);
// Strictly greater (`>`) on `asc_desc_rank`, alone and combined with `tag` equality.
test_filter!(greater_simple_number_filter, vec![Right("asc_desc_rank>1")]);
test_filter!(greater_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank>1")]);
test_filter!(greater_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank>1"])]);
test_filter!(greater_number_or_filter, vec![Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]);
test_filter!(
greater_complex_filter,
vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank>3")]
);
test_filter!(
greater_complex_filter_2,
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]
);
// Strictly lower (`<`) on `asc_desc_rank`, alone and combined with `tag` equality.
test_filter!(lower_simple_number_filter, vec![Right("asc_desc_rank<1")]);
test_filter!(lower_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank<1")]);
test_filter!(lower_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank<1"])]);
test_filter!(lower_number_or_filter, vec![Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]);
test_filter!(
lower_complex_filter,
vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank<3")]
);
test_filter!(
lower_complex_filter_2,
vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]
);
58 changes: 57 additions & 1 deletion milli/tests/search/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use std::collections::HashSet;

use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use milli::update::{IndexDocuments, Settings, UpdateFormat};
use milli::{Criterion, DocumentId, Index};
use serde::Deserialize;
use slice_group_by::GroupBy;

mod distinct;
mod filters;
mod query_criteria;

pub const TEST_QUERY: &'static str = "hello world america";
Expand Down Expand Up @@ -120,7 +125,58 @@ pub fn expected_order(
}
}

#[derive(Debug, Clone, Deserialize)]
/// Evaluates a single textual filter against `document`.
///
/// Recognized forms are `tag=<value>`, `asc_desc_rank=<n>`, `asc_desc_rank<<n>` and
/// `asc_desc_rank><n>`. The filter is split on the first `=` if there is one; `<` and
/// then `>` are only tried when the filter contains no `=`.
///
/// Returns a clone of the document id when the document satisfies the filter, and
/// `None` otherwise, including when the field or operator is not one of the
/// recognized forms.
///
/// # Panics
///
/// Panics when the value compared with `asc_desc_rank` cannot be parsed as a number.
fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
    let is_match = if let Some((field, value)) = filter.split_once("=") {
        match field {
            "tag" => document.tag == value,
            // The value is only parsed once the field is known to be numeric.
            "asc_desc_rank" => document.asc_desc_rank == value.parse::<u32>().unwrap(),
            _ => false,
        }
    } else if let Some(("asc_desc_rank", value)) = filter.split_once("<") {
        document.asc_desc_rank < value.parse().unwrap()
    } else if let Some(("asc_desc_rank", value)) = filter.split_once(">") {
        document.asc_desc_rank > value.parse().unwrap()
    } else {
        false
    };

    if is_match {
        Some(document.id.clone())
    } else {
        None
    }
}

/// Computes, directly from the documents deserialized out of `CONTENT`, the ids of the
/// documents that satisfy `filters`.
///
/// Every element of `filters` must be satisfied for an id to be kept:
/// * `Right(filter)` is satisfied by the documents matching that single filter;
/// * `Left(array)` is satisfied by the documents matching at least one filter of the
///   array.
///
/// With an empty `filters`, the id of every document of the dataset is returned.
///
/// # Panics
///
/// Panics if a `Left` array is empty, or if a document of `CONTENT` fails to
/// deserialize.
pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
    let dataset: HashSet<TestDocument> = serde_json::Deserializer::from_str(CONTENT)
        .into_iter()
        .map(|document| document.unwrap())
        .collect();

    // Ids of all the documents accepted by one textual filter.
    let ids_matching = |filter: &str| -> HashSet<String> {
        dataset.iter().filter_map(|document| execute_filter(filter, document)).collect()
    };

    // Start from every id and narrow the set down one condition at a time.
    let mut remaining: HashSet<String> =
        dataset.iter().map(|document| document.id.clone()).collect();
    for condition in filters {
        let accepted = match condition {
            Left(alternatives) => alternatives
                .into_iter()
                .map(|filter| ids_matching(filter))
                .reduce(|mut union, ids| {
                    union.extend(ids);
                    union
                })
                .unwrap(),
            Right(filter) => ids_matching(filter),
        };

        remaining.retain(|id| accepted.contains(id));
    }

    remaining
}

#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Hash)]
pub struct TestDocument {
pub id: String,
pub word_rank: u32,
Expand Down
115 changes: 73 additions & 42 deletions milli/tests/search/query_criteria.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,64 +11,95 @@ const ALLOW_OPTIONAL_WORDS: bool = true;
const DISALLOW_OPTIONAL_WORDS: bool = false;

macro_rules! test_criterion {
($func:ident, $optional_word:ident, $authorize_typos:ident $(, $criterion:expr)?) => {
($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr) => {
#[test]
fn $func() {
let criteria = vec![$($criterion)?];
let criteria = $criteria;
let index = search::setup_search_index_with_criteria(&criteria);
let mut rtxn = index.read_txn().unwrap();
let rtxn = index.read_txn().unwrap();

let mut search = Search::new(&mut rtxn, &index);
let mut search = Search::new(&rtxn, &index);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos($authorize_typos);
search.optional_words($optional_word);

let SearchResult { documents_ids, .. } = search.execute().unwrap();

let expected_external_ids: Vec<_> = search::expected_order(&criteria, $authorize_typos, $optional_word)
.into_iter()
.map(|d| d.id).collect();
let expected_external_ids: Vec<_> =
search::expected_order(&criteria, $authorize_typos, $optional_word)
.into_iter()
.map(|d| d.id)
.collect();
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
assert_eq!(documents_ids, expected_external_ids);
}
}
};
}

#[rustfmt::skip]
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS);
#[rustfmt::skip]
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS);
#[rustfmt::skip]
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Words);
#[rustfmt::skip]
test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Attribute);
#[rustfmt::skip]
test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Attribute);
#[rustfmt::skip]
test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Exactness);
#[rustfmt::skip]
test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Exactness);
#[rustfmt::skip]
test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Proximity);
#[rustfmt::skip]
test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Proximity);
#[rustfmt::skip]
test_criterion!(asc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("asc_desc_rank")));
#[rustfmt::skip]
test_criterion!(asc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("asc_desc_rank")));
#[rustfmt::skip]
test_criterion!(desc_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("asc_desc_rank")));
#[rustfmt::skip]
test_criterion!(desc_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("asc_desc_rank")));
#[rustfmt::skip]
test_criterion!(asc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Asc(S("unexisting_field")));
#[rustfmt::skip]
test_criterion!(asc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Asc(S("unexisting_field")));
#[rustfmt::skip]
test_criterion!(desc_unexisting_field_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, Desc(S("unexisting_field")));
#[rustfmt::skip]
test_criterion!(desc_unexisting_field_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, Desc(S("unexisting_field")));
test_criterion!(none_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![]);
test_criterion!(none_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![]);
test_criterion!(words_allow_typo, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Words]);
test_criterion!(attribute_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Attribute]);
test_criterion!(attribute_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Attribute]);
test_criterion!(exactness_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Exactness]);
test_criterion!(exactness_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Exactness]);
test_criterion!(proximity_allow_typo, DISALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, vec![Proximity]);
test_criterion!(proximity_disallow_typo, DISALLOW_OPTIONAL_WORDS, DISALLOW_TYPOS, vec![Proximity]);
test_criterion!(
asc_allow_typo,
DISALLOW_OPTIONAL_WORDS,
ALLOW_TYPOS,
vec![Asc(S("asc_desc_rank"))]
);
test_criterion!(
asc_disallow_typo,
DISALLOW_OPTIONAL_WORDS,
DISALLOW_TYPOS,
vec![Asc(S("asc_desc_rank"))]
);
test_criterion!(
desc_allow_typo,
DISALLOW_OPTIONAL_WORDS,
ALLOW_TYPOS,
vec![Desc(S("asc_desc_rank"))]
);
test_criterion!(
desc_disallow_typo,
DISALLOW_OPTIONAL_WORDS,
DISALLOW_TYPOS,
vec![Desc(S("asc_desc_rank"))]
);
test_criterion!(
asc_unexisting_field_allow_typo,
DISALLOW_OPTIONAL_WORDS,
ALLOW_TYPOS,
vec![Asc(S("unexisting_field"))]
);
test_criterion!(
asc_unexisting_field_disallow_typo,
DISALLOW_OPTIONAL_WORDS,
DISALLOW_TYPOS,
vec![Asc(S("unexisting_field"))]
);
test_criterion!(
desc_unexisting_field_allow_typo,
DISALLOW_OPTIONAL_WORDS,
ALLOW_TYPOS,
vec![Desc(S("unexisting_field"))]
);
test_criterion!(
desc_unexisting_field_disallow_typo,
DISALLOW_OPTIONAL_WORDS,
DISALLOW_TYPOS,
vec![Desc(S("unexisting_field"))]
);
test_criterion!(
default_criteria_order,
ALLOW_OPTIONAL_WORDS,
ALLOW_TYPOS,
vec![Words, Typo, Proximity, Attribute, Exactness]
);

#[test]
fn criteria_mixup() {
Expand Down