From 52a494bd3b75cda4bb045650ae73bd8dce51b335 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 8 Jun 2022 16:54:16 +0200 Subject: [PATCH 1/3] Add the new pagination.limited_to and faceting.max_values_per_facet settings --- milli/src/update/settings.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 9363d8eb6..86c168be3 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -104,6 +104,8 @@ pub struct Settings<'a, 't, 'u, 'i> { exact_words: Setting>, /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, + max_values_per_facet: Setting, + limit_pagination_to: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -129,6 +131,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { min_word_len_two_typos: Setting::NotSet, min_word_len_one_typo: Setting::NotSet, exact_attributes: Setting::NotSet, + max_values_per_facet: Setting::NotSet, + limit_pagination_to: Setting::NotSet, indexer_config, } } @@ -246,6 +250,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.exact_attributes = Setting::Reset; } + pub fn set_max_values_per_facet(&mut self, value: usize) { + self.max_values_per_facet = Setting::Set(value); + } + + pub fn reset_max_values_per_facet(&mut self) { + self.max_values_per_facet = Setting::Reset; + } + + pub fn set_limit_pagination_to(&mut self, value: usize) { + self.limit_pagination_to = Setting::Set(value); + } + + pub fn reset_limit_pagination_to(&mut self) { + self.limit_pagination_to = Setting::Reset; + } + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -1525,6 +1545,8 @@ mod tests { min_word_len_one_typo, exact_words, exact_attributes, + max_values_per_facet, + limit_pagination_to, } = builder; assert!(matches!(searchable_fields, Setting::NotSet)); @@ -1541,5 +1563,7 @@ mod tests { assert!(matches!(min_word_len_one_typo, Setting::NotSet)); assert!(matches!(exact_words, Setting::NotSet)); assert!(matches!(exact_attributes, Setting::NotSet)); + assert!(matches!(max_values_per_facet, Setting::NotSet)); + assert!(matches!(limit_pagination_to, Setting::NotSet)); } } From 69931e50d20ed2ad3ec56f2323b57ec71131befc Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 8 Jun 2022 17:28:23 +0200 Subject: [PATCH 2/3] Add the max_values_by_facet setting to the database --- milli/src/index.rs | 14 ++++++++++ milli/src/lib.rs | 2 +- milli/src/search/facet/facet_distribution.rs | 28 +++++++++++--------- milli/src/search/facet/mod.rs | 2 +- milli/src/search/mod.rs | 2 +- milli/src/update/settings.rs | 24 ++++++++++++++--- 6 files changed, 52 insertions(+), 20 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 41bd85b93..f7e3aa14a 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -56,6 +56,8 @@ pub mod main_key { pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len"; pub const EXACT_WORDS: &str = "exact-words"; pub const EXACT_ATTRIBUTES: &str = "exact-attributes"; + pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet"; + pub const PAGINATION_LIMITED_TO: &str = "pagination-limited-to"; } pub mod db_name { @@ -1087,6 +1089,18 @@ impl Index { self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?; Ok(()) } + + pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { + self.main.get::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET) + } + + pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { + self.main.put::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET, &val) + } + + pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) + } } #[cfg(test)] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index f28677ed8..81cd057d5 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -38,7 +38,7 @@ pub use self::heed_codec::{ pub use self::index::Index; pub use self::search::{ FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord, - MatchingWords, Search, SearchResult, + MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 8069abede..b2718a490 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -15,7 +15,7 @@ use crate::{FieldId, Index, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. -const DEFAULT_VALUES_BY_FACET: usize = 100; +pub const DEFAULT_VALUES_PER_FACET: usize = 100; /// Threshold on the number of candidates that will make /// the system to choose between one algorithm or another. @@ -24,7 +24,7 @@ const CANDIDATES_THRESHOLD: u64 = 3000; pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, - max_values_by_facet: usize, + max_values_per_facet: usize, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -34,7 +34,7 @@ impl<'a> FacetDistribution<'a> { FacetDistribution { facets: None, candidates: None, - max_values_by_facet: DEFAULT_VALUES_BY_FACET, + max_values_per_facet: DEFAULT_VALUES_PER_FACET, rtxn, index, } @@ -45,8 +45,8 @@ impl<'a> FacetDistribution<'a> { self } - pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self { - self.max_values_by_facet = max; + pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self { + self.max_values_per_facet = max; self } @@ -82,7 +82,8 @@ impl<'a> FacetDistribution<'a> { let ((_, _, value), ()) = result?; *distribution.entry(value.to_string()).or_insert(0) += 1; - if distribution.len() - distribution_prelength == self.max_values_by_facet { + if distribution.len() - distribution_prelength == self.max_values_per_facet + { break; } } @@ -108,7 +109,7 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_by_facet { + if normalized_distribution.len() == self.max_values_per_facet { break; } } @@ -141,7 +142,7 @@ impl<'a> FacetDistribution<'a> { if !docids.is_empty() { distribution.insert(value.to_string(), docids.len()); } - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -164,7 +165,7 @@ impl<'a> FacetDistribution<'a> { if !docids.is_empty() { distribution.insert(original.to_string(), docids.len()); } - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -186,7 +187,7 @@ impl<'a> FacetDistribution<'a> { for result in range { let ((_, _, value, _), docids) = result?; distribution.insert(value.to_string(), docids.len()); - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -202,7 +203,7 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, normalized_value), (original_value, docids)) = result?; normalized_distribution.insert(normalized_value, (original_value, docids.len())); - if normalized_distribution.len() == self.max_values_by_facet { + if normalized_distribution.len() == self.max_values_per_facet { break; } } @@ -290,12 +291,13 @@ impl<'a> FacetDistribution<'a> { impl fmt::Debug for FacetDistribution<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self; + let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } = + self; f.debug_struct("FacetDistribution") .field("facets", facets) .field("candidates", candidates) - .field("max_values_by_facet", max_values_by_facet) + .field("max_values_per_facet", max_values_per_facet) .finish() } } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index c8f91352b..e3ac95882 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,4 +1,4 @@ -pub use self::facet_distribution::FacetDistribution; +pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; pub use self::filter::Filter; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 62a7815b0..1c363e142 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -15,7 +15,7 @@ use log::debug; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; +pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET}; use self::fst_utils::{Complement, Intersection, StartsWith, Union}; pub use self::matches::{ FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 86c168be3..ce4bfbc70 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -105,7 +105,7 @@ pub struct Settings<'a, 't, 'u, 'i> { /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, max_values_per_facet: Setting, - limit_pagination_to: Setting, + pagination_limited_to: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -132,7 +132,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { min_word_len_one_typo: Setting::NotSet, exact_attributes: Setting::NotSet, max_values_per_facet: Setting::NotSet, - limit_pagination_to: Setting::NotSet, + pagination_limited_to: Setting::NotSet, indexer_config, } } @@ -632,6 +632,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } + fn update_max_values_per_facet(&mut self) -> Result<()> { + match self.max_values_per_facet { + Setting::Set(max) => { + self.index.put_max_values_per_facet(&mut self.wtxn, max)?; + } + Setting::Reset => { + self.index.delete_max_values_per_facet(&mut self.wtxn)?; + } + Setting::NotSet => (), + } + + Ok(()) + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -650,6 +664,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_authorize_typos()?; self.update_min_typo_word_len()?; self.update_exact_words()?; + self.update_max_values_per_facet()?; + self.update_pagination_limited_to()?; // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, @@ -1546,7 +1562,7 @@ mod tests { exact_words, exact_attributes, max_values_per_facet, - limit_pagination_to, + pagination_limited_to, } = builder; assert!(matches!(searchable_fields, Setting::NotSet)); @@ -1564,6 +1580,6 @@ mod tests { assert!(matches!(exact_words, Setting::NotSet)); assert!(matches!(exact_attributes, Setting::NotSet)); assert!(matches!(max_values_per_facet, Setting::NotSet)); - assert!(matches!(limit_pagination_to, Setting::NotSet)); + assert!(matches!(pagination_limited_to, Setting::NotSet)); } } From 445d5474cca00142f8b509f9e058760f0f3ad9c9 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 8 Jun 2022 17:31:21 +0200 Subject: [PATCH 3/3] Add the pagination_limited_to setting to the database --- milli/src/index.rs | 16 ++++++++++++++++ milli/src/update/settings.rs | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index f7e3aa14a..28c870592 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1101,6 +1101,22 @@ impl Index { pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) } + + pub fn pagination_limited_to(&self, txn: &RoTxn) -> heed::Result> { + self.main.get::<_, Str, OwnedType>(txn, main_key::PAGINATION_LIMITED_TO) + } + + pub(crate) fn put_pagination_limited_to( + &self, + txn: &mut RwTxn, + val: usize, + ) -> heed::Result<()> { + self.main.put::<_, Str, OwnedType>(txn, main_key::PAGINATION_LIMITED_TO, &val) + } + + pub(crate) fn delete_pagination_limited_to(&self, txn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(txn, main_key::PAGINATION_LIMITED_TO) + } } #[cfg(test)] diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index ce4bfbc70..174d7073d 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -258,12 +258,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.max_values_per_facet = Setting::Reset; } - pub fn set_limit_pagination_to(&mut self, value: usize) { - self.limit_pagination_to = Setting::Set(value); + pub fn set_pagination_limited_to(&mut self, value: usize) { + self.pagination_limited_to = Setting::Set(value); } - pub fn reset_limit_pagination_to(&mut self) { - self.limit_pagination_to = Setting::Reset; + pub fn reset_pagination_limited_to(&mut self) { + self.pagination_limited_to = Setting::Reset; } fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> @@ -646,6 +646,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } + fn update_pagination_limited_to(&mut self) -> Result<()> { + match self.pagination_limited_to { + Setting::Set(max) => { + self.index.put_pagination_limited_to(&mut self.wtxn, max)?; + } + Setting::Reset => { + self.index.delete_pagination_limited_to(&mut self.wtxn)?; + } + Setting::NotSet => (), + } + + Ok(()) + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync,