apply review recommendations

quickwit-oss · Feb 20, 2023 · 37c8257 · 37c8257
1 parent 1bac8a7
commit 37c8257
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 31 deletions.
diff --git a/src/query/phrase_prefix_query/phrase_prefix_query.rs b/src/query/phrase_prefix_query/phrase_prefix_query.rs
@@ -5,6 +5,8 @@ use crate::query::bm25::Bm25Weight;
 use crate::query::{EnableScoring, Query, RangeQuery, Weight};
 use crate::schema::{Field, IndexRecordOption, Term};
 
+const DEFAULT_MAX_EXPANSIONS: u32 = 50;
+
 /// `PhrasePrefixQuery` matches a specific sequence of words followed by term of which only a
 /// prefix is known.
 ///
@@ -57,7 +59,7 @@ impl PhrasePrefixQuery {
             field,
             prefix: terms.pop().unwrap(),
             phrase_terms: terms,
-            max_expansions: 50,
+            max_expansions: DEFAULT_MAX_EXPANSIONS,
         }
     }
 
@@ -86,7 +88,10 @@ impl PhrasePrefixQuery {
     /// a specialized type [`PhraseQueryWeight`] instead of a Boxed trait.
     /// If the query was only one term long, this returns `None` wherease [`Query::weight`]
     /// returns a boxed [`RangeWeight`]
-    pub(crate) fn phrase_query_weight(
+    ///    
+    /// Returns `None`, if phrase_terms is empty, which happens if the phrase prefix query was
+    /// built with a single term.
+    pub(crate) fn phrase_prefix_query_weight(
         &self,
         enable_scoring: EnableScoring<'_>,
     ) -> crate::Result<Option<PhrasePrefixWeight>> {
@@ -127,9 +132,10 @@ impl Query for PhrasePrefixQuery {
     ///
     /// See [`Weight`].
     fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
-        if let Some(phrase_weight) = self.phrase_query_weight(enable_scoring)? {
+        if let Some(phrase_weight) = self.phrase_prefix_query_weight(enable_scoring)? {
             Ok(Box::new(phrase_weight))
         } else {
+            // There are no prefix. Let's just match the suffix.
             let end_term = if let Some(end_value) = prefix_end(&self.prefix.1.value_bytes()) {
                 let mut end_term = Term::with_capacity(end_value.len());
                 end_term.set_field_and_type(self.field, self.prefix.1.typ());

diff --git a/src/query/phrase_prefix_query/phrase_prefix_scorer.rs b/src/query/phrase_prefix_query/phrase_prefix_scorer.rs
@@ -7,18 +7,26 @@ use crate::query::Scorer;
 use crate::{DocId, Score};
 
 enum PhraseKind<TPostings: Postings> {
-    SinglePrefix(u32, TPostings, Vec<u32>),
+    SinglePrefix {
+        position_offset: u32,
+        postings: TPostings,
+        positions: Vec<u32>,
+    },
     MultiPrefix(PhraseScorer<TPostings>),
 }
 
 impl<TPostings: Postings> PhraseKind<TPostings> {
     fn get_intersection(&mut self) -> &[u32] {
         match self {
-            PhraseKind::SinglePrefix(offset, postings, indexes) => {
-                if indexes.is_empty() {
-                    postings.positions_with_offset(*offset, indexes);
+            PhraseKind::SinglePrefix {
+                position_offset,
+                postings,
+                positions,
+            } => {
+                if positions.is_empty() {
+                    postings.positions_with_offset(*position_offset, positions);
                 }
-                indexes
+                positions
             }
             PhraseKind::MultiPrefix(postings) => postings.get_intersection(),
         }
@@ -28,8 +36,12 @@ impl<TPostings: Postings> PhraseKind<TPostings> {
 impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
     fn advance(&mut self) -> DocId {
         match self {
-            PhraseKind::SinglePrefix(_, postings, indexes) => {
-                indexes.clear();
+            PhraseKind::SinglePrefix {
+                postings,
+                positions,
+                ..
+            } => {
+                positions.clear();
                 postings.advance()
             }
             PhraseKind::MultiPrefix(postings) => postings.advance(),
@@ -38,22 +50,26 @@ impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
 
     fn doc(&self) -> DocId {
         match self {
-            PhraseKind::SinglePrefix(_, postings, _) => postings.doc(),
+            PhraseKind::SinglePrefix { postings, .. } => postings.doc(),
             PhraseKind::MultiPrefix(postings) => postings.doc(),
         }
     }
 
     fn size_hint(&self) -> u32 {
         match self {
-            PhraseKind::SinglePrefix(_, postings, _) => postings.size_hint(),
+            PhraseKind::SinglePrefix { postings, .. } => postings.size_hint(),
             PhraseKind::MultiPrefix(postings) => postings.size_hint(),
         }
     }
 
     fn seek(&mut self, target: DocId) -> DocId {
         match self {
-            PhraseKind::SinglePrefix(_, postings, indexes) => {
-                indexes.clear();
+            PhraseKind::SinglePrefix {
+                postings,
+                positions,
+                ..
+            } => {
+                positions.clear();
                 postings.seek(target)
             }
             PhraseKind::MultiPrefix(postings) => postings.seek(target),
@@ -64,8 +80,8 @@ impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
 impl<TPostings: Postings> Scorer for PhraseKind<TPostings> {
     fn score(&mut self) -> Score {
         match self {
-            PhraseKind::SinglePrefix(_, _postings, indexes) => {
-                if indexes.is_empty() {
+            PhraseKind::SinglePrefix { positions, .. } => {
+                if positions.is_empty() {
                     0.0
                 } else {
                     1.0
@@ -90,13 +106,13 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
         similarity_weight_opt: Option<Bm25Weight>,
         fieldnorm_reader: FieldNormReader,
         suffixes: Vec<TPostings>,
-        suffixe_pos: usize,
+        suffix_pos: usize,
     ) -> PhrasePrefixScorer<TPostings> {
         // correct indices so we can merge with our suffix term the PhraseScorer doesn't know about
         let max_offset = term_postings
             .iter()
             .map(|(pos, _)| *pos)
-            .chain(std::iter::once(suffixe_pos))
+            .chain(std::iter::once(suffix_pos))
             .max()
             .unwrap();
 
@@ -109,22 +125,26 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
                 1,
             ))
         } else {
-            let (pos, posting) = term_postings
+            let (pos, postings) = term_postings
                 .pop()
                 .expect("PhrasePrefixScorer must have at least two terms");
-            let offset = suffixe_pos - pos;
-            PhraseKind::SinglePrefix(offset as u32, posting, Vec::with_capacity(100))
+            let offset = suffix_pos - pos;
+            PhraseKind::SinglePrefix {
+                position_offset: offset as u32,
+                postings,
+                positions: Vec::with_capacity(100),
+            }
         };
-        let mut res = PhrasePrefixScorer {
+        let mut phrase_prefix_scorer = PhrasePrefixScorer {
             phrase_scorer,
             suffixes,
-            suffix_offset: (max_offset - suffixe_pos) as u32,
+            suffix_offset: (max_offset - suffix_pos) as u32,
             phrase_count: 0,
         };
-        if !res.matches_prefix() {
-            res.advance();
+        if !phrase_prefix_scorer.matches_prefix() {
+            phrase_prefix_scorer.advance();
         }
-        res
+        phrase_prefix_scorer
     }
 
     pub fn phrase_count(&self) -> u32 {

diff --git a/src/query/phrase_prefix_query/phrase_prefix_weight.rs b/src/query/phrase_prefix_query/phrase_prefix_weight.rs
@@ -85,7 +85,11 @@ impl PhrasePrefixWeight {
 
         #[cfg(feature = "quickwit")]
         {
-            // TODO implement this on fst too so we don't need to check for feature flag.
+            // We don't have this on the fst, hence  we end up needing a feature flag.
+            //
+            // This is not a problem however as we enforce the limit below too.
+            // The point of `stream.limit` is to limit the number of term dictionary
+            // blocks being downloaded.
             stream = stream.limit(self.max_expansions as u64);
         }
 
@@ -152,7 +156,6 @@ impl Weight for PhrasePrefixWeight {
 
 #[cfg(test)]
 mod tests {
-    use super::*;
     use crate::core::Index;
     use crate::docset::TERMINATED;
     use crate::query::{EnableScoring, PhrasePrefixQuery, Query};
@@ -192,7 +195,7 @@ mod tests {
         ]);
         let enable_scoring = EnableScoring::Enabled(&searcher);
         let phrase_weight = phrase_query
-            .phrase_query_weight(enable_scoring)
+            .phrase_prefix_query_weight(enable_scoring)
             .unwrap()
             .unwrap();
         let mut phrase_scorer = phrase_weight
@@ -219,7 +222,7 @@ mod tests {
         ]);
         let enable_scoring = EnableScoring::Enabled(&searcher);
         let phrase_weight = phrase_query
-            .phrase_query_weight(enable_scoring)
+            .phrase_prefix_query_weight(enable_scoring)
             .unwrap()
             .unwrap();
         let mut phrase_scorer = phrase_weight
@@ -243,7 +246,7 @@ mod tests {
         let phrase_query = PhrasePrefixQuery::new(vec![Term::from_field_text(text_field, "c")]);
         let enable_scoring = EnableScoring::Enabled(&searcher);
         assert!(phrase_query
-            .phrase_query_weight(enable_scoring)
+            .phrase_prefix_query_weight(enable_scoring)
             .unwrap()
             .is_none());
         let weight = phrase_query.weight(enable_scoring).unwrap();