Skip to content

Commit

Permalink
apply review recommendations
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a committed Feb 20, 2023
1 parent 1bac8a7 commit 37c8257
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 31 deletions.
12 changes: 9 additions & 3 deletions src/query/phrase_prefix_query/phrase_prefix_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use crate::query::bm25::Bm25Weight;
use crate::query::{EnableScoring, Query, RangeQuery, Weight};
use crate::schema::{Field, IndexRecordOption, Term};

const DEFAULT_MAX_EXPANSIONS: u32 = 50;

/// `PhrasePrefixQuery` matches a specific sequence of words followed by term of which only a
/// prefix is known.
///
Expand Down Expand Up @@ -57,7 +59,7 @@ impl PhrasePrefixQuery {
field,
prefix: terms.pop().unwrap(),
phrase_terms: terms,
max_expansions: 50,
max_expansions: DEFAULT_MAX_EXPANSIONS,
}
}

Expand Down Expand Up @@ -86,7 +88,10 @@ impl PhrasePrefixQuery {
/// a specialized type [`PhraseQueryWeight`] instead of a Boxed trait.
/// If the query was only one term long, this returns `None` wherease [`Query::weight`]
/// returns a boxed [`RangeWeight`]
pub(crate) fn phrase_query_weight(
///
/// Returns `None`, if phrase_terms is empty, which happens if the phrase prefix query was
/// built with a single term.
pub(crate) fn phrase_prefix_query_weight(
&self,
enable_scoring: EnableScoring<'_>,
) -> crate::Result<Option<PhrasePrefixWeight>> {
Expand Down Expand Up @@ -127,9 +132,10 @@ impl Query for PhrasePrefixQuery {
///
/// See [`Weight`].
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
if let Some(phrase_weight) = self.phrase_query_weight(enable_scoring)? {
if let Some(phrase_weight) = self.phrase_prefix_query_weight(enable_scoring)? {
Ok(Box::new(phrase_weight))
} else {
// There are no prefix. Let's just match the suffix.
let end_term = if let Some(end_value) = prefix_end(&self.prefix.1.value_bytes()) {
let mut end_term = Term::with_capacity(end_value.len());
end_term.set_field_and_type(self.field, self.prefix.1.typ());
Expand Down
66 changes: 43 additions & 23 deletions src/query/phrase_prefix_query/phrase_prefix_scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,26 @@ use crate::query::Scorer;
use crate::{DocId, Score};

enum PhraseKind<TPostings: Postings> {
SinglePrefix(u32, TPostings, Vec<u32>),
SinglePrefix {
position_offset: u32,
postings: TPostings,
positions: Vec<u32>,
},
MultiPrefix(PhraseScorer<TPostings>),
}

impl<TPostings: Postings> PhraseKind<TPostings> {
fn get_intersection(&mut self) -> &[u32] {
match self {
PhraseKind::SinglePrefix(offset, postings, indexes) => {
if indexes.is_empty() {
postings.positions_with_offset(*offset, indexes);
PhraseKind::SinglePrefix {
position_offset,
postings,
positions,
} => {
if positions.is_empty() {
postings.positions_with_offset(*position_offset, positions);
}
indexes
positions
}
PhraseKind::MultiPrefix(postings) => postings.get_intersection(),
}
Expand All @@ -28,8 +36,12 @@ impl<TPostings: Postings> PhraseKind<TPostings> {
impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
fn advance(&mut self) -> DocId {
match self {
PhraseKind::SinglePrefix(_, postings, indexes) => {
indexes.clear();
PhraseKind::SinglePrefix {
postings,
positions,
..
} => {
positions.clear();
postings.advance()
}
PhraseKind::MultiPrefix(postings) => postings.advance(),
Expand All @@ -38,22 +50,26 @@ impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {

fn doc(&self) -> DocId {
match self {
PhraseKind::SinglePrefix(_, postings, _) => postings.doc(),
PhraseKind::SinglePrefix { postings, .. } => postings.doc(),
PhraseKind::MultiPrefix(postings) => postings.doc(),
}
}

fn size_hint(&self) -> u32 {
match self {
PhraseKind::SinglePrefix(_, postings, _) => postings.size_hint(),
PhraseKind::SinglePrefix { postings, .. } => postings.size_hint(),
PhraseKind::MultiPrefix(postings) => postings.size_hint(),
}
}

fn seek(&mut self, target: DocId) -> DocId {
match self {
PhraseKind::SinglePrefix(_, postings, indexes) => {
indexes.clear();
PhraseKind::SinglePrefix {
postings,
positions,
..
} => {
positions.clear();
postings.seek(target)
}
PhraseKind::MultiPrefix(postings) => postings.seek(target),
Expand All @@ -64,8 +80,8 @@ impl<TPostings: Postings> DocSet for PhraseKind<TPostings> {
impl<TPostings: Postings> Scorer for PhraseKind<TPostings> {
fn score(&mut self) -> Score {
match self {
PhraseKind::SinglePrefix(_, _postings, indexes) => {
if indexes.is_empty() {
PhraseKind::SinglePrefix { positions, .. } => {
if positions.is_empty() {
0.0
} else {
1.0
Expand All @@ -90,13 +106,13 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
similarity_weight_opt: Option<Bm25Weight>,
fieldnorm_reader: FieldNormReader,
suffixes: Vec<TPostings>,
suffixe_pos: usize,
suffix_pos: usize,
) -> PhrasePrefixScorer<TPostings> {
// correct indices so we can merge with our suffix term the PhraseScorer doesn't know about
let max_offset = term_postings
.iter()
.map(|(pos, _)| *pos)
.chain(std::iter::once(suffixe_pos))
.chain(std::iter::once(suffix_pos))
.max()
.unwrap();

Expand All @@ -109,22 +125,26 @@ impl<TPostings: Postings> PhrasePrefixScorer<TPostings> {
1,
))
} else {
let (pos, posting) = term_postings
let (pos, postings) = term_postings
.pop()
.expect("PhrasePrefixScorer must have at least two terms");
let offset = suffixe_pos - pos;
PhraseKind::SinglePrefix(offset as u32, posting, Vec::with_capacity(100))
let offset = suffix_pos - pos;
PhraseKind::SinglePrefix {
position_offset: offset as u32,
postings,
positions: Vec::with_capacity(100),
}
};
let mut res = PhrasePrefixScorer {
let mut phrase_prefix_scorer = PhrasePrefixScorer {
phrase_scorer,
suffixes,
suffix_offset: (max_offset - suffixe_pos) as u32,
suffix_offset: (max_offset - suffix_pos) as u32,
phrase_count: 0,
};
if !res.matches_prefix() {
res.advance();
if !phrase_prefix_scorer.matches_prefix() {
phrase_prefix_scorer.advance();
}
res
phrase_prefix_scorer
}

pub fn phrase_count(&self) -> u32 {
Expand Down
13 changes: 8 additions & 5 deletions src/query/phrase_prefix_query/phrase_prefix_weight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ impl PhrasePrefixWeight {

#[cfg(feature = "quickwit")]
{
// TODO implement this on fst too so we don't need to check for feature flag.
// We don't have this on the fst, hence we end up needing a feature flag.
//
// This is not a problem however as we enforce the limit below too.
// The point of `stream.limit` is to limit the number of term dictionary
// blocks being downloaded.
stream = stream.limit(self.max_expansions as u64);
}

Expand Down Expand Up @@ -152,7 +156,6 @@ impl Weight for PhrasePrefixWeight {

#[cfg(test)]
mod tests {
use super::*;
use crate::core::Index;
use crate::docset::TERMINATED;
use crate::query::{EnableScoring, PhrasePrefixQuery, Query};
Expand Down Expand Up @@ -192,7 +195,7 @@ mod tests {
]);
let enable_scoring = EnableScoring::Enabled(&searcher);
let phrase_weight = phrase_query
.phrase_query_weight(enable_scoring)
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
Expand All @@ -219,7 +222,7 @@ mod tests {
]);
let enable_scoring = EnableScoring::Enabled(&searcher);
let phrase_weight = phrase_query
.phrase_query_weight(enable_scoring)
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.unwrap();
let mut phrase_scorer = phrase_weight
Expand All @@ -243,7 +246,7 @@ mod tests {
let phrase_query = PhrasePrefixQuery::new(vec![Term::from_field_text(text_field, "c")]);
let enable_scoring = EnableScoring::Enabled(&searcher);
assert!(phrase_query
.phrase_query_weight(enable_scoring)
.phrase_prefix_query_weight(enable_scoring)
.unwrap()
.is_none());
let weight = phrase_query.weight(enable_scoring).unwrap();
Expand Down

0 comments on commit 37c8257

Please sign in to comment.