Skip to content

Commit

Permalink
Using the manual reload policy in IndexWriter. (#1667)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored Nov 9, 2022
1 parent 8ca12a5 commit 3edf0a2
Show file tree
Hide file tree
Showing 28 changed files with 189 additions and 162 deletions.
9 changes: 7 additions & 2 deletions src/core/searcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{fmt, io};

use crate::collector::Collector;
use crate::core::{Executor, SegmentReader};
use crate::query::Query;
use crate::query::{EnableScoring, Query};
use crate::schema::{Document, Schema, Term};
use crate::space_usage::SearcherSpaceUsage;
use crate::store::{CacheStats, StoreReader};
Expand Down Expand Up @@ -199,7 +199,12 @@ impl Searcher {
executor: &Executor,
) -> crate::Result<C::Fruit> {
let scoring_enabled = collector.requires_scoring();
let weight = query.weight(self, scoring_enabled)?;
let enabled_scoring = if scoring_enabled {
EnableScoring::Enabled(self)
} else {
EnableScoring::Disabled(self.schema())
};
let weight = query.weight(enabled_scoring)?;
let segment_readers = self.segment_readers();
let fruits = executor.map(
|(segment_ord, segment_reader)| {
Expand Down
7 changes: 4 additions & 3 deletions src/fastfield/bytes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub use self::writer::BytesFastFieldWriter;

#[cfg(test)]
mod tests {
use crate::query::TermQuery;
use crate::query::{EnableScoring, TermQuery};
use crate::schema::{BytesOptions, IndexRecordOption, Schema, Value, FAST, INDEXED, STORED};
use crate::{DocAddress, DocSet, Index, Searcher, Term};

Expand Down Expand Up @@ -82,7 +82,7 @@ mod tests {
let field = searcher.schema().get_field("string_bytes").unwrap();
let term = Term::from_field_bytes(field, b"lucene".as_ref());
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
let term_weight = term_query.specialized_weight(&searcher, true)?;
let term_weight = term_query.specialized_weight(EnableScoring::Enabled(&searcher))?;
let term_scorer = term_weight.specialized_scorer(searcher.segment_reader(0), 1.0)?;
assert_eq!(term_scorer.doc(), 0u32);
Ok(())
Expand All @@ -95,7 +95,8 @@ mod tests {
let field = searcher.schema().get_field("string_bytes").unwrap();
let term = Term::from_field_bytes(field, b"lucene".as_ref());
let term_query = TermQuery::new(term, IndexRecordOption::Basic);
let term_weight_err = term_query.specialized_weight(&searcher, false);
let term_weight_err =
term_query.specialized_weight(EnableScoring::Disabled(searcher.schema()));
assert!(matches!(
term_weight_err,
Err(crate::TantivyError::SchemaError(_))
Expand Down
6 changes: 3 additions & 3 deletions src/fieldnorm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ mod tests {

use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::fieldnorm::{FieldNormReader, FieldNormsSerializer, FieldNormsWriter};
use crate::query::{Query, TermQuery};
use crate::query::{EnableScoring, Query, TermQuery};
use crate::schema::{
Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, TEXT,
};
Expand Down Expand Up @@ -112,7 +112,7 @@ mod tests {
Term::from_field_text(text, "hello"),
IndexRecordOption::WithFreqs,
);
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
assert_eq!(scorer.doc(), 0);
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
Expand Down Expand Up @@ -141,7 +141,7 @@ mod tests {
Term::from_field_text(text, "hello"),
IndexRecordOption::WithFreqs,
);
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
assert_eq!(scorer.doc(), 0);
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
Expand Down
13 changes: 4 additions & 9 deletions src/indexer/index_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ use crate::indexer::index_writer_status::IndexWriterStatus;
use crate::indexer::operation::DeleteOperation;
use crate::indexer::stamper::Stamper;
use crate::indexer::{MergePolicy, SegmentEntry, SegmentWriter};
use crate::query::{Query, TermQuery};
use crate::query::{EnableScoring, Query, TermQuery};
use crate::schema::{Document, IndexRecordOption, Term};
use crate::{FutureResult, IndexReader, Opstamp};
use crate::{FutureResult, Opstamp};

// Size of the margin for the `memory_arena`. A segment is closed when the remaining memory
// in the `memory_arena` goes below MARGIN_IN_BYTES.
Expand Down Expand Up @@ -57,7 +57,6 @@ pub struct IndexWriter {
_directory_lock: Option<DirectoryLock>,

index: Index,
index_reader: IndexReader,

memory_arena_in_bytes_per_thread: usize,

Expand Down Expand Up @@ -298,8 +297,6 @@ impl IndexWriter {

memory_arena_in_bytes_per_thread,
index: index.clone(),
index_reader: index.reader()?,

index_writer_status: IndexWriterStatus::from(document_receiver),
operation_sender: document_sender,

Expand Down Expand Up @@ -681,8 +678,7 @@ impl IndexWriter {
/// only after calling `commit()`.
#[doc(hidden)]
pub fn delete_query(&self, query: Box<dyn Query>) -> crate::Result<Opstamp> {
let weight = query.weight(&self.index_reader.searcher(), false)?;

let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?;
let opstamp = self.stamper.stamp();
let delete_operation = DeleteOperation {
opstamp,
Expand Down Expand Up @@ -763,8 +759,7 @@ impl IndexWriter {
match user_op {
UserOperation::Delete(term) => {
let query = TermQuery::new(term, IndexRecordOption::Basic);
let weight = query.weight(&self.index_reader.searcher(), false)?;

let weight = query.weight(EnableScoring::Disabled(&self.index.schema()))?;
let delete_operation = DeleteOperation {
opstamp,
target: weight,
Expand Down
8 changes: 4 additions & 4 deletions src/indexer/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ mod tests {
};
use crate::collector::{Count, FacetCollector};
use crate::core::Index;
use crate::query::{AllQuery, BooleanQuery, Scorer, TermQuery};
use crate::query::{AllQuery, BooleanQuery, EnableScoring, Scorer, TermQuery};
use crate::schema::{
Cardinality, Document, Facet, FacetOptions, IndexRecordOption, NumericOptions, Term,
TextFieldIndexing, INDEXED, TEXT,
Expand Down Expand Up @@ -1977,7 +1977,7 @@ mod tests {
let reader = index.reader()?;
let searcher = reader.searcher();
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(term_scorer.doc(), 0);
assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
Expand All @@ -1992,7 +1992,7 @@ mod tests {
assert_eq!(searcher.segment_readers().len(), 2);
for segment_reader in searcher.segment_readers() {
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(segment_reader, 1.0)?;
// the difference compared to before is intrinsic to the bm25 formula. no worries
// there.
Expand All @@ -2017,7 +2017,7 @@ mod tests {

let segment_reader = searcher.segment_reader(0u32);
let mut term_scorer = term_query
.specialized_weight(&searcher, true)?
.specialized_weight(EnableScoring::Enabled(&searcher))?
.specialized_scorer(segment_reader, 1.0)?;
// the difference compared to before is intrinsic to the bm25 formula. no worries there.
for doc in segment_reader.doc_ids_alive() {
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ pub mod fastfield;
pub mod fieldnorm;
pub mod positions;
pub mod postings;

/// Module containing the different query implementations.
pub mod query;
pub mod schema;
pub mod space_usage;
Expand Down
12 changes: 6 additions & 6 deletions src/query/all_query.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::core::{Searcher, SegmentReader};
use crate::core::SegmentReader;
use crate::docset::{DocSet, TERMINATED};
use crate::query::boost_query::BoostScorer;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, Score};

/// Query that matches all of the documents.
Expand All @@ -12,7 +12,7 @@ use crate::{DocId, Score};
pub struct AllQuery;

impl Query for AllQuery {
fn weight(&self, _: &Searcher, _: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, _: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
Ok(Box::new(AllWeight))
}
}
Expand Down Expand Up @@ -72,7 +72,7 @@ impl Scorer for AllScorer {
mod tests {
use super::AllQuery;
use crate::docset::TERMINATED;
use crate::query::Query;
use crate::query::{EnableScoring, Query};
use crate::schema::{Schema, TEXT};
use crate::Index;

Expand All @@ -95,7 +95,7 @@ mod tests {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false)?;
let weight = AllQuery.weight(EnableScoring::Disabled(&index.schema()))?;
{
let reader = searcher.segment_reader(0);
let mut scorer = weight.scorer(reader, 1.0)?;
Expand All @@ -118,7 +118,7 @@ mod tests {
let index = create_test_index()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let weight = AllQuery.weight(&searcher, false)?;
let weight = AllQuery.weight(EnableScoring::Disabled(searcher.schema()))?;
let reader = searcher.segment_reader(0);
{
let mut scorer = weight.scorer(reader, 2.0)?;
Expand Down
11 changes: 4 additions & 7 deletions src/query/boolean_query/boolean_query.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use super::boolean_weight::BooleanWeight;
use crate::query::{Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
use crate::schema::{IndexRecordOption, Term};
use crate::Searcher;

/// The boolean query returns a set of documents
/// that matches the Boolean combination of constituent subqueries.
Expand Down Expand Up @@ -143,17 +142,15 @@ impl From<Vec<(Occur, Box<dyn Query>)>> for BooleanQuery {
}

impl Query for BooleanQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let sub_weights = self
.subqueries
.iter()
.map(|&(ref occur, ref subquery)| {
Ok((*occur, subquery.weight(searcher, scoring_enabled)?))
})
.map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(enable_scoring)?)))
.collect::<crate::Result<_>>()?;
Ok(Box::new(BooleanWeight::new(
sub_weights,
scoring_enabled,
enable_scoring.is_scoring_enabled(),
Box::new(SumWithCoordsCombiner::default),
)))
}
Expand Down
17 changes: 10 additions & 7 deletions src/query/boolean_query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ mod tests {
use crate::query::score_combiner::SumWithCoordsCombiner;
use crate::query::term_query::TermScorer;
use crate::query::{
Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer, TermQuery,
EnableScoring, Intersection, Occur, Query, QueryParser, RequiredOptionalScorer, Scorer,
TermQuery,
};
use crate::schema::*;
use crate::{assert_nearly_equals, DocAddress, DocId, Index, Score};
Expand Down Expand Up @@ -54,7 +55,7 @@ mod tests {
let query_parser = QueryParser::for_index(&index, vec![text_field]);
let query = query_parser.parse_query("+a")?;
let searcher = index.reader()?.searcher();
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
Ok(())
Expand All @@ -67,13 +68,13 @@ mod tests {
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a +b +c")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<TermScorer>>());
}
{
let query = query_parser.parse_query("+a +(b c)")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<Intersection<Box<dyn Scorer>>>());
}
Expand All @@ -87,7 +88,7 @@ mod tests {
let searcher = index.reader()?.searcher();
{
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, true)?;
let weight = query.weight(EnableScoring::Enabled(&searcher))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<RequiredOptionalScorer<
Box<dyn Scorer>,
Expand All @@ -97,7 +98,7 @@ mod tests {
}
{
let query = query_parser.parse_query("+a b")?;
let weight = query.weight(&searcher, false)?;
let weight = query.weight(EnableScoring::Disabled(searcher.schema()))?;
let scorer = weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert!(scorer.is::<TermScorer>());
}
Expand Down Expand Up @@ -241,7 +242,9 @@ mod tests {
let searcher = reader.searcher();
let boolean_query =
BooleanQuery::new(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
let boolean_weight = boolean_query.weight(&searcher, true).unwrap();
let boolean_weight = boolean_query
.weight(EnableScoring::Enabled(&searcher))
.unwrap();
{
let mut boolean_scorer = boolean_weight.scorer(searcher.segment_reader(0u32), 1.0)?;
assert_eq!(boolean_scorer.doc(), 0u32);
Expand Down
10 changes: 5 additions & 5 deletions src/query/boost_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::fmt;

use crate::fastfield::AliveBitSet;
use crate::query::explanation::does_not_match;
use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, Term};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, Term};

/// `BoostQuery` is a wrapper over a query used to boost its score.
///
Expand Down Expand Up @@ -38,9 +38,9 @@ impl fmt::Debug for BoostQuery {
}

impl Query for BoostQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
let weight_without_boost = self.query.weight(searcher, scoring_enabled)?;
let boosted_weight = if scoring_enabled {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let weight_without_boost = self.query.weight(enable_scoring)?;
let boosted_weight = if enable_scoring.is_scoring_enabled() {
Box::new(BoostWeight::new(weight_without_boost, self.boost))
} else {
weight_without_boost
Expand Down
10 changes: 5 additions & 5 deletions src/query/const_score_query.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fmt;

use crate::query::{Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, Searcher, SegmentReader, TantivyError, Term};
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
use crate::{DocId, DocSet, Score, SegmentReader, TantivyError, Term};

/// `ConstScoreQuery` is a wrapper over a query to provide a constant score.
/// It can avoid unnecessary score computation on the wrapped query.
Expand Down Expand Up @@ -36,9 +36,9 @@ impl fmt::Debug for ConstScoreQuery {
}

impl Query for ConstScoreQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
let inner_weight = self.query.weight(searcher, scoring_enabled)?;
Ok(if scoring_enabled {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let inner_weight = self.query.weight(enable_scoring)?;
Ok(if enable_scoring.is_scoring_enabled() {
Box::new(ConstWeight::new(inner_weight, self.score))
} else {
inner_weight
Expand Down
10 changes: 5 additions & 5 deletions src/query/disjunction_max_query.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use tantivy_query_grammar::Occur;

use crate::query::{BooleanWeight, DisjunctionMaxCombiner, Query, Weight};
use crate::{Score, Searcher, Term};
use crate::query::{BooleanWeight, DisjunctionMaxCombiner, EnableScoring, Query, Weight};
use crate::{Score, Term};

/// The disjunction max query returns documents matching one or more wrapped queries,
/// called query clauses or clauses.
Expand Down Expand Up @@ -91,16 +91,16 @@ impl Clone for DisjunctionMaxQuery {
}

impl Query for DisjunctionMaxQuery {
fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
let disjuncts = self
.disjuncts
.iter()
.map(|disjunct| Ok((Occur::Should, disjunct.weight(searcher, scoring_enabled)?)))
.map(|disjunct| Ok((Occur::Should, disjunct.weight(enable_scoring)?)))
.collect::<crate::Result<_>>()?;
let tie_breaker = self.tie_breaker;
Ok(Box::new(BooleanWeight::new(
disjuncts,
scoring_enabled,
enable_scoring.is_scoring_enabled(),
Box::new(move || DisjunctionMaxCombiner::with_tie_breaker(tie_breaker)),
)))
}
Expand Down
Loading

0 comments on commit 3edf0a2

Please sign in to comment.