Skip to content

Commit

Permalink
Adding dragon API to build index without any thread. (#1496)
Browse files Browse the repository at this point in the history
Closes #1487
  • Loading branch information
fulmicoton authored Sep 1, 2022
1 parent 70e58ad commit 08c4412
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 6 deletions.
2 changes: 1 addition & 1 deletion common/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
pub struct AntiCallToken(());

/// Trait used to indicate when no more write need to be done on a writer
pub trait TerminatingWrite: Write + Send {
pub trait TerminatingWrite: Write + Send + Sync {
/// Indicate that the writer will no longer be used. Internally call terminate_ref.
fn terminate(mut self) -> io::Result<()>
where Self: Sized {
Expand Down
50 changes: 48 additions & 2 deletions src/core/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::sync::Arc;

use super::segment::Segment;
use super::IndexSettings;
use crate::core::single_segment_index_writer::SingleSegmentIndexWriter;
use crate::core::{
Executor, IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory, META_FILEPATH,
};
Expand Down Expand Up @@ -163,6 +164,25 @@ impl IndexBuilder {
self.create(mmap_directory)
}

/// Dragons ahead!!!
///
/// The point of this API is to let users create a simple index with a single segment
/// and without starting any thread.
///
/// Do not use this method if you are not sure what you are doing.
///
/// It expects an originally empty directory, and will not run any GC operation.
#[doc(hidden)]
pub fn single_segment_index_writer(
self,
dir: impl Into<Box<dyn Directory>>,
mem_budget: usize,
) -> crate::Result<SingleSegmentIndexWriter> {
let index = self.create(dir)?;
let index_simple_writer = SingleSegmentIndexWriter::new(index, mem_budget)?;
Ok(index_simple_writer)
}

/// Creates a new index in a temp directory.
///
/// The index will use the `MMapDirectory` in a newly created directory.
Expand Down Expand Up @@ -608,10 +628,12 @@ impl fmt::Debug for Index {

#[cfg(test)]
mod tests {
use crate::collector::Count;
use crate::directory::{RamDirectory, WatchCallback};
use crate::schema::{Field, Schema, INDEXED, TEXT};
use crate::query::TermQuery;
use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, TEXT};
use crate::tokenizer::TokenizerManager;
use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy};
use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy, Term};

#[test]
fn test_indexer_for_field() {
Expand Down Expand Up @@ -877,4 +899,28 @@ mod tests {
);
Ok(())
}

#[test]
fn test_single_segment_index_writer() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let directory = RamDirectory::default();
let mut single_segment_index_writer = Index::builder()
.schema(schema)
.single_segment_index_writer(directory, 10_000_000)?;
for _ in 0..10 {
let doc = doc!(text_field=>"hello");
single_segment_index_writer.add_document(doc)?;
}
let index = single_segment_index_writer.finalize()?;
let searcher = index.reader()?.searcher();
let term_query = TermQuery::new(
Term::from_field_text(text_field, "hello"),
IndexRecordOption::Basic,
);
let count = searcher.search(&term_query, &Count)?;
assert_eq!(count, 10);
Ok(())
}
}
2 changes: 2 additions & 0 deletions src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod segment;
mod segment_component;
mod segment_id;
mod segment_reader;
mod single_segment_index_writer;

use std::path::Path;

Expand All @@ -23,6 +24,7 @@ pub use self::segment::Segment;
pub use self::segment_component::SegmentComponent;
pub use self::segment_id::SegmentId;
pub use self::segment_reader::SegmentReader;
pub use self::single_segment_index_writer::SingleSegmentIndexWriter;

/// The meta file contains all the information about the list of segments and the schema
/// of the index.
Expand Down
47 changes: 47 additions & 0 deletions src/core/single_segment_index_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use crate::indexer::operation::AddOperation;
use crate::indexer::segment_updater::save_metas;
use crate::indexer::SegmentWriter;
use crate::{Directory, Document, Index, IndexMeta, Opstamp, Segment};

#[doc(hidden)]
pub struct SingleSegmentIndexWriter {
segment_writer: SegmentWriter,
segment: Segment,
opstamp: Opstamp,
}

impl SingleSegmentIndexWriter {
pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
let segment = index.new_segment();
let segment_writer = SegmentWriter::for_segment(mem_budget, segment.clone())?;
Ok(Self {
segment_writer,
segment,
opstamp: 0,
})
}

pub fn add_document(&mut self, document: Document) -> crate::Result<()> {
let opstamp = self.opstamp;
self.opstamp += 1;
self.segment_writer
.add_document(AddOperation { opstamp, document })
}

pub fn finalize(self) -> crate::Result<Index> {
let max_doc = self.segment_writer.max_doc();
self.segment_writer.finalize()?;
let segment: Segment = self.segment.with_max_doc(max_doc);
let index = segment.index();
let index_meta = IndexMeta {
index_settings: index.settings().clone(),
segments: vec![segment.meta().clone()],
schema: index.schema(),
opstamp: 0,
payload: None,
};
save_metas(&index_meta, index.directory())?;
index.directory().sync_directory()?;
Ok(segment.index().clone())
}
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ pub use self::docset::{DocSet, TERMINATED};
pub use crate::core::{
Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader,
Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta,
SegmentReader,
SegmentReader, SingleSegmentIndexWriter,
};
pub use crate::directory::Directory;
pub use crate::indexer::demuxer::*;
Expand Down
2 changes: 1 addition & 1 deletion src/postings/postings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ pub(crate) struct IndexingPosition {
/// and building a `Segment` in anonymous memory.
///
/// `PostingsWriter` writes in a `MemoryArena`.
pub(crate) trait PostingsWriter {
pub(crate) trait PostingsWriter: Send + Sync {
/// Record that a document contains a term at a given position.
///
/// * doc - the document id
Expand Down
2 changes: 1 addition & 1 deletion src/postings/recorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl<'a> Iterator for VInt32Reader<'a> {
/// * the document id
/// * the term frequency
/// * the term positions
pub(crate) trait Recorder: Copy + Default + 'static {
pub(crate) trait Recorder: Copy + Default + Send + Sync + 'static {
/// Returns the current document
fn current_doc(&self) -> u32;
/// Starts recording information about a new document
Expand Down

0 comments on commit 08c4412

Please sign in to comment.