Skip to content

Commit

Permalink
Adding dragon API to build index without any thread.
Browse files Browse the repository at this point in the history
Closes #1487
  • Loading branch information
fulmicoton committed Aug 31, 2022
1 parent a451f6d commit e40d638
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 8 deletions.
2 changes: 1 addition & 1 deletion common/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
pub struct AntiCallToken(());

/// Trait used to indicate when no more writes need to be done on a writer
pub trait TerminatingWrite: Write + Send {
pub trait TerminatingWrite: Write + Send + Sync {
/// Indicates that the writer will no longer be used. Internally calls `terminate_ref`.
fn terminate(mut self) -> io::Result<()>
where Self: Sized {
Expand Down
49 changes: 47 additions & 2 deletions src/core/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::sync::Arc;

use super::segment::Segment;
use super::IndexSettings;
use crate::core::index_simple_writer::IndexSimpleWriter;
use crate::core::{
Executor, IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory, META_FILEPATH,
};
Expand Down Expand Up @@ -163,6 +164,24 @@ impl IndexBuilder {
self.create(mmap_directory)
}

/// Dragons ahead!!!
///
/// Creates the index in `dir` and immediately wraps it in an `IndexSimpleWriter`.
///
/// This API exists so that users can build a simple index made of a single segment
/// without starting any thread.
///
/// Do not use this method if you are not sure what you are doing.
///
/// The directory is expected to be empty to begin with, and no GC operation will be run.
///
/// # Errors
///
/// Returns the error of `create` if the index cannot be created, or the error of
/// `IndexSimpleWriter::new` if the writer cannot be set up.
#[doc(hidden)]
pub fn create_simple(
    self,
    dir: impl Into<Box<dyn Directory>>,
) -> crate::Result<IndexSimpleWriter> {
    // Memory budget handed to the simple writer.
    // NOTE(review): presumably expressed in bytes; confirm against `SegmentWriter`.
    const MEM_BUDGET: usize = 100_000_000;
    self.create(dir)
        .and_then(|index| IndexSimpleWriter::new(index, MEM_BUDGET))
}

/// Creates a new index in a temp directory.
///
/// The index will use the `MMapDirectory` in a newly created directory.
Expand Down Expand Up @@ -608,10 +627,12 @@ impl fmt::Debug for Index {

#[cfg(test)]
mod tests {
use crate::collector::Count;
use crate::directory::{RamDirectory, WatchCallback};
use crate::schema::{Field, Schema, INDEXED, TEXT};
use crate::query::TermQuery;
use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, TEXT};
use crate::tokenizer::TokenizerManager;
use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy};
use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy, Term};

#[test]
fn test_indexer_for_field() {
Expand Down Expand Up @@ -877,4 +898,28 @@ mod tests {
);
Ok(())
}

/// Adds ten identical documents through the simple writer, finalizes it, and checks
/// that a term query on the resulting index counts all ten documents.
#[test]
fn test_simple_writer() -> crate::Result<()> {
    let mut schema_builder = Schema::builder();
    let text_field = schema_builder.add_text_field("text", TEXT);
    let schema = schema_builder.build();
    let mut simple_writer = Index::builder()
        .schema(schema.clone())
        .create_simple(RamDirectory::default())?;
    // Stops at the first failing insertion, exactly like a `for` loop with `?` would.
    (0..10).try_for_each(|_| simple_writer.add_document(doc!(text_field => "hello")))?;
    let index = simple_writer.finalize()?;
    let hello_query = TermQuery::new(
        Term::from_field_text(text_field, "hello"),
        IndexRecordOption::Basic,
    );
    let searcher = index.reader()?.searcher();
    let num_hits = searcher.search(&hello_query, &Count)?;
    assert_eq!(num_hits, 10);
    Ok(())
}
}
47 changes: 47 additions & 0 deletions src/core/index_simple_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use crate::indexer::operation::AddOperation;
use crate::indexer::segment_updater::save_metas;
use crate::indexer::SegmentWriter;
use crate::{Directory, Document, Index, IndexMeta, Opstamp, Segment};

/// Writer that feeds documents into one segment of an index and, once finalized,
/// saves an index meta listing that segment only.
#[doc(hidden)]
pub struct IndexSimpleWriter {
/// Segment writer receiving every document passed to `add_document`.
segment_writer: SegmentWriter,
/// Segment obtained from the index at construction time; its meta is the one
/// recorded in the index meta by `finalize`.
segment: Segment,
/// Opstamp assigned to the next added document. Starts at 0 and grows by one
/// on each `add_document` call.
opstamp: Opstamp,
}

impl IndexSimpleWriter {
    /// Creates a writer targeting a brand new segment of `index`.
    ///
    /// `mem_budget` is forwarded unchanged to `SegmentWriter::for_segment`.
    ///
    /// # Errors
    ///
    /// Fails if the segment writer cannot be created for the new segment.
    pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
        let segment = index.new_segment();
        // The segment writer gets its own handle; ours is kept for `finalize`.
        SegmentWriter::for_segment(mem_budget, segment.clone()).map(|segment_writer| Self {
            segment_writer,
            segment,
            opstamp: 0,
        })
    }

    /// Adds `document` to the segment, stamping it with the current opstamp.
    ///
    /// The opstamp counter is advanced before the document is handed to the segment
    /// writer, so it moves forward even when the insertion itself fails.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying segment writer.
    pub fn add_document(&mut self, document: Document) -> crate::Result<()> {
        let operation = AddOperation {
            opstamp: self.opstamp,
            document,
        };
        self.opstamp += 1;
        self.segment_writer.add_document(operation)
    }

    /// Finalizes the segment, saves an index meta listing that single segment, syncs
    /// the directory, and returns the index.
    ///
    /// # Errors
    ///
    /// Fails if finalizing the segment writer, saving the metas, or syncing the
    /// directory fails.
    pub fn finalize(self) -> crate::Result<Index> {
        let Self {
            segment_writer,
            segment,
            ..
        } = self;
        // `max_doc` has to be read first: finalizing consumes the segment writer.
        let max_doc = segment_writer.max_doc();
        segment_writer.finalize()?;
        let segment: Segment = segment.with_max_doc(max_doc);
        let index = segment.index();
        // NOTE(review): the persisted opstamp is always 0, regardless of how many
        // documents were added. Confirm this is intended.
        let index_meta = IndexMeta {
            index_settings: index.settings().clone(),
            segments: vec![segment.meta().clone()],
            schema: index.schema().clone(),
            opstamp: 0,
            payload: None,
        };
        let directory = index.directory();
        save_metas(&index_meta, directory)?;
        directory.sync_directory()?;
        Ok(index.clone())
    }
}
2 changes: 2 additions & 0 deletions src/core/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod executor;
pub mod index;
mod index_meta;
mod index_simple_writer;
mod inverted_index_reader;
pub mod searcher;
mod segment;
Expand All @@ -17,6 +18,7 @@ pub use self::index::{Index, IndexBuilder};
pub use self::index_meta::{
IndexMeta, IndexSettings, IndexSortByField, Order, SegmentMeta, SegmentMetaInventory,
};
pub use self::index_simple_writer::IndexSimpleWriter;
pub use self::inverted_index_reader::InvertedIndexReader;
pub use self::searcher::{Searcher, SearcherGeneration};
pub use self::segment::Segment;
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,9 @@ use serde::{Deserialize, Serialize};

pub use self::docset::{DocSet, TERMINATED};
pub use crate::core::{
Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader,
Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta,
SegmentReader,
Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSimpleWriter, IndexSortByField,
InvertedIndexReader, Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId,
SegmentMeta, SegmentReader,
};
pub use crate::directory::Directory;
pub use crate::indexer::demuxer::*;
Expand Down
2 changes: 1 addition & 1 deletion src/postings/postings_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ pub(crate) struct IndexingPosition {
/// and building a `Segment` in anonymous memory.
///
/// `PostingsWriter` writes in a `MemoryArena`.
pub(crate) trait PostingsWriter {
pub(crate) trait PostingsWriter: Send {
/// Record that a document contains a term at a given position.
///
/// * doc - the document id
Expand Down
2 changes: 1 addition & 1 deletion src/postings/recorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl<'a> Iterator for VInt32Reader<'a> {
/// * the document id
/// * the term frequency
/// * the term positions
pub(crate) trait Recorder: Copy + Default + 'static {
pub(crate) trait Recorder: Copy + Default + Send + 'static {
/// Returns the current document
fn current_doc(&self) -> u32;
/// Starts recording information about a new document
Expand Down

0 comments on commit e40d638

Please sign in to comment.