diff --git a/src/core/index.rs b/src/core/index.rs
index f3af70ef5a..a4ddb17906 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -7,6 +7,7 @@ use std::sync::Arc;
 
 use super::segment::Segment;
 use super::IndexSettings;
+use crate::core::index_simple_writer::IndexSimpleWriter;
 use crate::core::{
     Executor, IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory, META_FILEPATH,
 };
@@ -163,6 +164,24 @@ impl IndexBuilder {
         self.create(mmap_directory)
     }
 
+    /// Dragons ahead!!!
+    ///
+    /// The point of this API is to let users create a simple index with a single segment
+    /// and without starting any thread.
+    ///
+    /// Do not use this method if you are not sure what you are doing.
+    ///
+    /// It expects an originally empty directory, and will not run any GC operation.
+    #[doc(hidden)]
+    pub fn create_simple(
+        self,
+        dir: impl Into<Box<dyn Directory>>,
+    ) -> crate::Result<IndexSimpleWriter> {
+        const MEM_BUDGET: usize = 100_000_000; // handed to `IndexSimpleWriter::new`
+        let index = self.create(dir)?;
+        IndexSimpleWriter::new(index, MEM_BUDGET)
+    }
+
     /// Creates a new index in a temp directory.
     ///
     /// The index will use the `MMapDirectory` in a newly created directory.
@@ -608,10 +627,12 @@ impl fmt::Debug for Index {
 
 #[cfg(test)]
 mod tests {
+    use crate::collector::Count;
     use crate::directory::{RamDirectory, WatchCallback};
-    use crate::schema::{Field, Schema, INDEXED, TEXT};
+    use crate::query::TermQuery;
+    use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, TEXT};
     use crate::tokenizer::TokenizerManager;
-    use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy};
+    use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy, Term};
 
     #[test]
     fn test_indexer_for_field() {
@@ -877,4 +898,28 @@ mod tests {
         );
         Ok(())
     }
+
+    #[test]
+    fn test_simple_writer() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+        let directory = RamDirectory::default();
+        let mut simple_writer = Index::builder()
+            .schema(schema.clone())
+            .create_simple(directory)?;
+        for _ in 0..10 {
+            let doc = doc!(text_field=>"hello");
+            simple_writer.add_document(doc)?;
+        }
+        let index = simple_writer.finalize()?;
+        let searcher = index.reader()?.searcher();
+        let term_query = TermQuery::new(
+            Term::from_field_text(text_field, "hello"),
+            IndexRecordOption::Basic,
+        );
+        let count = searcher.search(&term_query, &Count)?;
+        assert_eq!(count, 10);
+        Ok(())
+    }
 }
diff --git a/src/core/index_simple_writer.rs b/src/core/index_simple_writer.rs
new file mode 100644
index 0000000000..b80ba76191
--- /dev/null
+++ b/src/core/index_simple_writer.rs
@@ -0,0 +1,60 @@
+use crate::indexer::operation::AddOperation;
+use crate::indexer::segment_updater::save_metas;
+use crate::indexer::SegmentWriter;
+use crate::{Directory, Document, Index, IndexMeta, Opstamp, Segment};
+
+/// Writes documents into a single segment through one `SegmentWriter`.
+///
+/// Documents are added with [`IndexSimpleWriter::add_document`]; the index metas are only
+/// saved once [`IndexSimpleWriter::finalize`] is called.
+#[doc(hidden)]
+pub struct IndexSimpleWriter {
+    segment_writer: SegmentWriter,
+    segment: Segment,
+    /// Opstamp that the next added document will receive.
+    opstamp: Opstamp,
+}
+
+impl IndexSimpleWriter {
+    /// Creates a writer for a new segment of `index`.
+    ///
+    /// `mem_budget` is forwarded to `SegmentWriter::for_segment`.
+    pub fn new(index: Index, mem_budget: usize) -> crate::Result<Self> {
+        let segment = index.new_segment();
+        let segment_writer = SegmentWriter::for_segment(mem_budget, segment.clone())?;
+        Ok(Self {
+            segment_writer,
+            segment,
+            opstamp: 0,
+        })
+    }
+
+    /// Adds `document` to the segment, stamping it with the next opstamp.
+    pub fn add_document(&mut self, document: Document) -> crate::Result<()> {
+        let opstamp = self.opstamp;
+        self.opstamp += 1;
+        self.segment_writer
+            .add_document(AddOperation { opstamp, document })
+    }
+
+    /// Finalizes the segment, saves the index metas, syncs the directory and returns the
+    /// [`Index`].
+    pub fn finalize(self) -> crate::Result<Index> {
+        let max_doc = self.segment_writer.max_doc();
+        self.segment_writer.finalize()?;
+        let segment: Segment = self.segment.with_max_doc(max_doc);
+        let index = segment.index();
+        let index_meta = IndexMeta {
+            index_settings: index.settings().clone(),
+            segments: vec![segment.meta().clone()],
+            schema: index.schema().clone(),
+            // NOTE(review): the metas record opstamp 0 even though documents were stamped
+            // 0..self.opstamp; confirm this is what a later writer on this index expects.
+            opstamp: 0,
+            payload: None,
+        };
+        save_metas(&index_meta, index.directory())?;
+        index.directory().sync_directory()?;
+        Ok(index.clone())
+    }
+}
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 6ebb652473..ca99a73c7c 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -1,6 +1,7 @@
 mod executor;
 pub mod index;
 mod index_meta;
+mod index_simple_writer;
 mod inverted_index_reader;
 pub mod searcher;
 mod segment;
@@ -17,6 +18,7 @@ pub use self::index::{Index, IndexBuilder};
 pub use self::index_meta::{
     IndexMeta, IndexSettings, IndexSortByField, Order, SegmentMeta, SegmentMetaInventory,
 };
+pub use self::index_simple_writer::IndexSimpleWriter;
 pub use self::inverted_index_reader::InvertedIndexReader;
 pub use self::searcher::{Searcher, SearcherGeneration};
 pub use self::segment::Segment;
diff --git a/src/lib.rs b/src/lib.rs
index 40ff110280..f4dcae6c0d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -299,9 +299,9 @@ use serde::{Deserialize, Serialize};
 
 pub use self::docset::{DocSet, TERMINATED};
 pub use crate::core::{
-    Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSortByField, InvertedIndexReader,
-    Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId, SegmentMeta,
-    SegmentReader,
+    Executor, Index, IndexBuilder, IndexMeta, IndexSettings, IndexSimpleWriter, IndexSortByField,
+    InvertedIndexReader, Order, Searcher, SearcherGeneration, Segment, SegmentComponent, SegmentId,
+    SegmentMeta, SegmentReader,
 };
 pub use crate::directory::Directory;
 pub use crate::indexer::demuxer::*;