From 3ac4906d801009ce095bd59ce732da28b69a5ee6 Mon Sep 17 00:00:00 2001 From: Cno Date: Sun, 11 Aug 2024 16:37:25 +0800 Subject: [PATCH 1/4] chore: add example --- README.md | 27 ++++++++++----------- examples/mod.rs | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 examples/mod.rs diff --git a/README.md b/README.md index 1e0161c..910906c 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,17 @@ -tantivy-jieba -============================ +# tantivy-jieba [![Crates.io version][crate-img]][crate] [![docs.rs][docs-img]][docs] [![Changelog][changelog-img]][changelog] [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba?ref=badge_shield) - An adapter that bridges between tantivy and jieba-rs. -Usage -=========================== +# Usage Add dependency `tantivy-jieba` to your `Cargo.toml`. -Example ---------------------------- +## Example ```rust use tantivy::tokenizer::*; @@ -25,8 +21,7 @@ assert_eq!(token_stream.next().unwrap().text, "测试"); assert!(token_stream.next().is_none()); ``` -Register tantivy tokenizer ---------------------------- +## Register tantivy tokenizer ```rust use tantivy::schema::Schema; @@ -38,13 +33,15 @@ index.tokenizers() .register("jieba", tokenizer); ``` -[crate-img]: https://img.shields.io/crates/v/tantivy-jieba.svg -[crate]: https://crates.io/crates/tantivy-jieba -[changelog-img]: https://img.shields.io/badge/changelog-online-blue.svg -[changelog]: https://github.com/jiegec/tantivy-jieba/blob/master/CHANGELOG.md -[docs-img]: https://docs.rs/tantivy-jieba/badge.svg -[docs]: https://docs.rs/tantivy-jieba +See [examples/mod.rs](examples/mod.rs) for detailed example. +[crate-img]: https://img.shields.io/crates/v/tantivy-jieba.svg +[crate]: https://crates.io/crates/tantivy-jieba +[changelog-img]: https://img.shields.io/badge/changelog-online-blue.svg +[changelog]: https://github.com/jiegec/tantivy-jieba/blob/master/CHANGELOG.md +[docs-img]: https://docs.rs/tantivy-jieba/badge.svg +[docs]: https://docs.rs/tantivy-jieba ## License + [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba?ref=badge_large) diff --git a/examples/mod.rs b/examples/mod.rs new file mode 100644 index 0000000..15208e1 --- /dev/null +++ b/examples/mod.rs @@ -0,0 +1,63 @@ +use tantivy::collector::TopDocs; +use tantivy::doc; +use tantivy::query::QueryParser; +use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, Value}; +use tantivy::tokenizer::*; +use tantivy::Index; +use tantivy::TantivyDocument; + +fn main() { + // Build schema + let mut schema_builder = Schema::builder(); + let name = schema_builder.add_text_field( + "name", + TextOptions::default() + .set_indexing_options( + TextFieldIndexing::default() + .set_tokenizer("jieba") + .set_index_option(IndexRecordOption::WithFreqsAndPositions), + ) + .set_stored(), + ); + let schema = schema_builder.build(); + + // Register tantivy tokenizer + let tokenizer = tantivy_jieba::JiebaTokenizer {}; + let index = Index::create_in_ram(schema); + let analyzer = TextAnalyzer::builder(tokenizer) + .filter(RemoveLongFilter::limit(40)) + .filter(LowerCaser) + .filter(Stemmer::default()) + .build(); + index.tokenizers().register("jieba", analyzer); + + // Index some documents + let mut index_writer = index.writer(50_000_000).unwrap(); + index_writer.add_document(doc!( + name => "张华考上了北京大学;李萍进了中等技术学校;我在百货公司当售货员:我们都有光明的前途", + )).unwrap(); + index_writer.commit().unwrap(); + + // Search keywords + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let query_parser = QueryParser::for_index(&index, vec![name]); + let query = query_parser.parse_query("售货员").unwrap(); + let top_docs = searcher.search(&query, &TopDocs::with_limit(10)).unwrap(); + println!("Search Result:"); + for (_, doc_address) in top_docs { + let retrieved_doc: TantivyDocument = searcher.doc(doc_address).unwrap(); + let val = retrieved_doc.get_first(name).unwrap(); + let res = val.as_str().unwrap_or_default().to_string(); + println!("{res}"); + assert_eq!( + res, + *"张华考上了北京大学;李萍进了中等技术学校;我在百货公司当售货员:我们都有光明的前途" + ); + } +} + +#[test] +fn test() { + main(); +} From 116b48479785958806b3ee686695f5461676fa61 Mon Sep 17 00:00:00 2001 From: L0serj3rry Date: Sun, 11 Aug 2024 16:46:18 +0800 Subject: [PATCH 2/4] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 910906c..da453e8 100644 --- a/README.md +++ b/README.md @@ -35,12 +35,12 @@ index.tokenizers() See [examples/mod.rs](examples/mod.rs) for detailed example. -[crate-img]: https://img.shields.io/crates/v/tantivy-jieba.svg -[crate]: https://crates.io/crates/tantivy-jieba +[crate-img]: https://img.shields.io/crates/v/tantivy-jieba.svg +[crate]: https://crates.io/crates/tantivy-jieba [changelog-img]: https://img.shields.io/badge/changelog-online-blue.svg -[changelog]: https://github.com/jiegec/tantivy-jieba/blob/master/CHANGELOG.md -[docs-img]: https://docs.rs/tantivy-jieba/badge.svg -[docs]: https://docs.rs/tantivy-jieba +[changelog]: https://github.com/jiegec/tantivy-jieba/blob/master/CHANGELOG.md +[docs-img]: https://docs.rs/tantivy-jieba/badge.svg +[docs]: https://docs.rs/tantivy-jieba ## License From 1fc77b2e481d514656fe392d1a71c061dd80d2d4 Mon Sep 17 00:00:00 2001 From: L0serj3rry Date: Sun, 11 Aug 2024 16:53:22 +0800 Subject: [PATCH 3/4] chore: adjust title --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index da453e8..c6071da 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,6 @@ See [examples/mod.rs](examples/mod.rs) for detailed example. [docs-img]: https://docs.rs/tantivy-jieba/badge.svg [docs]: https://docs.rs/tantivy-jieba -## License +# License [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba?ref=badge_large) From ab0286cf65ba297e8f5a65a16806757a8406585f Mon Sep 17 00:00:00 2001 From: L0serj3rry Date: Sun, 11 Aug 2024 16:56:57 +0800 Subject: [PATCH 4/4] chore: adjust title --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c6071da..8a0d4e7 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,11 @@ An adapter that bridges between tantivy and jieba-rs. -# Usage +## Usage Add dependency `tantivy-jieba` to your `Cargo.toml`. -## Example +### Example ```rust use tantivy::tokenizer::*; @@ -21,7 +21,7 @@ assert_eq!(token_stream.next().unwrap().text, "测试"); assert!(token_stream.next().is_none()); ``` -## Register tantivy tokenizer +### Register tantivy tokenizer ```rust use tantivy::schema::Schema; @@ -42,6 +42,6 @@ See [examples/mod.rs](examples/mod.rs) for detailed example. [docs-img]: https://docs.rs/tantivy-jieba/badge.svg [docs]: https://docs.rs/tantivy-jieba -# License +## License [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fjiegec%2Ftantivy-jieba?ref=badge_large)