diff --git a/README.md b/README.md
index 3c7147ff..37c9ff22 100644
--- a/README.md
+++ b/README.md
@@ -27,11 +27,23 @@ let splitter = TextSplitter::default()
     .with_trim_chunks(true);
 
 let chunks = splitter.chunks("your document text", max_characters);
+println!("{}", chunks.count());
 ```
 
 ### With Huggingface Tokenizer
 
-Requires the `tokenizers` feature to be activated.
+Requires the `tokenizers` feature to be activated and the `tokenizers` crate to be added to your dependencies. The example below, using `from_pretrained()`, also requires the tokenizers `http` feature to be enabled.
+
+<details>
+<summary>Click to show Cargo.toml.</summary>
+
+```toml
+[dependencies]
+text-splitter = { version = "0.6", features = ["tokenizers"] }
+tokenizers = { version = "0.15", features = ["http"] }
+```
+
+</details>
 ```rust
 use text_splitter::TextSplitter;
@@ -46,11 +58,24 @@ let splitter = TextSplitter::new(tokenizer)
     .with_trim_chunks(true);
 
 let chunks = splitter.chunks("your document text", max_tokens);
+println!("{}", chunks.count());
 ```
 
 ### With Tiktoken Tokenizer
 
-Requires the `tiktoken-rs` feature to be activated.
+Requires the `tiktoken-rs` feature to be activated and the `tiktoken-rs` crate to be added to your dependencies.
+
+<details>
+<summary>Click to show Cargo.toml.</summary>
+
+```toml
+[dependencies]
+text-splitter = { version = "0.6", features = ["tiktoken-rs"] }
+tiktoken-rs = "0.5"
+```
+
+</details>
 ```rust
 use text_splitter::TextSplitter;
@@ -65,6 +89,7 @@ let splitter = TextSplitter::new(tokenizer)
     .with_trim_chunks(true);
 
 let chunks = splitter.chunks("your document text", max_tokens);
+println!("{}", chunks.count());
 ```
 
 ### Using a Range for Chunk Capacity
@@ -85,6 +110,7 @@ let max_characters = 500..2000;
 
 let splitter = TextSplitter::default().with_trim_chunks(true);
 let chunks = splitter.chunks("your document text", max_characters);
+println!("{}", chunks.count());
 ```
 
 ## Method