# ik-rs

ik-analyzer for Rust: a Chinese text segmentation library.
## Usage

Add `ik-rs` to your `Cargo.toml`:

```toml
[dependencies]
ik-rs = "0.5.0"
```
Then tokenize text with `IKSegmenter`:

```rust
#[cfg(test)]
mod test {
    use ik_rs::core::ik_segmenter::{IKSegmenter, TokenMode};

    #[test]
    fn test_ik() {
        let mut ik = IKSegmenter::new();
        let text = "中华人民共和国";
        // TokenMode::INDEX yields the finest-grained tokens;
        // use TokenMode::SEARCH for coarser-grained segmentation.
        let tokens = ik.tokenize(text, TokenMode::INDEX);
        let mut token_texts = Vec::new();
        for token in tokens.iter() {
            println!("{:?}", token);
            token_texts.push(token.get_lexeme_text());
        }
        assert_eq!(
            token_texts,
            vec![
                "中华人民共和国",
                "中华人民",
                "中华",
                "华人",
                "人民共和国",
                "人民",
                "共和国",
                "共和",
                "国"
            ]
        );
    }
}
```
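The test above asserts the fine-grained `INDEX` output. Below is a minimal sketch of the coarser `TokenMode::SEARCH` mode, using the same API as the test above; the exact tokens produced depend on the bundled dictionary, so no output is asserted here:

```rust
use ik_rs::core::ik_segmenter::{IKSegmenter, TokenMode};

fn main() {
    let mut ik = IKSegmenter::new();
    // SEARCH mode favors coarser-grained tokens than INDEX mode,
    // which typically means fewer, longer terms for the same input.
    let tokens = ik.tokenize("中华人民共和国", TokenMode::SEARCH);
    for token in tokens.iter() {
        println!("{}", token.get_lexeme_text());
    }
}
```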
## High performance

Run the benchmarks with `cargo bench`:

```text
ik_tokenize_benchmark   time:   [19.366 µs 19.572 µs 19.850 µs]
                        change: [-1.5364% -0.4029% +0.7357%] (p = 0.51 > 0.05)
```
## Use with Tantivy

To use ik-rs as a tokenizer for the Tantivy search engine, see the tantivy-ik project. A sketch of the registration step is shown below.
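This is a hedged sketch of wiring a custom tokenizer into Tantivy, not the confirmed tantivy-ik API: the `tantivy_ik::IkTokenizer` import, type name, and `default()` constructor are assumptions for illustration (check the tantivy-ik repository for the real names), while the Tantivy calls themselves (`Schema::builder`, `Index::create_in_ram`, `tokenizers().register`) are standard Tantivy.

```rust
use tantivy::schema::{Schema, TextFieldIndexing, TextOptions};
use tantivy::Index;
// Hypothetical import: the real path and type name may differ in tantivy-ik.
use tantivy_ik::IkTokenizer;

fn main() {
    // Index the "body" field with a tokenizer registered under the name "ik".
    let mut schema_builder = Schema::builder();
    let indexing = TextFieldIndexing::default().set_tokenizer("ik");
    let options = TextOptions::default()
        .set_indexing_options(indexing)
        .set_stored();
    schema_builder.add_text_field("body", options);
    let index = Index::create_in_ram(schema_builder.build());

    // Register the IK tokenizer under the name the schema refers to.
    // IkTokenizer::default() is an assumed constructor for this sketch.
    index.tokenizers().register("ik", IkTokenizer::default());
}
```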
## Contributing

Rust developers and search-engine developers are welcome to join us and help maintain this project!
You can open a PR or submit an issue, and star ⭐️ or fork this project to show your support!