Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Fix: Count the number of char instead of counting bytes to assign the typo tolerance (fix Meilisearch 1714) #372

Merged
merged 2 commits into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion milli/src/search/query_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
/// and the provided word length.
fn typos(word: String, authorize_typos: bool) -> QueryKind {
if authorize_typos {
match word.len() {
match word.chars().count() {
0..=4 => QueryKind::exact(word),
5..=8 => QueryKind::tolerant(1, word),
_ => QueryKind::tolerant(2, word),
Expand Down
37 changes: 37 additions & 0 deletions milli/src/update/index_documents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -981,4 +981,41 @@ mod tests {
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 4);
}

/// Regression test for Meilisearch issue 1714.
///
/// Indexes two documents whose titles contain CJK text, then checks both the
/// raw `word_docids` postings and a typo-tolerant search for "化妆包": the
/// search must return exactly one document.
#[test]
fn test_meilisearch_1714() {
    // Build a throwaway index inside a temporary directory.
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &tmp_dir).unwrap();

    let docs = documents!([
        {"id": "123", "title": "小化妆包" },
        {"id": "456", "title": "Ipad 包" }
    ]);

    // Index both documents within a single write transaction.
    let mut wtxn = index.write_txn().unwrap();
    let indexer = IndexDocuments::new(&mut wtxn, &index, 0);
    indexer.execute(docs, |_, _| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    // The postings for "化妆包" must hold exactly one document (expected: the first).
    let docids_for_phrase = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap();
    assert_eq!(docids_for_phrase.len(), 1);

    // The postings for "包" must hold exactly one document (expected: the second).
    let docids_for_single = index.word_docids.get(&rtxn, "包").unwrap().unwrap();
    assert_eq!(docids_for_single.len(), 1);

    // Run the query with typos authorized and optional words enabled.
    let mut request = crate::Search::new(&rtxn, &index);
    request.query("化妆包");
    request.authorize_typos(true);
    request.optional_words(true);

    // Exactly one document id must come back from the search.
    let outcome = request.execute().unwrap();
    assert_eq!(outcome.documents_ids.len(), 1);
}
}