Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge #372
Browse files Browse the repository at this point in the history
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish

The bug comes from the typo tolerance: to determine how many typos are accepted, we were counting the bytes in a word instead of its characters.
With Chinese script characters, which take 3 bytes each in UTF-8, we were allowing 2 typos on 3-character words.
We now count the number of characters instead of the number of bytes to assign the typo tolerance.

Related to [Meilisearch#1714](meilisearch/meilisearch#1714)

Co-authored-by: many <maxime@meilisearch.com>
  • Loading branch information
bors[bot] and ManyTheFish authored Sep 28, 2021
2 parents 3b47994 + 8046ae4 commit b2a3325
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
2 changes: 1 addition & 1 deletion milli/src/search/query_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
/// and the provided word length.
fn typos(word: String, authorize_typos: bool) -> QueryKind {
if authorize_typos {
match word.len() {
match word.chars().count() {
0..=4 => QueryKind::exact(word),
5..=8 => QueryKind::tolerant(1, word),
_ => QueryKind::tolerant(2, word),
Expand Down
37 changes: 37 additions & 0 deletions milli/src/update/index_documents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -981,4 +981,41 @@ mod tests {
let count = index.number_of_documents(&rtxn).unwrap();
assert_eq!(count, 4);
}

/// Regression test for Meilisearch issue #1714.
///
/// Indexes two documents with Chinese titles, checks the word-to-documents
/// entries for "化妆包" and "包", and then verifies that a search for "化妆包"
/// with typos authorized returns a single document.
#[test]
fn test_meilisearch_1714() {
    // Open a fresh index in a temporary directory.
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &tmp_dir).unwrap();

    let docs = documents!([
        {"id": "123", "title": "小化妆包" },
        {"id": "456", "title": "Ipad 包" }
    ]);

    // Index both documents inside a single write transaction.
    let mut wtxn = index.write_txn().unwrap();
    IndexDocuments::new(&mut wtxn, &index, 0).execute(docs, |_, _| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    // "化妆包" must be attached to the first document only.
    let first_word_docids = index.word_docids.get(&rtxn, "化妆包").unwrap().unwrap();
    assert_eq!(first_word_docids.len(), 1);

    // "包" must be attached to the second document only.
    let second_word_docids = index.word_docids.get(&rtxn, "包").unwrap().unwrap();
    assert_eq!(second_word_docids.len(), 1);

    let mut typo_search = crate::Search::new(&rtxn, &index);
    typo_search.query("化妆包");
    typo_search.authorize_typos(true);
    typo_search.optional_words(true);

    // Even with typos authorized, exactly one document must come back.
    let result = typo_search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}
}

0 comments on commit b2a3325

Please sign in to comment.