From 53f5e97ae52ffcd50806d1f78224d2d4e1444728 Mon Sep 17 00:00:00 2001
From: JP Hwang
Date: Mon, 1 Jul 2024 16:06:47 +0100
Subject: [PATCH 1/2] Add kagome validation

---
 weaviate/collections/classes/config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
index dae73a7d8..288889be3 100644
--- a/weaviate/collections/classes/config.py
+++ b/weaviate/collections/classes/config.py
@@ -144,6 +144,7 @@ class Tokenization(str, Enum):
     FIELD = "field"
     GSE = "gse"
     TRIGRAM = "trigram"
+    KAGOME_KR = "kagome_kr"
 
 
 class GenerativeSearches(str, Enum):

From 1b7ec9270db7cbc71ed361862c19050f4a7bc4c9 Mon Sep 17 00:00:00 2001
From: JP Hwang
Date: Wed, 3 Jul 2024 20:21:54 +0100
Subject: [PATCH 2/2] Add kagome_kr to docstring

---
 weaviate/collections/classes/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
index 288889be3..7bab9a56a 100644
--- a/weaviate/collections/classes/config.py
+++ b/weaviate/collections/classes/config.py
@@ -136,6 +136,8 @@ class Tokenization(str, Enum):
             Tokenize using GSE (for Chinese and Japanese).
         `TRIGRAM`
             Tokenize into trigrams.
+        `KAGOME_KR`
+            Tokenize using the 'Kagome' tokenizer and a Korean MeCab dictionary (for Korean).
     """
 
     WORD = "word"