From 2a72aef2ca68bef553307f77c434da7871ab1f20 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Sun, 21 Apr 2024 14:15:40 +0200 Subject: [PATCH 1/2] Update sentencepiece.py --- src/mistral_common/tokens/tokenizers/sentencepiece.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mistral_common/tokens/tokenizers/sentencepiece.py b/src/mistral_common/tokens/tokenizers/sentencepiece.py index 8cd2a9c..12f62a8 100644 --- a/src/mistral_common/tokens/tokenizers/sentencepiece.py +++ b/src/mistral_common/tokens/tokenizers/sentencepiece.py @@ -289,6 +289,8 @@ def encode_assistant_message(self, message: AssistantMessage, is_before_last_use ] elif message.content: curr_tokens = self.tokenizer.encode(message.content, bos=False, eos=False) + elif message.content == "": + raise TokenizerException(f"Empty assistant message.") else: raise TokenizerException(f"Invalid assistant message: {message.content}") From e06e82a2026946ba3b84849371e1d934cc4f5c80 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Sun, 21 Apr 2024 14:16:21 +0200 Subject: [PATCH 2/2] Update src/mistral_common/tokens/tokenizers/sentencepiece.py --- src/mistral_common/tokens/tokenizers/sentencepiece.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mistral_common/tokens/tokenizers/sentencepiece.py b/src/mistral_common/tokens/tokenizers/sentencepiece.py index 12f62a8..bc8089b 100644 --- a/src/mistral_common/tokens/tokenizers/sentencepiece.py +++ b/src/mistral_common/tokens/tokenizers/sentencepiece.py @@ -290,7 +290,7 @@ def encode_assistant_message(self, message: AssistantMessage, is_before_last_use elif message.content: curr_tokens = self.tokenizer.encode(message.content, bos=False, eos=False) elif message.content == "": - raise TokenizerException(f"Empty assistant message.") + raise TokenizerException("Empty assistant message.") else: raise TokenizerException(f"Invalid assistant message: {message.content}")