diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py
index 1934e35a575d14..8ff40c573e3c2b 100644
--- a/src/transformers/convert_slow_tokenizer.py
+++ b/src/transformers/convert_slow_tokenizer.py
@@ -1134,9 +1134,9 @@ def tokenizer(self, proto):
             )
             tokenizer.add_special_tokens(
                 [
-                    AddedToken("<unk>", normalized=True),
-                    AddedToken("<s>", normalized=True),
-                    AddedToken("</s>", normalized=True),
+                    AddedToken("<unk>", normalized=False),
+                    AddedToken("<s>", normalized=False),
+                    AddedToken("</s>", normalized=False),
                 ]
             )
         else:
diff --git a/src/transformers/models/llama/tokenization_llama_fast.py b/src/transformers/models/llama/tokenization_llama_fast.py
index c3946d83b0e0b8..095f65b628b202 100644
--- a/src/transformers/models/llama/tokenization_llama_fast.py
+++ b/src/transformers/models/llama/tokenization_llama_fast.py
@@ -77,6 +77,7 @@ class LlamaTokenizerFast(PreTrainedTokenizerFast):
     vocab_files_names = VOCAB_FILES_NAMES
     slow_tokenizer_class = LlamaTokenizer
     padding_side = "left"
+    model_input_names = ["input_ids", "attention_mask"]
 
     def __init__(
         self,
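
For context (not part of the diff): a minimal sketch of why `normalized=False` matters when registering Llama's control tokens with the `tokenizers` library. The toy vocabulary and the `Replace(" ", "▁")` normalizer below are illustrative stand-ins for the SentencePiece-style pipeline built by `convert_slow_tokenizer.py`, not the converter's actual setup.

```python
from tokenizers import Tokenizer, AddedToken
from tokenizers.models import WordLevel
from tokenizers.normalizers import Replace

# Toy vocabulary: a placeholder, not the real Llama vocab.
vocab = {"[UNK]": 0, "▁Hello": 1, "<s>": 2}
tok = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))

# SentencePiece-style normalization: spaces are rewritten to "▁" before the model runs.
tok.normalizer = Replace(" ", "▁")

# With normalized=False the special token is matched against the *raw* input, before the
# normalizer touches it, so "<s>" is reliably split off as a single control token.
# With normalized=True (the old behaviour removed in the diff above), the match is
# attempted against the normalized text instead.
tok.add_special_tokens([AddedToken("<s>", normalized=False)])

# Expected to give ["<s>", "▁Hello"]: the control token first, then the normalized remainder.
print(tok.encode("<s> Hello").tokens)
```

The `model_input_names` line in the second hunk keeps the fast tokenizer's outputs aligned with the slow `LlamaTokenizer`: only `input_ids` and `attention_mask` are returned, with no `token_type_ids`.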