From 1f7d20ad4fe6ad91339263d3745bbf54d9aef66c Mon Sep 17 00:00:00 2001 From: edknv <109497216+edknv@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:42:01 -0800 Subject: [PATCH] fix tokenizer cache file name bug (#36) --- crossfit/op/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crossfit/op/tokenize.py b/crossfit/op/tokenize.py index 3f549e3..dee2ac4 100644 --- a/crossfit/op/tokenize.py +++ b/crossfit/op/tokenize.py @@ -159,7 +159,7 @@ def from_pretrained(cls, name, cache_dir=None): # Save vocabulary to disk # `save_vocabulary()` automatically appends `-vocab.txt` suffix. - vocab_path = tokenizer.save_vocabulary(cache_dir, "{tokenizer_class}")[0] + vocab_path = tokenizer.save_vocabulary(cache_dir, f"{tokenizer_class}")[0] # Hash the vocabulary and save it hash_vocab(vocab_path, hashed_vocab_path)