Skip to content

Commit

Permalink
tokenizer: trust-remote-code
Browse files: browse the repository at this point in the history
  • Loading branch information
baberabb committed Oct 1, 2024
1 parent 15ffb0d commit cd77207
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions lm_eval/models/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,12 @@ def __init__(
seed: int = 1234,
max_length: Optional[int] = 2048,
add_bos_token: bool = False,
custom_prefix_token_id=None,
custom_prefix_token_id: int = None,
# send the requests as tokens or strings
tokenized_requests=True,
tokenized_requests: bool = True,
trust_remote_code: bool = False,
revision: Optional[str] = "main",
use_fast_tokenizer: bool = True,
**kwargs,
) -> None:
super().__init__()
Expand Down Expand Up @@ -128,7 +131,10 @@ def __init__(
import transformers

self.tokenizer = transformers.AutoTokenizer.from_pretrained(
self.tokenizer if self.tokenizer else self.model
self.tokenizer if self.tokenizer else self.model,
trust_remote_code=trust_remote_code,
revision=revision,
use_fast=use_fast_tokenizer,
)
# Not used as the API will handle padding but to mirror the behavior of the HFLM
self.tokenizer = configure_pad_token(self.tokenizer)
Expand All @@ -153,6 +159,9 @@ def __init__(
assert isinstance(tokenizer, str), "tokenizer must be a string"
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
tokenizer,
trust_remote_code=trust_remote_code,
revision=revision,
use_fast=use_fast_tokenizer,
)

@abc.abstractmethod
Expand Down

0 comments on commit cd77207

Please sign in to comment.