
ValueError: A custom logits processor of type <class 'transformers.generation.logits_process.ForceTokensLogitsProcessor'>.... #13

JensCoetsiers opened this issue May 5, 2024 · 0 comments

Code as in the guide:

import gc
import torch
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader
from transformers.models.whisper.english_normalizer import BasicTextNormalizer

eval_dataloader = DataLoader(test_dataset, batch_size=4, collate_fn=data_collator)
forced_decoder_ids = processor.get_decoder_prompt_ids(language="Dutch", task="transcribe")
normalizer = BasicTextNormalizer()

predictions = []
references = []
normalized_predictions = []
normalized_references = []

model.eval()
for step, batch in enumerate(tqdm(eval_dataloader)):
    with torch.cuda.amp.autocast():
        with torch.no_grad():
            generated_tokens = (
                model.generate(
                    input_features=batch["input_features"].to("cuda"),
                    forced_decoder_ids=forced_decoder_ids,
                    max_new_tokens=255,
                )
                .cpu()
                .numpy()
            )
            labels = batch["labels"].cpu().numpy()
            # Replace the -100 padding used for the loss with the real pad token before decoding.
            labels = np.where(labels != -100, labels, processor.tokenizer.pad_token_id)
            decoded_preds = processor.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
            decoded_labels = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)
            predictions.extend(decoded_preds)
            references.extend(decoded_labels)
            normalized_predictions.extend([normalizer(pred).strip() for pred in decoded_preds])
            normalized_references.extend([normalizer(label).strip() for label in decoded_labels])
    del generated_tokens, labels, batch
    gc.collect()

wer = 100 * metric.compute(predictions=predictions, references=references)
normalized_wer = 100 * metric.compute(predictions=normalized_predictions, references=normalized_references)
eval_metrics = {"eval/wer": wer, "eval/normalized_wer": normalized_wer}

print(f"{wer=} and {normalized_wer=}")
print(eval_metrics)
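(The snippet assumes objects defined earlier in the guide: model, processor, test_dataset, data_collator, and metric. Presumably metric comes from the evaluate library, along these lines:)

# Assumed setup from earlier in the guide, not shown in this issue:
import evaluate

metric = evaluate.load("wer")  # word error rate, used by metric.compute(...) above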

Full error:


ValueError Traceback (most recent call last)
Cell In[33], line 21
     18     with torch.cuda.amp.autocast():
     19         with torch.no_grad():
     20             generated_tokens = (
---> 21                 model.generate(
     22                     input_features=batch["input_features"].to("cuda"),
     23                     forced_decoder_ids=forced_decoder_ids,
     24                     max_new_tokens=255,
     25                 )
     26                 .cpu()
     27                 .numpy()
     28             )
     29             labels = batch["labels"].cpu().numpy()
     30             labels = np.where(labels != -100, labels, processor.tokenizer.pad_token_id)

File c:\Users\jensc\Desktop\BP_Models\WhisperV3\lib\site-packages\peft\peft_model.py:612, in PeftModel.generate(self, *args, **kwargs)
    610 with self._enable_peft_forward_hooks(*args, **kwargs):
    611     kwargs = {k: v for k, v in kwargs.items() if k not in self.special_peft_forward_args}
--> 612     return self.get_base_model().generate(*args, **kwargs)

File c:\Users\jensc\Desktop\BP_Models\WhisperV3\lib\site-packages\transformers\models\whisper\generation_whisper.py:543, in WhisperGenerationMixin.generate(self, input_features, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, return_timestamps, task, language, is_multilingual, prompt_ids, condition_on_prev_tokens, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, num_segment_frames, attention_mask, time_precision, return_token_timestamps, return_segments, return_dict_in_generate, **kwargs)
    540 if temperature is not None:
    541     kwargs["temperature"] = temperature
--> 543 outputs = super().generate(
    544     input_features,
    545     generation_config=generation_config,
    546     logits_processor=logits_processor,
    547     stopping_criteria=stopping_criteria,
    548     prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
    549     synced_gpus=synced_gpus,
    550     **kwargs,
    551 )
    553 if generation_config.return_token_timestamps and hasattr(generation_config, "alignment_heads"):
    554     outputs["token_timestamps"] = self._extract_token_timestamps(
    555         outputs, generation_config.alignment_heads, num_frames=generation_config.num_frames
    556     )

File c:\Users\jensc\Desktop\BP_Models\WhisperV3\lib\site-packages\torch\utils\_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)
...
    915         )
    916 default_list.extend(custom_list)
    917 return default_list

ValueError: A custom logits processor of type <class 'transformers.generation.logits_process.ForceTokensLogitsProcessor'> with values <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015389623160> has been passed to .generate(), but it has already been created with the values <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015572CF9300>. <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015572CF9300> has been created by passing the corresponding arguments to generate or by the model's config default values. If you just want to change the default values of logits processor consider passing them as arguments to .generate() instead of using a custom logits processor.
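The error message itself points at the workaround: instead of passing forced_decoder_ids (which makes transformers build a second ForceTokensLogitsProcessor on top of the one already created from the model's generation config), pass the values as arguments to .generate(). A minimal sketch along those lines, assuming a transformers version whose Whisper generate() accepts language/task directly (both appear in the WhisperGenerationMixin.generate signature in the traceback above); clearing generation_config.forced_decoder_ids first is an additional assumption, not something confirmed in this issue:

# Sketch of the workaround suggested by the error message: drop the explicit
# forced_decoder_ids and let Whisper's generate() derive the forced tokens
# from language/task, so only one ForceTokensLogitsProcessor is created.
model.generation_config.forced_decoder_ids = None  # assumption: clear the config default to avoid the clash
generated_tokens = (
    model.generate(
        input_features=batch["input_features"].to("cuda"),
        language="dutch",
        task="transcribe",
        max_new_tokens=255,
    )
    .cpu()
    .numpy()
)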
