Skip to content

Commit

Permalink
Small fix for SacreBLEUScore and the mteval-v13a tokenizer (#1778)
Browse files: browse the repository at this point in the history
  • Loading branch information
RistoAle97 authored May 13, 2023
1 parent 17c0e9f commit 2d35650
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/torchmetrics/functional/text/sacre_bleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _tokenize_base(cls, line: str) -> str:

@classmethod
def _tokenize_13a(cls, line: str) -> str:
- """Tokenizes an line using a relatively minimal tokenization that is equivalent to mteval-v13a, used by WMT.
+ """Tokenizes a line using a relatively minimal tokenization that is equivalent to mteval-v13a, used by WMT.
Args:
line: input sentence
Expand All @@ -193,7 +193,7 @@ def _tokenize_13a(cls, line: str) -> str:
line = line.replace("&lt;", "<")
line = line.replace("&gt;", ">")

- return cls._tokenize_regex(line)
+ return cls._tokenize_regex(f" {line} ")

@classmethod
def _tokenize_zh(cls, line: str) -> str:
Expand Down

0 comments on commit 2d35650

Please sign in to comment.