From 3150f2b489eb74c1763d23e48a6cb20bc1d19f43 Mon Sep 17 00:00:00 2001 From: Dmitry Ustalov Date: Wed, 22 Nov 2023 09:30:33 +0100 Subject: [PATCH 1/4] Explicitly list the exported symbols (#249) --- sacrebleu/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py index e7342fc..ea73221 100644 --- a/sacrebleu/__init__.py +++ b/sacrebleu/__init__.py @@ -29,3 +29,15 @@ from .compat import corpus_bleu, raw_corpus_bleu, sentence_bleu # noqa: F401 from .compat import corpus_chrf, sentence_chrf # noqa: F401 from .compat import corpus_ter, sentence_ter # noqa: F401 + +__all__ = [ + 'smart_open', 'SACREBLEU_DIR', 'download_test_set', + 'get_source_file', 'get_reference_files', + 'get_available_testsets', 'get_langpairs_for_testset', + 'extract_word_ngrams', 'extract_char_ngrams', + 'DATASETS', + 'BLEU', 'CHRF', 'TER', + 'corpus_bleu', 'raw_corpus_bleu', 'sentence_bleu', + 'corpus_chrf', 'sentence_chrf', + 'corpus_ter', 'sentence_ter' +] From 275e2920af0f73767597e43acf1c1d05e97227f3 Mon Sep 17 00:00:00 2001 From: Dmitry Ustalov Date: Tue, 28 Nov 2023 16:40:56 +0100 Subject: [PATCH 2/4] Update check-build.yml (#252) * Update check-build.yml to run tests on every push and every pull request --- .github/workflows/check-build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/check-build.yml b/.github/workflows/check-build.yml index 881091a..d9c113b 100644 --- a/.github/workflows/check-build.yml +++ b/.github/workflows/check-build.yml @@ -1,7 +1,9 @@ name: check-build on: - pull_request + push: + pull_request: + workflow_dispatch: env: PYTHONUTF8: "1" From 9e57a5176ba8f010f03873478528837ea8bf6ee2 Mon Sep 17 00:00:00 2001 From: Dmitry Ustalov Date: Tue, 28 Nov 2023 16:44:25 +0100 Subject: [PATCH 3/4] Use more linters, better (#250) * Use more linters, better * Rely on Ruff during CI only due to the outdated Python versions * Run on Python 3.12, too, and fix typing on Windows 
* Do not test on Python 3.12 as mecab-python3 is not available yet * Build (but not publish) during CI * Add wheel * Update check-build.yml * Update the dataset URLs to use HTTPS --- .github/workflows/check-build.yml | 16 ++-- .gitignore | 3 +- Makefile | 1 + mypy.ini | 9 ++ sacrebleu/__init__.py | 18 ++-- sacrebleu/compat.py | 2 +- sacrebleu/dataset/__init__.py | 116 +++++++++++++------------- sacrebleu/dataset/__main__.py | 2 + sacrebleu/dataset/base.py | 10 +-- sacrebleu/dataset/wmt_xml.py | 6 +- sacrebleu/sacrebleu.py | 2 +- sacrebleu/significance.py | 15 ++-- sacrebleu/tokenizers/tokenizer_spm.py | 1 - sacrebleu/utils.py | 6 +- scripts/perf_test.py | 4 +- setup.cfg | 4 +- setup.py | 6 +- test.sh | 2 +- test/test_api.py | 8 +- test/test_dataset.py | 2 +- test/test_significance.py | 11 ++- 21 files changed, 134 insertions(+), 110 deletions(-) diff --git a/.github/workflows/check-build.yml b/.github/workflows/check-build.yml index d9c113b..c546c0e 100644 --- a/.github/workflows/check-build.yml +++ b/.github/workflows/check-build.yml @@ -29,8 +29,6 @@ jobs: - os: ubuntu-20.04 python-version: '3.6' # test Python 3.6 on older Ubuntu instead steps: - # - name: update - # run: sudo apt-get -y update - uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v3 @@ -44,12 +42,18 @@ jobs: run: choco install wget unzip - name: Install python dependencies run: | - python -m pip install --upgrade pip - pip install pytest - pip install .[ja] - pip install .[ko] + python3 -m pip install --upgrade pip + pip3 install .[dev] + pip3 install .[ja] + pip3 install .[ko] + - name: Lint with Mypy + run: mypy sacrebleu scripts test + - name: Lint with Ruff + uses: chartboost/ruff-action@v1 - name: Python pytest test suite run: python3 -m pytest - name: CLI bash test suite shell: bash run: ./test.sh + - name: Build + run: python3 setup.py sdist bdist_wheel diff --git a/.gitignore b/.gitignore index 6bf040a..e2d4f53 100644 --- a/.gitignore 
+++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__ sacrebleu.egg-info .sacrebleu *~ -.DS_Store \ No newline at end of file +.DS_Store +.idea/ diff --git a/Makefile b/Makefile index f6f8236..6b378c7 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ .PHONY: test test: + mypy sacrebleu scripts test python3 -m pytest bash test.sh diff --git a/mypy.ini b/mypy.ini index 7207d68..26ff095 100644 --- a/mypy.ini +++ b/mypy.ini @@ -18,3 +18,12 @@ ignore_missing_imports = True [mypy-MeCab.*] ignore_missing_imports = True + +[mypy-mecab_ko.*] +ignore_missing_imports = True + +[mypy-mecab_ko_dic.*] +ignore_missing_imports = True + +[mypy-sentencepiece.*] +ignore_missing_imports = True diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py index ea73221..19f7059 100644 --- a/sacrebleu/__init__.py +++ b/sacrebleu/__init__.py @@ -18,17 +18,17 @@ __description__ = 'Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores' -from .utils import smart_open, SACREBLEU_DIR, download_test_set # noqa: F401 -from .utils import get_source_file, get_reference_files # noqa: F401 -from .utils import get_available_testsets, get_langpairs_for_testset # noqa: F401 -from .metrics.helpers import extract_word_ngrams, extract_char_ngrams # noqa: F401 -from .dataset import DATASETS # noqa: F401 -from .metrics import BLEU, CHRF, TER # noqa: F401 +from .utils import smart_open, SACREBLEU_DIR, download_test_set +from .utils import get_source_file, get_reference_files +from .utils import get_available_testsets, get_langpairs_for_testset +from .metrics.helpers import extract_word_ngrams, extract_char_ngrams +from .dataset import DATASETS +from .metrics import BLEU, CHRF, TER # Backward compatibility functions for old style API access (<= 1.4.10) -from .compat import corpus_bleu, raw_corpus_bleu, sentence_bleu # noqa: F401 -from .compat import corpus_chrf, sentence_chrf # noqa: F401 -from .compat import corpus_ter, sentence_ter # noqa: F401 +from .compat import 
corpus_bleu, raw_corpus_bleu, sentence_bleu +from .compat import corpus_chrf, sentence_chrf +from .compat import corpus_ter, sentence_ter __all__ = [ 'smart_open', 'SACREBLEU_DIR', 'download_test_set', diff --git a/sacrebleu/compat.py b/sacrebleu/compat.py index cce90e9..5735960 100644 --- a/sacrebleu/compat.py +++ b/sacrebleu/compat.py @@ -64,7 +64,7 @@ def raw_corpus_bleu(hypotheses: Sequence[str], def sentence_bleu(hypothesis: str, references: Sequence[str], smooth_method: str = 'exp', - smooth_value: float = None, + smooth_value: Optional[float] = None, lowercase: bool = False, tokenize=BLEU.TOKENIZER_DEFAULT, use_effective_order: bool = True) -> BLEUScore: diff --git a/sacrebleu/dataset/__init__.py b/sacrebleu/dataset/__init__.py index 19f4f16..037c8f6 100644 --- a/sacrebleu/dataset/__init__.py +++ b/sacrebleu/dataset/__init__.py @@ -14,6 +14,19 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. + +# This defines data locations. +# Right below are test sets. +# Beneath each test set, we define the location to download the test data. +# The other keys are each language pair contained in the tarball, and the respective locations of the source and reference data within each. +# Many of these are *.sgm files, which are processed to produce plain text that can be used by this script. 
+# The canonical location of unpacked, processed data is $SACREBLEU_DIR/$TEST/$SOURCE-$TARGET.{$SOURCE,$TARGET} +from .fake_sgml import FakeSGMLDataset, WMTAdditionDataset +from .iwslt_xml import IWSLTXMLDataset +from .plain_text import PlainTextDataset +from .tsv import TSVDataset +from .wmt_xml import WMTXMLDataset + # Detailed document metadata annotation in form DocumentID -> CountryCode - Domain - OptionalFinegrainedCountryCode # While the annotation is subjective with many unclear cases, it may provide useful insights # when applied on large data (TODO: annotate all documents from recent WMT years, at least for origlang=en, consider renaming "world" to "other"). @@ -59,19 +72,6 @@ COUNTRIES = sorted(list({v.split("-")[0] for v in SUBSETS["wmt19"].values()})) DOMAINS = sorted(list({v.split("-")[1] for v in SUBSETS["wmt19"].values()})) - -# This defines data locations. -# At the top level are test sets. -# Beneath each test set, we define the location to download the test data. -# The other keys are each language pair contained in the tarball, and the respective locations of the source and reference data within each. -# Many of these are *.sgm files, which are processed to produced plain text that can be used by this script. 
-# The canonical location of unpacked, processed data is $SACREBLEU_DIR/$TEST/$SOURCE-$TARGET.{$SOURCE,$TARGET} -from .fake_sgml import FakeSGMLDataset, WMTAdditionDataset -from .iwslt_xml import IWSLTXMLDataset -from .plain_text import PlainTextDataset -from .tsv import TSVDataset -from .wmt_xml import WMTXMLDataset - DATASETS = { # wmt "wmt22": WMTXMLDataset( @@ -151,7 +151,7 @@ ), "wmt21": WMTXMLDataset( "wmt21", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21.", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -181,7 +181,7 @@ ), "wmt21/B": WMTXMLDataset( "wmt21/B", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21 with reference B.", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -197,7 +197,7 @@ ), "wmt21/AB": WMTXMLDataset( "wmt21/AB", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21 with references A and B.", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -214,7 +214,7 @@ ), "wmt21/C": WMTXMLDataset( "wmt21/C", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21 with reference C", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -225,7 +225,7 @@ ), "wmt21/AC": WMTXMLDataset( "wmt21/AC", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21 with references A and C", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -236,7 +236,7 @@ ), "wmt21/D": WMTXMLDataset( 
"wmt21/D", - data=["http://data.statmt.org/wmt21/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/test.tgz"], description="Official evaluation data for WMT21 with reference D", md5=["32e7ab995bc318414375d60f0269af92"], langpairs={ @@ -247,7 +247,7 @@ ), "wmt21/dev": WMTXMLDataset( "wmt21/dev", - data=["http://data.statmt.org/wmt21/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt21/translation-task/dev.tgz"], description="Development data for WMT21,if multiple references are available, the first one is used.", md5=["165da59ac8dfb5b7cafd7e90b1cac672"], langpairs={ @@ -261,7 +261,7 @@ ), "wmt20/tworefs": FakeSGMLDataset( "wmt20/tworefs", - data=["http://data.statmt.org/wmt20/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt20/translation-task/test.tgz"], description="WMT20 news test sets with two references", md5=["3b1f777cfd2fb15ccf66e9bfdb2b1699"], langpairs={ @@ -294,7 +294,7 @@ ), "wmt20": FakeSGMLDataset( "wmt20", - data=["http://data.statmt.org/wmt20/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt20/translation-task/test.tgz"], description="Official evaluation data for WMT20", md5=["3b1f777cfd2fb15ccf66e9bfdb2b1699"], langpairs={ @@ -390,7 +390,7 @@ ), "wmt20/dev": FakeSGMLDataset( "wmt20/dev", - data=["http://data.statmt.org/wmt20/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt20/translation-task/dev.tgz"], description="Development data for tasks new to 2020.", md5=["037f2b37aab74febbb1b2307dc2afb54"], langpairs={ @@ -430,7 +430,7 @@ ), "wmt20/robust/set1": PlainTextDataset( "wmt20/robust/set1", - data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], + data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], md5=["a12ac9ebe89b72195041518dffc4a9d5"], description="WMT20 robustness task, set 1", langpairs={ @@ -446,7 +446,7 @@ ), "wmt20/robust/set2": PlainTextDataset( "wmt20/robust/set2", - 
data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], + data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], md5=["a12ac9ebe89b72195041518dffc4a9d5"], description="WMT20 robustness task, set 2", langpairs={ @@ -462,7 +462,7 @@ ), "wmt20/robust/set3": PlainTextDataset( "wmt20/robust/set3", - data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], + data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"], md5=["a12ac9ebe89b72195041518dffc4a9d5"], description="WMT20 robustness task, set 3", langpairs={ @@ -474,7 +474,7 @@ ), "wmt19": FakeSGMLDataset( "wmt19", - data=["http://data.statmt.org/wmt19/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt19/translation-task/test.tgz"], description="Official evaluation data.", md5=["84de7162d158e28403103b01aeefc39a"], citation=r"""@proceedings{ws-2019-machine, @@ -583,7 +583,7 @@ ), "wmt19/dev": FakeSGMLDataset( "wmt19/dev", - data=["http://data.statmt.org/wmt19/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt19/translation-task/dev.tgz"], description="Development data for tasks new to 2019.", md5=["f2ec7af5947c19e0cacb3882eb208002"], langpairs={ @@ -616,7 +616,7 @@ "wmt19/google/ar": WMTAdditionDataset( "wmt19/google/ar", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-ar.ref", ], description="Additional high-quality reference for WMT19/en-de.", @@ -629,7 +629,7 @@ "wmt19/google/arp": WMTAdditionDataset( "wmt19/google/arp", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-arp.ref", ], description="Additional paraphrase of wmt19/google/ar.", @@ 
-642,7 +642,7 @@ "wmt19/google/wmtp": WMTAdditionDataset( "wmt19/google/wmtp", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-wmtp.ref", ], description="Additional paraphrase of the official WMT19 reference.", @@ -655,7 +655,7 @@ "wmt19/google/hqr": WMTAdditionDataset( "wmt19/google/hqr", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqr.ref", ], description="Best human selected-reference between wmt19 and wmt19/google/ar.", @@ -668,7 +668,7 @@ "wmt19/google/hqp": WMTAdditionDataset( "wmt19/google/hqp", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqp.ref", ], description="Best human-selected reference between wmt19/google/arp and wmt19/google/wmtp.", @@ -681,7 +681,7 @@ "wmt19/google/hqall": WMTAdditionDataset( "wmt19/google/hqall", data=[ - "http://data.statmt.org/wmt19/translation-task/test.tgz", + "https://data.statmt.org/wmt19/translation-task/test.tgz", "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqall.ref", ], description="Best human-selected reference among original official reference and the Google reference and paraphrases.", @@ -693,7 +693,7 @@ ), "wmt18": FakeSGMLDataset( "wmt18", - data=["http://data.statmt.org/wmt18/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt18/translation-task/test.tgz"], md5=["f996c245ecffea23d0006fa4c34e9064"], description="Official evaluation data.", citation='@inproceedings{bojar-etal-2018-findings,\n title = 
"Findings of the 2018 Conference on Machine Translation ({WMT}18)",\n author = "Bojar, Ond{\v{r}}ej and\n Federmann, Christian and\n Fishel, Mark and\n Graham, Yvette and\n Haddow, Barry and\n Koehn, Philipp and\n Monz, Christof",\n booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",\n month = oct,\n year = "2018",\n address = "Belgium, Brussels",\n publisher = "Association for Computational Linguistics",\n url = "https://www.aclweb.org/anthology/W18-6401",\n pages = "272--303",\n}', @@ -758,7 +758,7 @@ ), "wmt18/test-ts": FakeSGMLDataset( "wmt18/test-ts", - data=["http://data.statmt.org/wmt18/translation-task/test-ts.tgz"], + data=["https://data.statmt.org/wmt18/translation-task/test-ts.tgz"], md5=["5c621a34d512cc2dd74162ae7d00b320"], description="Official evaluation sources with extra test sets interleaved.", langpairs={ @@ -780,7 +780,7 @@ ), "wmt18/dev": FakeSGMLDataset( "wmt18/dev", - data=["http://data.statmt.org/wmt18/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt18/translation-task/dev.tgz"], md5=["486f391da54a7a3247f02ebd25996f24"], description="Development data (Estonian<>English).", langpairs={ @@ -796,7 +796,7 @@ ), "wmt17": FakeSGMLDataset( "wmt17", - data=["http://data.statmt.org/wmt17/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt17/translation-task/test.tgz"], md5=["86a1724c276004aa25455ae2a04cef26"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2017:WMT1,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huang, Shujian and Huck, Matthias and Koehn, Philipp and Liu, Qun and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Post, Matt and Rubino, Raphael and Specia, Lucia and Turchi, Marco},\n title = {Findings of the 2017 Conference on Machine Translation (WMT17)},\n booktitle = {Proceedings of the Second Conference on Machine Translation, Volume 2: Shared Task 
Papers},\n month = {September},\n year = {2017},\n address = {Copenhagen, Denmark},\n publisher = {Association for Computational Linguistics},\n pages = {169--214},\n url = {http://www.aclweb.org/anthology/W17-4717}\n}", @@ -861,7 +861,7 @@ ), "wmt17/B": FakeSGMLDataset( "wmt17/B", - data=["http://data.statmt.org/wmt17/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt17/translation-task/test.tgz"], md5=["86a1724c276004aa25455ae2a04cef26"], description="Additional reference for EN-FI and FI-EN.", langpairs={ @@ -873,7 +873,7 @@ ), "wmt17/tworefs": FakeSGMLDataset( "wmt17/tworefs", - data=["http://data.statmt.org/wmt17/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt17/translation-task/test.tgz"], md5=["86a1724c276004aa25455ae2a04cef26"], description="Systems with two references.", langpairs={ @@ -886,7 +886,7 @@ ), "wmt17/improved": FakeSGMLDataset( "wmt17/improved", - data=["http://data.statmt.org/wmt17/translation-task/test-update-1.tgz"], + data=["https://data.statmt.org/wmt17/translation-task/test-update-1.tgz"], md5=["91dbfd5af99bc6891a637a68e04dfd41"], description="Improved zh-en and en-zh translations.", langpairs={ @@ -896,7 +896,7 @@ ), "wmt17/dev": FakeSGMLDataset( "wmt17/dev", - data=["http://data.statmt.org/wmt17/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt17/translation-task/dev.tgz"], md5=["9b1aa63c1cf49dccdd20b962fe313989"], description="Development sets released for new languages in 2017.", langpairs={ @@ -922,7 +922,7 @@ "wmt17/ms", data=[ "https://github.com/MicrosoftTranslator/Translator-HumanParityData/archive/master.zip", - "http://data.statmt.org/wmt17/translation-task/test-update-1.tgz", + "https://data.statmt.org/wmt17/translation-task/test-update-1.tgz", ], md5=["18fdaa7a3c84cf6ef688da1f6a5fa96f", "91dbfd5af99bc6891a637a68e04dfd41"], description="Additional Chinese-English references from Microsoft Research.", @@ -938,7 +938,7 @@ ), "wmt16": FakeSGMLDataset( "wmt16", - 
data=["http://data.statmt.org/wmt16/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt16/translation-task/test.tgz"], md5=["3d809cd0c2c86adb2c67034d15c4e446"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2016:WMT1,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huck, Matthias and Jimeno Yepes, Antonio and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Neveol, Aurelie and Neves, Mariana and Popel, Martin and Post, Matt and Rubino, Raphael and Scarton, Carolina and Specia, Lucia and Turchi, Marco and Verspoor, Karin and Zampieri, Marcos},\n title = {Findings of the 2016 Conference on Machine Translation},\n booktitle = {Proceedings of the First Conference on Machine Translation},\n month = {August},\n year = {2016},\n address = {Berlin, Germany},\n publisher = {Association for Computational Linguistics},\n pages = {131--198},\n url = {http://www.aclweb.org/anthology/W/W16/W16-2301}\n}", @@ -995,7 +995,7 @@ ), "wmt16/B": FakeSGMLDataset( "wmt16/B", - data=["http://data.statmt.org/wmt16/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt16/translation-task/test.tgz"], md5=["3d809cd0c2c86adb2c67034d15c4e446"], description="Additional reference for EN-FI.", langpairs={ @@ -1007,7 +1007,7 @@ ), "wmt16/tworefs": FakeSGMLDataset( "wmt16/tworefs", - data=["http://data.statmt.org/wmt16/translation-task/test.tgz"], + data=["https://data.statmt.org/wmt16/translation-task/test.tgz"], md5=["3d809cd0c2c86adb2c67034d15c4e446"], description="EN-FI with two references.", langpairs={ @@ -1020,7 +1020,7 @@ ), "wmt16/dev": FakeSGMLDataset( "wmt16/dev", - data=["http://data.statmt.org/wmt16/translation-task/dev.tgz"], + data=["https://data.statmt.org/wmt16/translation-task/dev.tgz"], md5=["4a3dc2760bb077f4308cce96b06e6af6"], description="Development sets released for new languages in 2016.", langpairs={ @@ -1044,7 +1044,7 @@ 
), "wmt15": FakeSGMLDataset( "wmt15", - data=["http://statmt.org/wmt15/test.tgz"], + data=["https://statmt.org/wmt15/test.tgz"], md5=["67e3beca15e69fe3d36de149da0a96df"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2015:WMT,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Haddow, Barry and Huck, Matthias and Hokamp, Chris and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Post, Matt and Scarton, Carolina and Specia, Lucia and Turchi, Marco},\n title = {Findings of the 2015 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Tenth Workshop on Statistical Machine Translation},\n month = {September},\n year = {2015},\n address = {Lisbon, Portugal},\n publisher = {Association for Computational Linguistics},\n pages = {1--46},\n url = {http://aclweb.org/anthology/W15-3001}\n}", @@ -1093,7 +1093,7 @@ ), "wmt14": FakeSGMLDataset( "wmt14", - data=["http://statmt.org/wmt14/test-filtered.tgz"], + data=["https://statmt.org/wmt14/test-filtered.tgz"], md5=["84c597844c1542e29c2aff23aaee4310"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2014:W14-33,\n author = {Bojar, Ondrej and Buck, Christian and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Leveling, Johannes and Monz, Christof and Pecina, Pavel and Post, Matt and Saint-Amand, Herve and Soricut, Radu and Specia, Lucia and Tamchyna, Ale\\v{s}},\n title = {Findings of the 2014 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Ninth Workshop on Statistical Machine Translation},\n month = {June},\n year = {2014},\n address = {Baltimore, Maryland, USA},\n publisher = {Association for Computational Linguistics},\n pages = {12--58},\n url = {http://www.aclweb.org/anthology/W/W14/W14-3302}\n}", @@ -1142,7 +1142,7 @@ ), "wmt14/full": FakeSGMLDataset( "wmt14/full", - data=["http://statmt.org/wmt14/test-full.tgz"], + 
data=["https://statmt.org/wmt14/test-full.tgz"], md5=["a8cd784e006feb32ac6f3d9ec7eb389a"], description="Evaluation data released after official evaluation for further research.", langpairs={ @@ -1190,7 +1190,7 @@ ), "wmt13": FakeSGMLDataset( "wmt13", - data=["http://statmt.org/wmt13/test.tgz"], + data=["https://statmt.org/wmt13/test.tgz"], md5=["48eca5d02f637af44e85186847141f67"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2013:WMT,\n author = {Bojar, Ond\\v{r}ej and Buck, Christian and Callison-Burch, Chris and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Monz, Christof and Post, Matt and Soricut, Radu and Specia, Lucia},\n title = {Findings of the 2013 {Workshop on Statistical Machine Translation}},\n booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation},\n month = {August},\n year = {2013},\n address = {Sofia, Bulgaria},\n publisher = {Association for Computational Linguistics},\n pages = {1--44},\n url = {http://www.aclweb.org/anthology/W13-2201}\n}", @@ -1209,7 +1209,7 @@ ), "wmt12": FakeSGMLDataset( "wmt12", - data=["http://statmt.org/wmt12/test.tgz"], + data=["https://statmt.org/wmt12/test.tgz"], md5=["608232d34ebc4ba2ff70fead45674e47"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2012:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Post, Matt and Soricut, Radu and Specia, Lucia},\n title = {Findings of the 2012 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Seventh Workshop on Statistical Machine Translation},\n month = {June},\n year = {2012},\n address = {Montr{'e}al, Canada},\n publisher = {Association for Computational Linguistics},\n pages = {10--51},\n url = {http://www.aclweb.org/anthology/W12-3102}\n}", @@ -1226,7 +1226,7 @@ ), "wmt11": FakeSGMLDataset( "wmt11", - data=["http://statmt.org/wmt11/test.tgz"], + data=["https://statmt.org/wmt11/test.tgz"], 
md5=["b0c9680adf32d394aefc2b24e3a5937e"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2011:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Zaidan, Omar},\n title = {Findings of the 2011 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Sixth Workshop on Statistical Machine Translation},\n month = {July},\n year = {2011},\n address = {Edinburgh, Scotland},\n publisher = {Association for Computational Linguistics},\n pages = {22--64},\n url = {http://www.aclweb.org/anthology/W11-2103}\n}", @@ -1243,7 +1243,7 @@ ), "wmt10": FakeSGMLDataset( "wmt10", - data=["http://statmt.org/wmt10/test.tgz"], + data=["https://statmt.org/wmt10/test.tgz"], md5=["491cb885a355da5a23ea66e7b3024d5c"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2010:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Peterson, Kay and Przybocki, Mark and Zaidan, Omar},\n title = {Findings of the 2010 Joint Workshop on Statistical Machine Translation and Metrics for Machine Translation},\n booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},\n month = {July},\n year = {2010},\n address = {Uppsala, Sweden},\n publisher = {Association for Computational Linguistics},\n pages = {17--53},\n note = {Revised August 2010},\n url = {http://www.aclweb.org/anthology/W10-1703}\n}", @@ -1260,7 +1260,7 @@ ), "wmt09": FakeSGMLDataset( "wmt09", - data=["http://statmt.org/wmt09/test.tgz"], + data=["https://statmt.org/wmt09/test.tgz"], md5=["da227abfbd7b666ec175b742a0d27b37"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2009:WMT-09,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Schroeder, Josh},\n title = {Findings of the 2009 {W}orkshop on {S}tatistical {M}achine {T}ranslation},\n booktitle = {Proceedings of the Fourth Workshop on 
Statistical Machine Translation},\n month = {March},\n year = {2009},\n address = {Athens, Greece},\n publisher = {Association for Computational Linguistics},\n pages = {1--28},\n url = {http://www.aclweb.org/anthology/W/W09/W09-0401}\n}", @@ -1281,7 +1281,7 @@ ), "wmt08": FakeSGMLDataset( "wmt08", - data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2008:WMT,\n author = {Callison-Burch, Chris and Fordyce, Cameron and Koehn, Philipp and Monz, Christof and Schroeder, Josh},\n title = {Further Meta-Evaluation of Machine Translation},\n booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation},\n month = {June},\n year = {2008},\n address = {Columbus, Ohio},\n publisher = {Association for Computational Linguistics},\n pages = {70--106},\n url = {http://www.aclweb.org/anthology/W/W08/W08-0309}\n}", @@ -1300,7 +1300,7 @@ ), "wmt08/nc": FakeSGMLDataset( "wmt08/nc", - data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data (news commentary).", langpairs={ @@ -1310,7 +1310,7 @@ ), "wmt08/europarl": FakeSGMLDataset( "wmt08/europarl", - data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data (Europarl).", langpairs={ diff --git a/sacrebleu/dataset/__main__.py b/sacrebleu/dataset/__main__.py index 2295492..5b13d59 100644 --- a/sacrebleu/dataset/__main__.py +++ b/sacrebleu/dataset/__main__.py @@ -16,6 +16,8 @@ for item in DATASETS.values(): if item.md5 is not None: + assert item.data + assert item.md5 assert len(item.data) == len(item.md5) pairs = zip(item.data, item.md5) for url, md5_hash in pairs: diff --git a/sacrebleu/dataset/base.py b/sacrebleu/dataset/base.py 
index ba6e65b..cf3c092 100644 --- a/sacrebleu/dataset/base.py +++ b/sacrebleu/dataset/base.py @@ -4,7 +4,7 @@ import os import re from abc import ABCMeta, abstractmethod -from typing import Dict, List +from typing import Dict, List, Optional from ..utils import SACREBLEU_DIR, download_file, smart_open @@ -13,10 +13,10 @@ class Dataset(metaclass=ABCMeta): def __init__( self, name: str, - data: List[str] = None, - description: str = None, - citation: str = None, - md5: List[str] = None, + data: Optional[List[str]] = None, + description: Optional[str] = None, + citation: Optional[str] = None, + md5: Optional[List[str]] = None, langpairs=Dict[str, List[str]], **kwargs, ): diff --git a/sacrebleu/dataset/wmt_xml.py b/sacrebleu/dataset/wmt_xml.py index 92c96d5..d5eb5d8 100644 --- a/sacrebleu/dataset/wmt_xml.py +++ b/sacrebleu/dataset/wmt_xml.py @@ -76,7 +76,7 @@ def _unwrap_wmt21_or_later(raw_file): def get_sents(doc): return { int(seg.get("id")): seg.text if seg.text else "" - for seg in doc.findall(f".//seg") + for seg in doc.findall(".//seg") } ref_docs = doc.findall(".//ref") @@ -114,7 +114,7 @@ def _get_langpair_path(self, langpair): in order to allow for overriding which test set to use. 
""" langpair_data = self._get_langpair_metadata(langpair)[langpair] - rel_path = langpair_data["path"] if type(langpair_data) == dict else langpair_data[0] + rel_path = langpair_data["path"] if isinstance(langpair_data, dict) else langpair_data[0] return os.path.join(self._rawdir, rel_path) def process_to_text(self, langpair=None): @@ -156,7 +156,7 @@ def _get_langpair_allowed_refs(self, langpair): """ defaults = self.kwargs.get("refs", []) langpair_data = self._get_langpair_metadata(langpair)[langpair] - if type(langpair_data) == dict: + if isinstance(langpair_data, dict): allowed_refs = langpair_data.get("refs", defaults) else: allowed_refs = defaults diff --git a/sacrebleu/sacrebleu.py b/sacrebleu/sacrebleu.py index 7edbe3a..d778e1d 100755 --- a/sacrebleu/sacrebleu.py +++ b/sacrebleu/sacrebleu.py @@ -50,7 +50,7 @@ try: # SIGPIPE is not available on Windows machines, throwing an exception. - from signal import SIGPIPE + from signal import SIGPIPE # type: ignore # If SIGPIPE is available, change behaviour to default instead of ignore. from signal import signal, SIG_DFL diff --git a/sacrebleu/significance.py b/sacrebleu/significance.py index b39e0a5..a9c71d0 100644 --- a/sacrebleu/significance.py +++ b/sacrebleu/significance.py @@ -1,7 +1,7 @@ import os import logging import multiprocessing as mp -from typing import Sequence, Dict, Optional, Tuple, List, Union, Any +from typing import Sequence, Dict, Optional, Tuple, List, Union, Any, Mapping import numpy as np @@ -77,11 +77,11 @@ def _bootstrap_resample(stats: List[List[Union[int, float]]], idxs = rng.choice(len(stats), size=(n_samples, len(stats)), replace=True) # convert to numpy array. 
float32 is more efficient - stats = np.array(stats, dtype='float32') + stats_np = np.array(stats, dtype='float32') # recompute scores for all resamples scores = [ - metric._compute_score_from_stats(_s.sum(0)) for _s in stats[idxs]] + metric._compute_score_from_stats(_s.sum(0)) for _s in stats_np[idxs]] return str(seed).lower(), scores @@ -98,7 +98,7 @@ def _compute_p_value(stats: np.ndarray, real_difference: float) -> float: # "the != is important. if we want to score the same system against itself # having a zero difference should not be attributed to chance." - c = np.sum(stats > real_difference) + c = np.sum(stats > real_difference).item() # "+1 applies here, though it only matters for small numbers of shufflings, # which we typically never do. it's necessary to ensure the probability of @@ -186,8 +186,9 @@ def _paired_ar_test(baseline_info: Dict[str, Tuple[np.ndarray, Result]], sacrelogger.info(f' > Performing bootstrap resampling for confidence interval (# resamples: {n_ar_confidence})') sys_stats = np.array(sys_stats, dtype='float32') # recompute scores for all resamples - sys_scores = [ - metric._compute_score_from_stats(_s.sum(0)).score for _s in sys_stats[bs_idxs]] + sys_scores = np.array([ + metric._compute_score_from_stats(_s.sum(0)).score for _s in sys_stats[bs_idxs] + ]) res.mean, res.ci = estimate_ci(sys_scores) # Store the result @@ -300,7 +301,7 @@ class PairedTest: } def __init__(self, named_systems: List[Tuple[str, Sequence[str]]], - metrics: Dict[str, Metric], + metrics: Mapping[str, Metric], references: Optional[Sequence[Sequence[str]]], test_type: str = 'ar', n_samples: int = 0, diff --git a/sacrebleu/tokenizers/tokenizer_spm.py b/sacrebleu/tokenizers/tokenizer_spm.py index a50d0fb..92729b5 100644 --- a/sacrebleu/tokenizers/tokenizer_spm.py +++ b/sacrebleu/tokenizers/tokenizer_spm.py @@ -2,7 +2,6 @@ import os import logging -import urllib.request from functools import lru_cache from ..utils import SACREBLEU_DIR, download_file diff --git 
a/sacrebleu/utils.py b/sacrebleu/utils.py index 6187e3b..56e6fca 100644 --- a/sacrebleu/utils.py +++ b/sacrebleu/utils.py @@ -423,9 +423,7 @@ def download_file(source_path, dest_path, extract_to=None, expected_md5=None): with portalocker.Lock(lockfile, timeout=60): if not os.path.exists(dest_path) or os.path.getsize(dest_path) == 0: - sacrelogger.info(f"Downloading {source_path} to {dest_path}") - md5 = hashlib.md5() try: with urllib.request.urlopen(source_path) as f, open(dest_path, 'wb') as out: @@ -441,7 +439,7 @@ def download_file(source_path, dest_path, extract_to=None, expected_md5=None): if cur_md5 != expected_md5: sacrelogger.error(f'Fatal: MD5 sum of downloaded file was incorrect (got {cur_md5}, expected {expected_md5}).') sacrelogger.error(f'Please manually delete {dest_path!r} and rerun the command.') - sacrelogger.error(f'If the problem persists, the tarball may have changed, in which case, please contact the SacreBLEU maintainer.') + sacrelogger.error('If the problem persists, the tarball may have changed, in which case, please contact the SacreBLEU maintainer.') sys.exit(1) # Extract the tarball @@ -594,4 +592,4 @@ def print_subset_results(metrics, full_system, full_refs, args): print(f'{key}: sentences={n_system:<6} {score.name:<{max_metric_width}} = {score.score:.{w}f}') # import at the end to avoid circular import -from .dataset import DATASETS, SUBSETS, DOMAINS, COUNTRIES +from .dataset import DATASETS, SUBSETS, DOMAINS, COUNTRIES # noqa: E402 diff --git a/scripts/perf_test.py b/scripts/perf_test.py index 1cf2b48..f2812db 100644 --- a/scripts/perf_test.py +++ b/scripts/perf_test.py @@ -5,8 +5,8 @@ sys.path.insert(0, '.') -import sacrebleu -from sacrebleu.metrics import BLEU, CHRF +import sacrebleu # noqa: E402 +from sacrebleu.metrics import BLEU, CHRF # noqa: E402 N_REPEATS = 5 diff --git a/setup.cfg b/setup.cfg index 2e0f031..a3fd11d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,3 @@ [metadata] -description-file = README.md -license_file = 
LICENSE.txt +description_file = README.md +license_files = LICENSE.txt diff --git a/setup.py b/setup.py index 00c7002..f104799 100755 --- a/setup.py +++ b/setup.py @@ -130,6 +130,9 @@ def get_long_description(): # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. 'Programming Language :: Python :: 3 :: Only', + + # Indicate that type hints are provided + 'Typing :: Typed' ], # What does your project relate to? @@ -151,7 +154,8 @@ def get_long_description(): # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] - extras_require={'ja': ['mecab-python3>=1.0.5,<=1.0.6', 'ipadic>=1.0,<2.0'], + extras_require={'dev': ['wheel', 'pytest', 'mypy', 'types-tabulate', 'lxml-stubs'], + 'ja': ['mecab-python3>=1.0.5,<=1.0.6', 'ipadic>=1.0,<2.0'], 'ko': ['mecab-ko>=1.0.0,<=1.0.1', 'mecab-ko-dic>=1.0,<2.0']}, # To provide executable scripts, use entry points in preference to the diff --git a/test.sh b/test.sh index ee1657f..1bb5720 100755 --- a/test.sh +++ b/test.sh @@ -96,7 +96,7 @@ cd data if [[ ! -d wmt17-submitted-data ]]; then echo "Downloading and unpacking WMT'17 system submissions (46 MB)..." - wget -q http://data.statmt.org/wmt17/translation-task/wmt17-submitted-data-v1.0.tgz + wget -q https://data.statmt.org/wmt17/translation-task/wmt17-submitted-data-v1.0.tgz tar xzf wmt17-submitted-data-v1.0.tgz fi diff --git a/test/test_api.py b/test/test_api.py index 511b4c7..02ac9f0 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -44,7 +44,7 @@ def test_api_get_available_testsets(): the test sets found. """ available = get_available_testsets() - assert type(available) is list + assert isinstance(available, list) assert "wmt19" in available assert "wmt05" not in available @@ -59,12 +59,12 @@ def test_api_get_available_testsets_for_langpair(): the test sets found. 
""" available = get_available_testsets_for_langpair('en-it') - assert type(available) is list + assert isinstance(available, list) assert "wmt09" in available assert "wmt15" not in available available = get_available_testsets_for_langpair('en-fr') - assert type(available) is list + assert isinstance(available, list) assert "wmt11" in available assert "mtedx/test" in available assert "wmt20" not in available @@ -77,7 +77,7 @@ def test_api_get_langpairs_for_testset(): """ for testset in DATASETS.keys(): available = get_langpairs_for_testset(testset) - assert type(available) is list + assert isinstance(available, list) for langpair in DATASETS[testset].langpairs.keys(): # skip non-language keys if "-" not in langpair: diff --git a/test/test_dataset.py b/test/test_dataset.py index cff796d..d3ece8c 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -106,7 +106,7 @@ def test_wmt22_references(): # and that ref:A is the default for all languages where it wasn't overridden for langpair, langpair_data in wmt22.langpairs.items(): - if type(langpair_data) == dict: + if isinstance(langpair_data, dict): assert wmt22._get_langpair_allowed_refs(langpair) != ["ref:A"] else: assert wmt22._get_langpair_allowed_refs(langpair) == ["ref:A"] diff --git a/test/test_significance.py b/test/test_significance.py index 46679ac..f709832 100644 --- a/test/test_significance.py +++ b/test/test_significance.py @@ -1,9 +1,10 @@ import os from collections import defaultdict +from typing import DefaultDict from sacrebleu.metrics import BLEU -from sacrebleu.significance import PairedTest +from sacrebleu.significance import PairedTest, Result import pytest @@ -57,8 +58,8 @@ def _read_pickle_file(): } -SACREBLEU_BS_P_VALS = defaultdict(float) -SACREBLEU_AR_P_VALS = defaultdict(float) +SACREBLEU_BS_P_VALS: DefaultDict[str, float] = defaultdict(float) +SACREBLEU_AR_P_VALS: DefaultDict[str, float] = defaultdict(float) # Load data from pickled file to not bother with WMT17 downloading 
named_systems = _read_pickle_file() @@ -75,7 +76,9 @@ def _read_pickle_file(): test_type='bs', n_samples=2000)()[1] for name, result in zip(bs_scores['System'], bs_scores['BLEU']): + assert isinstance(result, Result) if result.p_value is not None: + assert isinstance(name, str) SACREBLEU_BS_P_VALS[name] += result.p_value @@ -87,7 +90,9 @@ def _read_pickle_file(): test_type='ar', n_samples=10000)()[1] for name, result in zip(ar_scores['System'], ar_scores['BLEU']): + assert isinstance(result, Result) if result.p_value is not None: + assert isinstance(name, str) SACREBLEU_AR_P_VALS[name] += result.p_value From b0ad2cb29c87f51a6553b4d6caaff23d6bb7708c Mon Sep 17 00:00:00 2001 From: Dmitry Ustalov Date: Wed, 29 Nov 2023 21:25:09 +0100 Subject: [PATCH 4/4] Version bump to 2.3.3 (#253) --- CHANGELOG.md | 5 +++++ sacrebleu/__init__.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 042a992..e21bd54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Release Notes +- 2.3.3 (2023-11-28) + Fixed: + - Typing issues (#249, #250) + - Improved builds (#252) + - 2.3.2 (2023-11-06) Fixed: - Special treatment of empty references in TER (#232) diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py index 19f7059..e7ddc63 100644 --- a/sacrebleu/__init__.py +++ b/sacrebleu/__init__.py @@ -14,7 +14,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. -__version__ = '2.3.2' +__version__ = '2.3.3' __description__ = 'Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores'