diff --git a/.github/workflows/check-build.yml b/.github/workflows/check-build.yml
index 881091a3..c546c0e1 100644
--- a/.github/workflows/check-build.yml
+++ b/.github/workflows/check-build.yml
@@ -1,7 +1,9 @@
 name: check-build
 
 on:
-  pull_request
+  push:
+  pull_request:
+  workflow_dispatch:
 
 env:
   PYTHONUTF8: "1"
@@ -27,8 +29,6 @@ jobs:
           - os: ubuntu-20.04
             python-version: '3.6'  # test Python 3.6 on older Ubuntu instead
     steps:
-      # - name: update
-      #   run: sudo apt-get -y update
       - uses: actions/checkout@v3
       - name: Setup Python ${{ matrix.python-version }}
         uses: actions/setup-python@v3
@@ -42,12 +42,18 @@ jobs:
         run: choco install wget unzip
       - name: Install python dependencies
         run: |
-          python -m pip install --upgrade pip
-          pip install pytest
-          pip install .[ja]
-          pip install .[ko]
+          python3 -m pip install --upgrade pip
+          pip3 install .[dev]
+          pip3 install .[ja]
+          pip3 install .[ko]
+      - name: Lint with Mypy
+        run: mypy sacrebleu scripts test
+      - name: Lint with Ruff
+        uses: chartboost/ruff-action@v1
       - name: Python pytest test suite
         run: python3 -m pytest
       - name: CLI bash test suite
         shell: bash
         run: ./test.sh
+      - name: Build
+        run: python3 setup.py sdist bdist_wheel
diff --git a/.gitignore b/.gitignore
index 6bf040a7..e2d4f533 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ __pycache__
 sacrebleu.egg-info
 .sacrebleu
 *~
-.DS_Store
\ No newline at end of file
+.DS_Store
+.idea/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14f74df1..d7196284 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,14 @@
 # Release Notes
-
-- 2.4.0 (2023-11-07)
+- 2.4.0 (2023-12-11)
   Added:
   - WMT23 test sets (test set `wmt23`)
 
+- 2.3.3 (2023-11-28)
+  Fixed:
+  - Typing issues (#249, #250)
+  - Improved builds (#252)
+
 - 2.3.2 (2023-11-06)
   Fixed:
   - Special treatment of empty references in TER (#232)
diff --git a/Makefile b/Makefile
index f6f82360..6b378c70 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 .PHONY: test
 test:
+	mypy sacrebleu scripts test
 	python3 -m pytest
 	bash test.sh
diff --git a/mypy.ini b/mypy.ini
index 7207d687..26ff0958 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -18,3 +18,12 @@ ignore_missing_imports = True
 
 [mypy-MeCab.*]
 ignore_missing_imports = True
+
+[mypy-mecab_ko.*]
+ignore_missing_imports = True
+
+[mypy-mecab_ko_dic.*]
+ignore_missing_imports = True
+
+[mypy-sentencepiece.*]
+ignore_missing_imports = True
diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py
index c3405280..4a3bcab7 100644
--- a/sacrebleu/__init__.py
+++ b/sacrebleu/__init__.py
@@ -18,14 +18,26 @@
 __description__ = 'Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores'
 
-from .utils import smart_open, SACREBLEU_DIR, download_test_set  # noqa: F401
-from .utils import get_source_file, get_reference_files  # noqa: F401
-from .utils import get_available_testsets, get_langpairs_for_testset  # noqa: F401
-from .metrics.helpers import extract_word_ngrams, extract_char_ngrams  # noqa: F401
-from .dataset import DATASETS  # noqa: F401
-from .metrics import BLEU, CHRF, TER  # noqa: F401
+from .utils import smart_open, SACREBLEU_DIR, download_test_set
+from .utils import get_source_file, get_reference_files
+from .utils import get_available_testsets, get_langpairs_for_testset
+from .metrics.helpers import extract_word_ngrams, extract_char_ngrams
+from .dataset import DATASETS
+from .metrics import BLEU, CHRF, TER
 
 # Backward compatibility functions for old style API access (<= 1.4.10)
-from .compat import corpus_bleu, raw_corpus_bleu, sentence_bleu  # noqa: F401
-from .compat import corpus_chrf, sentence_chrf  # noqa: F401
-from .compat import corpus_ter, sentence_ter  # noqa: F401
+from .compat import corpus_bleu, raw_corpus_bleu, sentence_bleu
+from .compat import corpus_chrf, sentence_chrf
+from .compat import corpus_ter, sentence_ter
+
+__all__ = [
+    'smart_open', 'SACREBLEU_DIR', 'download_test_set',
+    'get_source_file', 'get_reference_files',
+    'get_available_testsets', 'get_langpairs_for_testset',
+    'extract_word_ngrams', 'extract_char_ngrams',
+    'DATASETS',
+    'BLEU', 'CHRF', 'TER',
+    'corpus_bleu', 'raw_corpus_bleu', 'sentence_bleu',
+    'corpus_chrf', 'sentence_chrf',
+    'corpus_ter', 'sentence_ter'
+]
diff --git a/sacrebleu/compat.py b/sacrebleu/compat.py
index cce90e9f..57359603 100644
--- a/sacrebleu/compat.py
+++ b/sacrebleu/compat.py
@@ -64,7 +64,7 @@ def raw_corpus_bleu(hypotheses: Sequence[str],
 def sentence_bleu(hypothesis: str,
                   references: Sequence[str],
                   smooth_method: str = 'exp',
-                  smooth_value: float = None,
+                  smooth_value: Optional[float] = None,
                   lowercase: bool = False,
                   tokenize=BLEU.TOKENIZER_DEFAULT,
                   use_effective_order: bool = True) -> BLEUScore:
diff --git a/sacrebleu/dataset/__init__.py b/sacrebleu/dataset/__init__.py
index b44a1a70..6ec8aad8 100644
--- a/sacrebleu/dataset/__init__.py
+++ b/sacrebleu/dataset/__init__.py
@@ -14,6 +14,19 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
+
+# This defines data locations.
+# Right below are test sets.
+# Beneath each test set, we define the location to download the test data.
+# The other keys are each language pair contained in the tarball, and the respective locations of the source and reference data within each.
+# Many of these are *.sgm files, which are processed to produced plain text that can be used by this script.
+# The canonical location of unpacked, processed data is $SACREBLEU_DIR/$TEST/$SOURCE-$TARGET.{$SOURCE,$TARGET}
+from .fake_sgml import FakeSGMLDataset, WMTAdditionDataset
+from .iwslt_xml import IWSLTXMLDataset
+from .plain_text import PlainTextDataset
+from .tsv import TSVDataset
+from .wmt_xml import WMTXMLDataset
+
 # Detailed document metadata annotation in form DocumentID -> CountryCode - Domain - OptionalFinegrainedCountryCode
 # While the annotation is subjective with many unclear cases, it may provide useful insights
 # when applied on large data (TODO: annotate all documents from recent WMT years, at least for origlang=en, consider renaming "world" to "other").
@@ -59,19 +72,6 @@
 COUNTRIES = sorted(list({v.split("-")[0] for v in SUBSETS["wmt19"].values()}))
 DOMAINS = sorted(list({v.split("-")[1] for v in SUBSETS["wmt19"].values()}))
 
-
-# This defines data locations.
-# At the top level are test sets.
-# Beneath each test set, we define the location to download the test data.
-# The other keys are each language pair contained in the tarball, and the respective locations of the source and reference data within each.
-# Many of these are *.sgm files, which are processed to produced plain text that can be used by this script.
-# The canonical location of unpacked, processed data is $SACREBLEU_DIR/$TEST/$SOURCE-$TARGET.{$SOURCE,$TARGET}
-from .fake_sgml import FakeSGMLDataset, WMTAdditionDataset
-from .iwslt_xml import IWSLTXMLDataset
-from .plain_text import PlainTextDataset
-from .tsv import TSVDataset
-from .wmt_xml import WMTXMLDataset
-
 DATASETS = {
     # wmt
     "wmt23": WMTXMLDataset(
@@ -180,7 +180,7 @@
     ),
     "wmt21": WMTXMLDataset(
         "wmt21",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
         description="Official evaluation data for WMT21.",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -210,7 +210,7 @@
     ),
     "wmt21/B": WMTXMLDataset(
         "wmt21/B",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
         description="Official evaluation data for WMT21 with reference B.",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -226,7 +226,7 @@
     ),
     "wmt21/AB": WMTXMLDataset(
         "wmt21/AB",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
         description="Official evaluation data for WMT21 with references A and B.",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -243,7 +243,7 @@
     ),
     "wmt21/C": WMTXMLDataset(
         "wmt21/C",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
         description="Official evaluation data for WMT21 with reference C",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -254,7 +254,7 @@
     ),
     "wmt21/AC": WMTXMLDataset(
         "wmt21/AC",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
         description="Official evaluation data for WMT21 with references A and C",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -265,7 +265,7 @@
     ),
     "wmt21/D": WMTXMLDataset(
         "wmt21/D",
-        data=["http://data.statmt.org/wmt21/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/test.tgz"],
        description="Official evaluation data for WMT21 with reference D",
         md5=["32e7ab995bc318414375d60f0269af92"],
         langpairs={
@@ -276,7 +276,7 @@
     ),
     "wmt21/dev": WMTXMLDataset(
         "wmt21/dev",
-        data=["http://data.statmt.org/wmt21/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt21/translation-task/dev.tgz"],
         description="Development data for WMT21,if multiple references are available, the first one is used.",
         md5=["165da59ac8dfb5b7cafd7e90b1cac672"],
         langpairs={
@@ -290,7 +290,7 @@
     ),
     "wmt20/tworefs": FakeSGMLDataset(
         "wmt20/tworefs",
-        data=["http://data.statmt.org/wmt20/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt20/translation-task/test.tgz"],
         description="WMT20 news test sets with two references",
         md5=["3b1f777cfd2fb15ccf66e9bfdb2b1699"],
         langpairs={
@@ -323,7 +323,7 @@
     ),
     "wmt20": FakeSGMLDataset(
         "wmt20",
-        data=["http://data.statmt.org/wmt20/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt20/translation-task/test.tgz"],
         description="Official evaluation data for WMT20",
         md5=["3b1f777cfd2fb15ccf66e9bfdb2b1699"],
         langpairs={
@@ -419,7 +419,7 @@
     ),
     "wmt20/dev": FakeSGMLDataset(
         "wmt20/dev",
-        data=["http://data.statmt.org/wmt20/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt20/translation-task/dev.tgz"],
         description="Development data for tasks new to 2020.",
         md5=["037f2b37aab74febbb1b2307dc2afb54"],
         langpairs={
@@ -459,7 +459,7 @@
     ),
     "wmt20/robust/set1": PlainTextDataset(
         "wmt20/robust/set1",
-        data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
+        data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
         md5=["a12ac9ebe89b72195041518dffc4a9d5"],
         description="WMT20 robustness task, set 1",
         langpairs={
@@ -475,7 +475,7 @@
     ),
     "wmt20/robust/set2": PlainTextDataset(
         "wmt20/robust/set2",
-        data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
+        data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
         md5=["a12ac9ebe89b72195041518dffc4a9d5"],
         description="WMT20 robustness task, set 2",
         langpairs={
@@ -491,7 +491,7 @@
     ),
     "wmt20/robust/set3": PlainTextDataset(
         "wmt20/robust/set3",
-        data=["http://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
+        data=["https://data.statmt.org/wmt20/robustness-task/robustness20-3-sets.zip"],
         md5=["a12ac9ebe89b72195041518dffc4a9d5"],
         description="WMT20 robustness task, set 3",
         langpairs={
@@ -503,7 +503,7 @@
     ),
     "wmt19": FakeSGMLDataset(
         "wmt19",
-        data=["http://data.statmt.org/wmt19/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt19/translation-task/test.tgz"],
         description="Official evaluation data.",
         md5=["84de7162d158e28403103b01aeefc39a"],
         citation=r"""@proceedings{ws-2019-machine,
@@ -612,7 +612,7 @@
     ),
     "wmt19/dev": FakeSGMLDataset(
         "wmt19/dev",
-        data=["http://data.statmt.org/wmt19/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt19/translation-task/dev.tgz"],
         description="Development data for tasks new to 2019.",
         md5=["f2ec7af5947c19e0cacb3882eb208002"],
         langpairs={
@@ -645,7 +645,7 @@
     "wmt19/google/ar": WMTAdditionDataset(
         "wmt19/google/ar",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-ar.ref",
         ],
         description="Additional high-quality reference for WMT19/en-de.",
@@ -658,7 +658,7 @@
     "wmt19/google/arp": WMTAdditionDataset(
         "wmt19/google/arp",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-arp.ref",
         ],
         description="Additional paraphrase of wmt19/google/ar.",
@@ -671,7 +671,7 @@
     "wmt19/google/wmtp": WMTAdditionDataset(
         "wmt19/google/wmtp",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-wmtp.ref",
         ],
         description="Additional paraphrase of the official WMT19 reference.",
@@ -684,7 +684,7 @@
     "wmt19/google/hqr": WMTAdditionDataset(
         "wmt19/google/hqr",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqr.ref",
         ],
         description="Best human selected-reference between wmt19 and wmt19/google/ar.",
@@ -697,7 +697,7 @@
     "wmt19/google/hqp": WMTAdditionDataset(
         "wmt19/google/hqp",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqp.ref",
         ],
         description="Best human-selected reference between wmt19/google/arp and wmt19/google/wmtp.",
@@ -710,7 +710,7 @@
     "wmt19/google/hqall": WMTAdditionDataset(
         "wmt19/google/hqall",
         data=[
-            "http://data.statmt.org/wmt19/translation-task/test.tgz",
+            "https://data.statmt.org/wmt19/translation-task/test.tgz",
             "https://raw.githubusercontent.com/google/wmt19-paraphrased-references/master/wmt19/ende/wmt19-ende-hqall.ref",
         ],
         description="Best human-selected reference among original official reference and the Google reference and paraphrases.",
@@ -722,7 +722,7 @@
     ),
     "wmt18": FakeSGMLDataset(
         "wmt18",
-        data=["http://data.statmt.org/wmt18/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt18/translation-task/test.tgz"],
         md5=["f996c245ecffea23d0006fa4c34e9064"],
         description="Official evaluation data.",
         citation='@inproceedings{bojar-etal-2018-findings,\n title = "Findings of the 2018 Conference on Machine Translation ({WMT}18)",\n author = "Bojar, Ond{\v{r}}ej and\n Federmann, Christian and\n Fishel, Mark and\n Graham, Yvette and\n Haddow, Barry and\n Koehn, Philipp and\n Monz, Christof",\n booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",\n month = oct,\n year = "2018",\n address = "Belgium, Brussels",\n publisher = "Association for Computational Linguistics",\n url = "https://www.aclweb.org/anthology/W18-6401",\n pages = "272--303",\n}',
@@ -787,7 +787,7 @@
     ),
     "wmt18/test-ts": FakeSGMLDataset(
         "wmt18/test-ts",
-        data=["http://data.statmt.org/wmt18/translation-task/test-ts.tgz"],
+        data=["https://data.statmt.org/wmt18/translation-task/test-ts.tgz"],
         md5=["5c621a34d512cc2dd74162ae7d00b320"],
         description="Official evaluation sources with extra test sets interleaved.",
         langpairs={
@@ -809,7 +809,7 @@
     ),
     "wmt18/dev": FakeSGMLDataset(
         "wmt18/dev",
-        data=["http://data.statmt.org/wmt18/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt18/translation-task/dev.tgz"],
         md5=["486f391da54a7a3247f02ebd25996f24"],
         description="Development data (Estonian<>English).",
         langpairs={
@@ -825,7 +825,7 @@
     ),
     "wmt17": FakeSGMLDataset(
         "wmt17",
-        data=["http://data.statmt.org/wmt17/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt17/translation-task/test.tgz"],
         md5=["86a1724c276004aa25455ae2a04cef26"],
         description="Official evaluation data.",
         citation="@InProceedings{bojar-EtAl:2017:WMT1,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huang, Shujian and Huck, Matthias and Koehn, Philipp and Liu, Qun and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Post, Matt and Rubino, Raphael and Specia, Lucia and Turchi, Marco},\n title = {Findings of the 2017 Conference on Machine Translation (WMT17)},\n booktitle = {Proceedings of the Second Conference on Machine Translation, Volume 2: Shared Task Papers},\n month = {September},\n year = {2017},\n address = {Copenhagen, Denmark},\n publisher = {Association for Computational Linguistics},\n pages = {169--214},\n url = {http://www.aclweb.org/anthology/W17-4717}\n}",
@@ -890,7 +890,7 @@
     ),
     "wmt17/B": FakeSGMLDataset(
         "wmt17/B",
-        data=["http://data.statmt.org/wmt17/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt17/translation-task/test.tgz"],
         md5=["86a1724c276004aa25455ae2a04cef26"],
         description="Additional reference for EN-FI and FI-EN.",
         langpairs={
@@ -902,7 +902,7 @@
     ),
     "wmt17/tworefs": FakeSGMLDataset(
         "wmt17/tworefs",
-        data=["http://data.statmt.org/wmt17/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt17/translation-task/test.tgz"],
         md5=["86a1724c276004aa25455ae2a04cef26"],
         description="Systems with two references.",
         langpairs={
@@ -915,7 +915,7 @@
     ),
     "wmt17/improved": FakeSGMLDataset(
         "wmt17/improved",
-        data=["http://data.statmt.org/wmt17/translation-task/test-update-1.tgz"],
+        data=["https://data.statmt.org/wmt17/translation-task/test-update-1.tgz"],
         md5=["91dbfd5af99bc6891a637a68e04dfd41"],
         description="Improved zh-en and en-zh translations.",
         langpairs={
@@ -925,7 +925,7 @@
     ),
     "wmt17/dev": FakeSGMLDataset(
         "wmt17/dev",
-        data=["http://data.statmt.org/wmt17/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt17/translation-task/dev.tgz"],
         md5=["9b1aa63c1cf49dccdd20b962fe313989"],
         description="Development sets released for new languages in 2017.",
         langpairs={
@@ -951,7 +951,7 @@
         "wmt17/ms",
         data=[
             "https://github.com/MicrosoftTranslator/Translator-HumanParityData/archive/master.zip",
-            "http://data.statmt.org/wmt17/translation-task/test-update-1.tgz",
+            "https://data.statmt.org/wmt17/translation-task/test-update-1.tgz",
         ],
         md5=["18fdaa7a3c84cf6ef688da1f6a5fa96f", "91dbfd5af99bc6891a637a68e04dfd41"],
         description="Additional Chinese-English references from Microsoft Research.",
@@ -967,7 +967,7 @@
     ),
     "wmt16": FakeSGMLDataset(
         "wmt16",
-        data=["http://data.statmt.org/wmt16/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt16/translation-task/test.tgz"],
         md5=["3d809cd0c2c86adb2c67034d15c4e446"],
         description="Official evaluation data.",
         citation="@InProceedings{bojar-EtAl:2016:WMT1,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Graham, Yvette and Haddow, Barry and Huck, Matthias and Jimeno Yepes, Antonio and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Neveol, Aurelie and Neves, Mariana and Popel, Martin and Post, Matt and Rubino, Raphael and Scarton, Carolina and Specia, Lucia and Turchi, Marco and Verspoor, Karin and Zampieri, Marcos},\n title = {Findings of the 2016 Conference on Machine Translation},\n booktitle = {Proceedings of the First Conference on Machine Translation},\n month = {August},\n year = {2016},\n address = {Berlin, Germany},\n publisher = {Association for Computational Linguistics},\n pages = {131--198},\n url = {http://www.aclweb.org/anthology/W/W16/W16-2301}\n}",
@@ -1024,7 +1024,7 @@
     ),
     "wmt16/B": FakeSGMLDataset(
         "wmt16/B",
-        data=["http://data.statmt.org/wmt16/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt16/translation-task/test.tgz"],
         md5=["3d809cd0c2c86adb2c67034d15c4e446"],
         description="Additional reference for EN-FI.",
         langpairs={
@@ -1036,7 +1036,7 @@
     ),
     "wmt16/tworefs": FakeSGMLDataset(
         "wmt16/tworefs",
-        data=["http://data.statmt.org/wmt16/translation-task/test.tgz"],
+        data=["https://data.statmt.org/wmt16/translation-task/test.tgz"],
         md5=["3d809cd0c2c86adb2c67034d15c4e446"],
         description="EN-FI with two references.",
         langpairs={
@@ -1049,7 +1049,7 @@
     ),
     "wmt16/dev": FakeSGMLDataset(
         "wmt16/dev",
-        data=["http://data.statmt.org/wmt16/translation-task/dev.tgz"],
+        data=["https://data.statmt.org/wmt16/translation-task/dev.tgz"],
         md5=["4a3dc2760bb077f4308cce96b06e6af6"],
         description="Development sets released for new languages in 2016.",
         langpairs={
@@ -1073,7 +1073,7 @@
     ),
     "wmt15": FakeSGMLDataset(
         "wmt15",
-        data=["http://statmt.org/wmt15/test.tgz"],
+        data=["https://statmt.org/wmt15/test.tgz"],
         md5=["67e3beca15e69fe3d36de149da0a96df"],
         description="Official evaluation data.",
citation="@InProceedings{bojar-EtAl:2015:WMT,\n author = {Bojar, Ond\\v{r}ej and Chatterjee, Rajen and Federmann, Christian and Haddow, Barry and Huck, Matthias and Hokamp, Chris and Koehn, Philipp and Logacheva, Varvara and Monz, Christof and Negri, Matteo and Post, Matt and Scarton, Carolina and Specia, Lucia and Turchi, Marco},\n title = {Findings of the 2015 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Tenth Workshop on Statistical Machine Translation},\n month = {September},\n year = {2015},\n address = {Lisbon, Portugal},\n publisher = {Association for Computational Linguistics},\n pages = {1--46},\n url = {http://aclweb.org/anthology/W15-3001}\n}", @@ -1122,7 +1122,7 @@ ), "wmt14": FakeSGMLDataset( "wmt14", - data=["http://statmt.org/wmt14/test-filtered.tgz"], + data=["https://statmt.org/wmt14/test-filtered.tgz"], md5=["84c597844c1542e29c2aff23aaee4310"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2014:W14-33,\n author = {Bojar, Ondrej and Buck, Christian and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Leveling, Johannes and Monz, Christof and Pecina, Pavel and Post, Matt and Saint-Amand, Herve and Soricut, Radu and Specia, Lucia and Tamchyna, Ale\\v{s}},\n title = {Findings of the 2014 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Ninth Workshop on Statistical Machine Translation},\n month = {June},\n year = {2014},\n address = {Baltimore, Maryland, USA},\n publisher = {Association for Computational Linguistics},\n pages = {12--58},\n url = {http://www.aclweb.org/anthology/W/W14/W14-3302}\n}", @@ -1171,7 +1171,7 @@ ), "wmt14/full": FakeSGMLDataset( "wmt14/full", - data=["http://statmt.org/wmt14/test-full.tgz"], + data=["https://statmt.org/wmt14/test-full.tgz"], md5=["a8cd784e006feb32ac6f3d9ec7eb389a"], description="Evaluation data released after official evaluation for further research.", langpairs={ @@ -1219,7 +1219,7 @@ ), "wmt13": FakeSGMLDataset( "wmt13", - data=["http://statmt.org/wmt13/test.tgz"], + data=["https://statmt.org/wmt13/test.tgz"], md5=["48eca5d02f637af44e85186847141f67"], description="Official evaluation data.", citation="@InProceedings{bojar-EtAl:2013:WMT,\n author = {Bojar, Ond\\v{r}ej and Buck, Christian and Callison-Burch, Chris and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Monz, Christof and Post, Matt and Soricut, Radu and Specia, Lucia},\n title = {Findings of the 2013 {Workshop on Statistical Machine Translation}},\n booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation},\n month = {August},\n year = {2013},\n address = {Sofia, Bulgaria},\n publisher = {Association for Computational Linguistics},\n pages = {1--44},\n url = {http://www.aclweb.org/anthology/W13-2201}\n}", @@ -1238,7 +1238,7 @@ ), "wmt12": FakeSGMLDataset( "wmt12", - data=["http://statmt.org/wmt12/test.tgz"], + data=["https://statmt.org/wmt12/test.tgz"], md5=["608232d34ebc4ba2ff70fead45674e47"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2012:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Post, Matt and Soricut, Radu and Specia, Lucia},\n title = {Findings of the 2012 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Seventh Workshop on Statistical Machine Translation},\n month = {June},\n year = {2012},\n address = {Montr{'e}al, Canada},\n publisher = {Association for Computational Linguistics},\n pages = {10--51},\n url 
= {http://www.aclweb.org/anthology/W12-3102}\n}", @@ -1255,7 +1255,7 @@ ), "wmt11": FakeSGMLDataset( "wmt11", - data=["http://statmt.org/wmt11/test.tgz"], + data=["https://statmt.org/wmt11/test.tgz"], md5=["b0c9680adf32d394aefc2b24e3a5937e"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2011:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Zaidan, Omar},\n title = {Findings of the 2011 Workshop on Statistical Machine Translation},\n booktitle = {Proceedings of the Sixth Workshop on Statistical Machine Translation},\n month = {July},\n year = {2011},\n address = {Edinburgh, Scotland},\n publisher = {Association for Computational Linguistics},\n pages = {22--64},\n url = {http://www.aclweb.org/anthology/W11-2103}\n}", @@ -1272,7 +1272,7 @@ ), "wmt10": FakeSGMLDataset( "wmt10", - data=["http://statmt.org/wmt10/test.tgz"], + data=["https://statmt.org/wmt10/test.tgz"], md5=["491cb885a355da5a23ea66e7b3024d5c"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2010:WMT,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Peterson, Kay and Przybocki, Mark and Zaidan, Omar},\n title = {Findings of the 2010 Joint Workshop on Statistical Machine Translation and Metrics for Machine Translation},\n booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},\n month = {July},\n year = {2010},\n address = {Uppsala, Sweden},\n publisher = {Association for Computational Linguistics},\n pages = {17--53},\n note = {Revised August 2010},\n url = {http://www.aclweb.org/anthology/W10-1703}\n}", @@ -1289,7 +1289,7 @@ ), "wmt09": FakeSGMLDataset( "wmt09", - data=["http://statmt.org/wmt09/test.tgz"], + data=["https://statmt.org/wmt09/test.tgz"], md5=["da227abfbd7b666ec175b742a0d27b37"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2009:WMT-09,\n author = {Callison-Burch, Chris and Koehn, Philipp and Monz, Christof and Schroeder, Josh},\n title = {Findings of the 2009 {W}orkshop on {S}tatistical {M}achine {T}ranslation},\n booktitle = {Proceedings of the Fourth Workshop on Statistical Machine Translation},\n month = {March},\n year = {2009},\n address = {Athens, Greece},\n publisher = {Association for Computational Linguistics},\n pages = {1--28},\n url = {http://www.aclweb.org/anthology/W/W09/W09-0401}\n}", @@ -1310,7 +1310,7 @@ ), "wmt08": FakeSGMLDataset( "wmt08", - data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data.", citation="@InProceedings{callisonburch-EtAl:2008:WMT,\n author = {Callison-Burch, Chris and Fordyce, Cameron and Koehn, Philipp and Monz, Christof and Schroeder, Josh},\n title = {Further Meta-Evaluation of Machine Translation},\n booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation},\n month = {June},\n year = {2008},\n address = {Columbus, Ohio},\n publisher = {Association for Computational Linguistics},\n pages = {70--106},\n url = {http://www.aclweb.org/anthology/W/W08/W08-0309}\n}", @@ -1329,7 +1329,7 @@ ), "wmt08/nc": FakeSGMLDataset( "wmt08/nc", - data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data (news commentary).", langpairs={ @@ -1339,7 +1339,7 @@ ), "wmt08/europarl": FakeSGMLDataset( "wmt08/europarl", - 
data=["http://statmt.org/wmt08/test.tgz"], + data=["https://statmt.org/wmt08/test.tgz"], md5=["0582e4e894a3342044059c894e1aea3d"], description="Official evaluation data (Europarl).", langpairs={ diff --git a/sacrebleu/dataset/__main__.py b/sacrebleu/dataset/__main__.py index 22954920..5b13d59a 100644 --- a/sacrebleu/dataset/__main__.py +++ b/sacrebleu/dataset/__main__.py @@ -16,6 +16,8 @@ for item in DATASETS.values(): if item.md5 is not None: + assert item.data + assert item.md5 assert len(item.data) == len(item.md5) pairs = zip(item.data, item.md5) for url, md5_hash in pairs: diff --git a/sacrebleu/dataset/base.py b/sacrebleu/dataset/base.py index ba6e65ba..cf3c092f 100644 --- a/sacrebleu/dataset/base.py +++ b/sacrebleu/dataset/base.py @@ -4,7 +4,7 @@ import os import re from abc import ABCMeta, abstractmethod -from typing import Dict, List +from typing import Dict, List, Optional from ..utils import SACREBLEU_DIR, download_file, smart_open @@ -13,10 +13,10 @@ class Dataset(metaclass=ABCMeta): def __init__( self, name: str, - data: List[str] = None, - description: str = None, - citation: str = None, - md5: List[str] = None, + data: Optional[List[str]] = None, + description: Optional[str] = None, + citation: Optional[str] = None, + md5: Optional[List[str]] = None, langpairs=Dict[str, List[str]], **kwargs, ): diff --git a/sacrebleu/dataset/wmt_xml.py b/sacrebleu/dataset/wmt_xml.py index 92c96d57..d5eb5d86 100644 --- a/sacrebleu/dataset/wmt_xml.py +++ b/sacrebleu/dataset/wmt_xml.py @@ -76,7 +76,7 @@ def _unwrap_wmt21_or_later(raw_file): def get_sents(doc): return { int(seg.get("id")): seg.text if seg.text else "" - for seg in doc.findall(f".//seg") + for seg in doc.findall(".//seg") } ref_docs = doc.findall(".//ref") @@ -114,7 +114,7 @@ def _get_langpair_path(self, langpair): in order to allow for overriding which test set to use. """ langpair_data = self._get_langpair_metadata(langpair)[langpair] - rel_path = langpair_data["path"] if type(langpair_data) == dict else langpair_data[0] + rel_path = langpair_data["path"] if isinstance(langpair_data, dict) else langpair_data[0] return os.path.join(self._rawdir, rel_path) def process_to_text(self, langpair=None): @@ -156,7 +156,7 @@ def _get_langpair_allowed_refs(self, langpair): """ defaults = self.kwargs.get("refs", []) langpair_data = self._get_langpair_metadata(langpair)[langpair] - if type(langpair_data) == dict: + if isinstance(langpair_data, dict): allowed_refs = langpair_data.get("refs", defaults) else: allowed_refs = defaults diff --git a/sacrebleu/sacrebleu.py b/sacrebleu/sacrebleu.py index 7edbe3ac..d778e1db 100755 --- a/sacrebleu/sacrebleu.py +++ b/sacrebleu/sacrebleu.py @@ -50,7 +50,7 @@ try: # SIGPIPE is not available on Windows machines, throwing an exception. - from signal import SIGPIPE + from signal import SIGPIPE # type: ignore # If SIGPIPE is available, change behaviour to default instead of ignore. from signal import signal, SIG_DFL diff --git a/sacrebleu/significance.py b/sacrebleu/significance.py index b39e0a59..a9c71d0a 100644 --- a/sacrebleu/significance.py +++ b/sacrebleu/significance.py @@ -1,7 +1,7 @@ import os import logging import multiprocessing as mp -from typing import Sequence, Dict, Optional, Tuple, List, Union, Any +from typing import Sequence, Dict, Optional, Tuple, List, Union, Any, Mapping import numpy as np @@ -77,11 +77,11 @@ def _bootstrap_resample(stats: List[List[Union[int, float]]], idxs = rng.choice(len(stats), size=(n_samples, len(stats)), replace=True) # convert to numpy array. 
float32 is more efficient - stats = np.array(stats, dtype='float32') + stats_np = np.array(stats, dtype='float32') # recompute scores for all resamples scores = [ - metric._compute_score_from_stats(_s.sum(0)) for _s in stats[idxs]] + metric._compute_score_from_stats(_s.sum(0)) for _s in stats_np[idxs]] return str(seed).lower(), scores @@ -98,7 +98,7 @@ def _compute_p_value(stats: np.ndarray, real_difference: float) -> float: # "the != is important. if we want to score the same system against itself # having a zero difference should not be attributed to chance." - c = np.sum(stats > real_difference) + c = np.sum(stats > real_difference).item() # "+1 applies here, though it only matters for small numbers of shufflings, # which we typically never do. it's necessary to ensure the probability of @@ -186,8 +186,9 @@ def _paired_ar_test(baseline_info: Dict[str, Tuple[np.ndarray, Result]], sacrelogger.info(f' > Performing bootstrap resampling for confidence interval (# resamples: {n_ar_confidence})') sys_stats = np.array(sys_stats, dtype='float32') # recompute scores for all resamples - sys_scores = [ - metric._compute_score_from_stats(_s.sum(0)).score for _s in sys_stats[bs_idxs]] + sys_scores = np.array([ + metric._compute_score_from_stats(_s.sum(0)).score for _s in sys_stats[bs_idxs] + ]) res.mean, res.ci = estimate_ci(sys_scores) # Store the result @@ -300,7 +301,7 @@ class PairedTest: } def __init__(self, named_systems: List[Tuple[str, Sequence[str]]], - metrics: Dict[str, Metric], + metrics: Mapping[str, Metric], references: Optional[Sequence[Sequence[str]]], test_type: str = 'ar', n_samples: int = 0, diff --git a/sacrebleu/tokenizers/tokenizer_spm.py b/sacrebleu/tokenizers/tokenizer_spm.py index a50d0fb8..92729b5b 100644 --- a/sacrebleu/tokenizers/tokenizer_spm.py +++ b/sacrebleu/tokenizers/tokenizer_spm.py @@ -2,7 +2,6 @@ import os import logging -import urllib.request from functools import lru_cache from ..utils import SACREBLEU_DIR, download_file diff --git a/sacrebleu/utils.py b/sacrebleu/utils.py index 6187e3b3..56e6fcab 100644 --- a/sacrebleu/utils.py +++ b/sacrebleu/utils.py @@ -423,9 +423,7 @@ def download_file(source_path, dest_path, extract_to=None, expected_md5=None): with portalocker.Lock(lockfile, timeout=60): if not os.path.exists(dest_path) or os.path.getsize(dest_path) == 0: - sacrelogger.info(f"Downloading {source_path} to {dest_path}") - md5 = hashlib.md5() try: with urllib.request.urlopen(source_path) as f, open(dest_path, 'wb') as out: @@ -441,7 +439,7 @@ def download_file(source_path, dest_path, extract_to=None, expected_md5=None): if cur_md5 != expected_md5: sacrelogger.error(f'Fatal: MD5 sum of downloaded file was incorrect (got {cur_md5}, expected {expected_md5}).') sacrelogger.error(f'Please manually delete {dest_path!r} and rerun the command.') - sacrelogger.error(f'If the problem persists, the tarball may have changed, in which case, please contact the SacreBLEU maintainer.') + sacrelogger.error('If the problem persists, the tarball may have changed, in which case, please contact the SacreBLEU maintainer.') sys.exit(1) # Extract the tarball @@ -594,4 +592,4 @@ def print_subset_results(metrics, full_system, full_refs, args): print(f'{key}: sentences={n_system:<6} {score.name:<{max_metric_width}} = {score.score:.{w}f}') # import at the end to avoid circular import -from .dataset import DATASETS, SUBSETS, DOMAINS, COUNTRIES +from .dataset import DATASETS, SUBSETS, DOMAINS, COUNTRIES # noqa: E402 diff --git a/scripts/perf_test.py b/scripts/perf_test.py index 
1cf2b484..f2812db5 100644 --- a/scripts/perf_test.py +++ b/scripts/perf_test.py @@ -5,8 +5,8 @@ sys.path.insert(0, '.') -import sacrebleu -from sacrebleu.metrics import BLEU, CHRF +import sacrebleu # noqa: E402 +from sacrebleu.metrics import BLEU, CHRF # noqa: E402 N_REPEATS = 5 diff --git a/setup.cfg b/setup.cfg index 2e0f031c..a3fd11de 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,3 @@ [metadata] -description-file = README.md -license_file = LICENSE.txt +description_file = README.md +license_files = LICENSE.txt diff --git a/setup.py b/setup.py index 00c70029..f104799e 100755 --- a/setup.py +++ b/setup.py @@ -130,6 +130,9 @@ def get_long_description(): # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. 'Programming Language :: Python :: 3 :: Only', + + # Indicate that type hints are provided + 'Typing :: Typed' ], # What does your project relate to? @@ -151,7 +154,8 @@ def get_long_description(): # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] - extras_require={'ja': ['mecab-python3>=1.0.5,<=1.0.6', 'ipadic>=1.0,<2.0'], + extras_require={'dev': ['wheel', 'pytest', 'mypy', 'types-tabulate', 'lxml-stubs'], + 'ja': ['mecab-python3>=1.0.5,<=1.0.6', 'ipadic>=1.0,<2.0'], 'ko': ['mecab-ko>=1.0.0,<=1.0.1', 'mecab-ko-dic>=1.0,<2.0']}, # To provide executable scripts, use entry points in preference to the diff --git a/test.sh b/test.sh index ee1657f6..1bb5720a 100755 --- a/test.sh +++ b/test.sh @@ -96,7 +96,7 @@ cd data if [[ ! -d wmt17-submitted-data ]]; then echo "Downloading and unpacking WMT'17 system submissions (46 MB)..." - wget -q http://data.statmt.org/wmt17/translation-task/wmt17-submitted-data-v1.0.tgz + wget -q https://data.statmt.org/wmt17/translation-task/wmt17-submitted-data-v1.0.tgz tar xzf wmt17-submitted-data-v1.0.tgz fi diff --git a/test/test_api.py b/test/test_api.py index 511b4c78..02ac9f03 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -44,7 +44,7 @@ def test_api_get_available_testsets(): the test sets found. """ available = get_available_testsets() - assert type(available) is list + assert isinstance(available, list) assert "wmt19" in available assert "wmt05" not in available @@ -59,12 +59,12 @@ def test_api_get_available_testsets_for_langpair(): the test sets found. 
""" available = get_available_testsets_for_langpair('en-it') - assert type(available) is list + assert isinstance(available, list) assert "wmt09" in available assert "wmt15" not in available available = get_available_testsets_for_langpair('en-fr') - assert type(available) is list + assert isinstance(available, list) assert "wmt11" in available assert "mtedx/test" in available assert "wmt20" not in available @@ -77,7 +77,7 @@ def test_api_get_langpairs_for_testset(): """ for testset in DATASETS.keys(): available = get_langpairs_for_testset(testset) - assert type(available) is list + assert isinstance(available, list) for langpair in DATASETS[testset].langpairs.keys(): # skip non-language keys if "-" not in langpair: diff --git a/test/test_dataset.py b/test/test_dataset.py index cff796dd..d3ece8cb 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -106,7 +106,7 @@ def test_wmt22_references(): # and that ref:A is the default for all languages where it wasn't overridden for langpair, langpair_data in wmt22.langpairs.items(): - if type(langpair_data) == dict: + if isinstance(langpair_data, dict): assert wmt22._get_langpair_allowed_refs(langpair) != ["ref:A"] else: assert wmt22._get_langpair_allowed_refs(langpair) == ["ref:A"] diff --git a/test/test_significance.py b/test/test_significance.py index 46679ac4..f7098328 100644 --- a/test/test_significance.py +++ b/test/test_significance.py @@ -1,9 +1,10 @@ import os from collections import defaultdict +from typing import DefaultDict from sacrebleu.metrics import BLEU -from sacrebleu.significance import PairedTest +from sacrebleu.significance import PairedTest, Result import pytest @@ -57,8 +58,8 @@ def _read_pickle_file(): } -SACREBLEU_BS_P_VALS = defaultdict(float) -SACREBLEU_AR_P_VALS = defaultdict(float) +SACREBLEU_BS_P_VALS: DefaultDict[str, float] = defaultdict(float) +SACREBLEU_AR_P_VALS: DefaultDict[str, float] = defaultdict(float) # Load data from pickled file to not bother with WMT17 downloading named_systems = _read_pickle_file() @@ -75,7 +76,9 @@ def _read_pickle_file(): test_type='bs', n_samples=2000)()[1] for name, result in zip(bs_scores['System'], bs_scores['BLEU']): + assert isinstance(result, Result) if result.p_value is not None: + assert isinstance(name, str) SACREBLEU_BS_P_VALS[name] += result.p_value @@ -87,7 +90,9 @@ def _read_pickle_file(): test_type='ar', n_samples=10000)()[1] for name, result in zip(ar_scores['System'], ar_scores['BLEU']): + assert isinstance(result, Result) if result.p_value is not None: + assert isinstance(name, str) SACREBLEU_AR_P_VALS[name] += result.p_value