oushu1zhangxiangxuan1 · oushu1zhangxiangxuan1 · Mar 20, 2023 · Feb 23, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -9,11 +9,24 @@ parameters:
         default: false
 
 jobs:
+    # Fail early unless the pipeline runs under the `huggingface` CircleCI organization (not a personal account)
+    check_circleci_user:
+        docker:
+            - image: cimg/python:3.8.12
+        parallelism: 1
+        steps:
+            - run: echo $CIRCLE_PROJECT_USERNAME
+            - run: |
+                if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
+                    exit 0
+                else
+                    echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit 1
+                fi
     # Fetch the tests to run
     fetch_tests:
         working_directory: ~/transformers
         docker:
-            - image: cimg/python:3.7.12
+            - image: cimg/python:3.8.12
         parallelism: 1
         steps:
             - checkout
@@ -72,7 +85,7 @@ jobs:
     fetch_all_tests:
         working_directory: ~/transformers
         docker:
-            - image: cimg/python:3.7.12
+            - image: cimg/python:3.8.12
         parallelism: 1
         steps:
             - checkout
@@ -98,7 +111,7 @@ jobs:
     check_code_quality:
         working_directory: ~/transformers
         docker:
-            - image: cimg/python:3.7.12
+            - image: cimg/python:3.8.12
         resource_class: large
         environment:
             TRANSFORMERS_IS_CI: yes
@@ -108,8 +121,8 @@ jobs:
             - checkout
             - restore_cache:
                   keys:
-                      - v0.5-code_quality-{{ checksum "setup.py" }}
-                      - v0.5-code-quality
+                      - v0.6-code_quality-{{ checksum "setup.py" }}
+                      - v0.6-code_quality  # fallback: must be a prefix of the key above to ever match
             - run: pip install --upgrade pip
             - run: pip install .[all,quality]
             - save_cache:
@@ -121,18 +134,17 @@ jobs:
                 command: pip freeze | tee installed.txt
             - store_artifacts:
                   path: ~/transformers/installed.txt
-            - run: black --check --preview examples tests src utils
-            - run: isort --check-only examples tests src utils
+            - run: black --check examples tests src utils
+            - run: ruff examples tests src utils
             - run: python utils/custom_init_isort.py --check_only
             - run: python utils/sort_auto_mappings.py --check_only
-            - run: flake8 examples tests src utils
             - run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
             - run: python utils/check_doc_toc.py
 
     check_repository_consistency:
         working_directory: ~/transformers
         docker:
-            - image: cimg/python:3.7.12
+            - image: cimg/python:3.8.12
         resource_class: large
         environment:
             TRANSFORMERS_IS_CI: yes
@@ -142,8 +154,8 @@ jobs:
             - checkout
             - restore_cache:
                   keys:
-                      - v0.5-repository_consistency-{{ checksum "setup.py" }}
-                      - v0.5-repository_consistency
+                      - v0.6-repository_consistency-{{ checksum "setup.py" }}
+                      - v0.6-repository_consistency
             - run: pip install --upgrade pip
             - run: pip install .[all,quality]
             - save_cache:
@@ -161,23 +173,28 @@ jobs:
             - run: python utils/check_repo.py
             - run: python utils/check_inits.py
             - run: python utils/check_config_docstrings.py
+            - run: python utils/check_config_attributes.py
+            - run: python utils/check_doctest_list.py
             - run: make deps_table_check_updated
             - run: python utils/tests_fetcher.py --sanity_check
             - run: python utils/update_metadata.py --check-only
+            - run: python utils/check_task_guides.py
 
 workflows:
     version: 2
     setup_and_quality:
         when:
             not: <<pipeline.parameters.nightly>>
         jobs:
+            - check_circleci_user
             - check_code_quality
             - check_repository_consistency
             - fetch_tests
 
     nightly:
         when: <<pipeline.parameters.nightly>>
         jobs:
+            - check_circleci_user
             - check_code_quality
             - check_repository_consistency
             - fetch_all_tests
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
@@ -15,24 +15,33 @@
 
 import argparse
 import copy
+import glob
 import os
+import random
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
 import yaml
 
 
-COMMON_ENV_VARIABLES = {"OMP_NUM_THREADS": 1, "TRANSFORMERS_IS_CI": True, "PYTEST_TIMEOUT": 120}
+COMMON_ENV_VARIABLES = {
+    "OMP_NUM_THREADS": 1,
+    "TRANSFORMERS_IS_CI": True,
+    "PYTEST_TIMEOUT": 120,
+    "RUN_PIPELINE_TESTS": False,
+    "RUN_PT_TF_CROSS_TESTS": False,
+    "RUN_PT_FLAX_CROSS_TESTS": False,
+}
 COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "s": None}
-DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.7.12"}]
+DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
 
 
 @dataclass
 class CircleCIJob:
     name: str
     additional_env: Dict[str, Any] = None
     cache_name: str = None
-    cache_version: str = "0.5"
+    cache_version: str = "0.6"
     docker_image: List[Dict[str, str]] = None
     install_steps: List[str] = None
     marker: Optional[str] = None
@@ -58,12 +67,16 @@ def __post_init__(self):
             self.pytest_options = {}
         if isinstance(self.tests_to_run, str):
             self.tests_to_run = [self.tests_to_run]
+        if self.parallelism is None:
+            self.parallelism = 1
 
     def to_dict(self):
+        env = COMMON_ENV_VARIABLES.copy()
+        env.update(self.additional_env)
         job = {
             "working_directory": self.working_directory,
             "docker": self.docker_image,
-            "environment": {**COMMON_ENV_VARIABLES, **self.additional_env},
+            "environment": env,
         }
         if self.resource_class is not None:
             job["resource_class"] = self.resource_class
@@ -99,10 +112,57 @@ def to_dict(self):
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
         )
         test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
-        if self.tests_to_run is None:
-            test_command += " << pipeline.parameters.tests_to_run >>"
+        if self.parallelism == 1:
+            if self.tests_to_run is None:
+                test_command += " << pipeline.parameters.tests_to_run >>"
+            else:
+                test_command += " " + " ".join(self.tests_to_run)
         else:
-            test_command += " " + " ".join(self.tests_to_run)
+            # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
+            tests = self.tests_to_run
+            if tests is None:
+                folder = os.environ["test_preparation_dir"]
+                test_file = os.path.join(folder, "filtered_test_list.txt")
+                if os.path.exists(test_file):
+                    with open(test_file) as f:
+                        # `split()` rather than `split(" ")`: a trailing newline or doubled space must not yield bogus entries
+                        tests = f.read().split()
+
+            # expand the test list
+            if tests == ["tests"]:
+                tests = [os.path.join("tests", x) for x in os.listdir("tests")]
+            expanded_tests = []
+            for test in tests:
+                # Only these two folders are expanded one level deeper; test files and other folders are kept as is
+                if test in ("tests/models", "tests/pipelines"):
+                    expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
+                else:
+                    expanded_tests.append(test)
+            # Avoid long tests always being collected together
+            random.shuffle(expanded_tests)
+            tests = " ".join(expanded_tests)
+
+            # Each executor to run ~10 tests
+            # Count the entries of `expanded_tests`: `tests` is a space-joined string here, so `len(tests)` counts characters
+            n_executors = max(len(expanded_tests) // 10, 1)
+            # Avoid empty test list on some executor(s) or launching too many executors
+            if n_executors > self.parallelism:
+                n_executors = self.parallelism
+            job["parallelism"] = n_executors
+
+            # Need to be newline separated for the command `circleci tests split` below
+            command = f'echo {tests} | tr " " "\\n" >> tests.txt'
+            steps.append({"run": {"name": "Get tests", "command": command}})
+
+            command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
+            steps.append({"run": {"name": "Split tests", "command": command}})
+
+            steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
+            steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
+
+            # Run only the tests that the "Split tests" step wrote to `splitted_tests.txt`
+            test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command += " $(cat splitted_tests.txt)"
         if self.marker is not None:
             test_command += f" -m {self.marker}"
         test_command += " | tee tests_output.txt"
@@ -156,6 +216,7 @@ def job_name(self):
         "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
         "pip install git+https://github.com/huggingface/accelerate",
     ],
+    parallelism=1,
     pytest_num_workers=3,
 )
 
@@ -168,6 +229,7 @@ def job_name(self):
         "pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
         "pip install tensorflow_probability",
     ],
+    parallelism=1,
     pytest_options={"rA": None},
 )
 
@@ -179,31 +241,34 @@ def job_name(self):
         "pip install --upgrade pip",
         "pip install .[flax,testing,sentencepiece,flax-speech,vision]",
     ],
+    parallelism=1,
     pytest_options={"rA": None},
 )
 
 
 pipelines_torch_job = CircleCIJob(
     "pipelines_torch",
+    additional_env={"RUN_PIPELINE_TESTS": True},
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
         "pip install --upgrade pip",
-        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
+        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video]",
     ],
     pytest_options={"rA": None},
-    tests_to_run="tests/pipelines/"
+    marker="is_pipeline_test",
 )
 
 
 pipelines_tf_job = CircleCIJob(
     "pipelines_tf",
+    additional_env={"RUN_PIPELINE_TESTS": True},
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[sklearn,tf-cpu,testing,sentencepiece]",
+        "pip install .[sklearn,tf-cpu,testing,sentencepiece,vision]",
         "pip install tensorflow_probability",
     ],
     pytest_options={"rA": None},
-    tests_to_run="tests/pipelines/"
+    marker="is_pipeline_test",
 )
 
 
@@ -298,13 +363,14 @@ def job_name(self):
 )
 
 
-layoutlm_job = CircleCIJob(
-    "layoutlmv2_and_v3",
+exotic_models_job = CircleCIJob(
+    "exotic_models",
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev",
         "pip install --upgrade pip",
         "pip install .[torch,testing,vision]",
         "pip install torchvision",
+        "pip install scipy",
         "pip install 'git+https://github.com/facebookresearch/detectron2.git'",
         "sudo apt install tesseract-ocr",
         "pip install pytesseract",
@@ -313,6 +379,7 @@ def job_name(self):
     tests_to_run=[
         "tests/models/*layoutlmv*",
         "tests/models/*nat",
+        "tests/models/deta",
     ],
     pytest_num_workers=1,
     pytest_options={"durations": 100},
@@ -323,11 +390,11 @@ def job_name(self):
     "repo_utils",
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[quality,testing]",
+        "pip install .[quality,testing,torch]",
     ],
     parallelism=None,
     pytest_num_workers=1,
-    resource_class=None,
+    resource_class="large",
     tests_to_run="tests/repo_utils",
 )
 
@@ -340,7 +407,7 @@ def job_name(self):
     custom_tokenizers_job,
     hub_job,
     onnx_job,
-    layoutlm_job,
+    exotic_models_job,
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,
@@ -356,6 +423,8 @@ def job_name(self):
 def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()
+    # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
+    os.environ["test_preparation_dir"] = folder
     jobs = []
     all_test_file = os.path.join(folder, "test_list.txt")
     if os.path.exists(all_test_file):
@@ -378,7 +447,7 @@ def create_circleci_config(folder=None):
     example_file = os.path.join(folder, "examples_test_list.txt")
     if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
         jobs.extend(EXAMPLES_TESTS)
-    
+
     repo_util_file = os.path.join(folder, "test_repo_utils.txt")
     if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
         jobs.extend(REPO_UTIL_TESTS)