commit-0 · wenting-zhao · Sep 10, 2024 · Sep 10, 2024 · Sep 11, 2024 · Sep 11, 2024
diff --git a/commit0/__main__.py b/commit0/__main__.py
@@ -1,18 +1,47 @@
 import commit0.harness.run_pytest_ids
 import commit0.harness.build
 import commit0.harness.setup
+import copy
 import sys
+import hydra
+from hydra.core.config_store import ConfigStore
+from commit0.configs.config_class import Commit0Config
 
 
 def main() -> None:
     command = sys.argv[1]
+    # type check config values
+    cs = ConfigStore.instance()
+    cs.store(name="base", node=Commit0Config)
+    # make hydra ignore all command-line arguments
+    sys_argv = copy.deepcopy(sys.argv)
+    sys.argv = [sys.argv[0]]
+    hydra.initialize(version_base=None, config_path="configs")
+    config = hydra.compose(config_name="base")
+    # after hydra gets all configs, put command-line arguments back
+    sys.argv = sys_argv
 
     if command == "clone":
-        commit0.harness.setup.main()
+        commit0.harness.setup.main(
+            config.dataset_name, config.dataset_split, config.base_dir
+        )
     elif command == "build":
-        commit0.harness.build.main()
+        commit0.harness.build.main(
+            config.dataset_name, config.dataset_split, config.num_workers
+        )
     elif command == "test":
-        commit0.harness.run_pytest_ids.main()
+        repo = sys.argv[2]
+        test_ids = sys.argv[3]
+        commit0.harness.run_pytest_ids.main(
+            config.dataset_name,
+            config.dataset_split,
+            config.base_dir,
+            repo,
+            config.branch,
+            test_ids,
+            config.backend,
+            config.timeout,
+        )
 
 
 if __name__ == "__main__":

diff --git a/commit0/configs/base.yaml b/commit0/configs/base.yaml
@@ -0,0 +1,18 @@
+defaults:
+  - _self_
+
+# shared in all steps
+dataset_name: wentingzhao/commit0_docstring
+dataset_split: test
+
+# clone related
+base_dir: repos/
+
+# build related
+build: all
+num_workers: 8
+
+# test related
+backend: local
+branch: ai
+timeout: 1_800
diff --git a/commit0/configs/config_class.py b/commit0/configs/config_class.py
@@ -0,0 +1,23 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Commit0Config:
+    # shared in all steps
+    dataset_name: str
+    dataset_split: str
+
+    # clone related
+    base_dir: str
+
+    # build related
+    # which repo to build: all repos or a single one
+    build: str
+    num_workers: int
+
+    # test related
+    backend: str
+    # which branch to work on
+    branch: str
+    # timeout for running pytest
+    timeout: int
diff --git a/commit0/harness/build.py b/commit0/harness/build.py
@@ -4,28 +4,25 @@
 from datasets import load_dataset
 from typing import Iterator
 
-from omegaconf import DictConfig
 from commit0.harness.docker_build import build_repo_images
 from commit0.harness.spec import make_spec
 from commit0.harness.constants import RepoInstance
-import hydra
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 )
 logger = logging.getLogger(__name__)
 
 
-@hydra.main(version_base=None, config_path="configs", config_name="base")
-def main(config: DictConfig) -> None:
-    dataset: Iterator[RepoInstance] = load_dataset(hf_name, split="test")  # type: ignore
+def main(dataset_name: str, dataset_split: str, num_workers: int) -> None:
+    dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     specs = []
     for example in dataset:
         spec = make_spec(example)
         specs.append(spec)
 
     client = docker.from_env()
-    build_repo_images(client, specs)
+    build_repo_images(client, specs, num_workers)
     logger.info("Done building docker images")
 
 

diff --git a/commit0/harness/constants.py b/commit0/harness/constants.py
@@ -1,14 +1,14 @@
 from enum import Enum
 from pathlib import Path
-from typing import TypedDict
+from typing import Dict, TypedDict
 
 
 class RepoInstance(TypedDict):
     repo: str
     base_commit: str
     reference_commit: str
     setup: dict
-    test: str
+    test: Dict[str, str]
 
 
 # Constants - Evaluation Log Directories

diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py
@@ -119,24 +119,14 @@ def build_image(
         )
 
         # Log the build process continuously
-        buildlog = ""
         for chunk in response:
             if "stream" in chunk:
                 # Remove ANSI escape sequences from the log
                 chunk_stream = ansi_escape.sub("", chunk["stream"])
                 logger.info(chunk_stream.strip())
-                buildlog += chunk_stream
-            elif "errorDetail" in chunk:
-                # Decode error message, raise BuildError
-                logger.error(
-                    f"Error: {ansi_escape.sub('', chunk['errorDetail']['message'])}"
-                )
-                raise docker.errors.BuildError(
-                    chunk["errorDetail"]["message"], buildlog
-                )
         logger.info("Image built successfully!")
-    except docker.errors.BuildError as e:
-        logger.error(f"docker.errors.BuildError during {image_name}: {e}")
+    except docker.errors.APIError as e:
+        logger.error(f"docker.errors.APIError during {image_name}: {e}")
         raise BuildImageError(image_name, str(e), logger) from e
     except Exception as e:
         logger.error(f"Error building image {image_name}: {e}")

diff --git a/commit0/harness/docker_utils.py b/commit0/harness/docker_utils.py
@@ -11,8 +11,8 @@
 from pathlib import Path
 from io import BytesIO
 from typing import Optional, List, Union
-import docker.errors
 
+import docker.errors
 from docker.models.containers import Container
 
 HEREDOC_DELIMITER = "EOF_1399519320"  # different from dataset HEREDOC_DELIMITERs!
@@ -330,7 +330,6 @@ def log_error(x: str) -> None:
         def log_info(x: str) -> None:
             print(x)
 
-        raise_error = True
     elif logger == "quiet":
         # if logger is "quiet", don't print anything
         def log_info(x: str) -> None:
@@ -386,34 +385,31 @@ def exec_run_with_timeout(
     # Local variables to store the result of executing the command
     exec_result = ""
     exec_id = None
-    exception = None
     timed_out = False
 
     # Wrapper function to run the command
     def run_command() -> None:
-        nonlocal exec_result, exec_id, exception
+        nonlocal exec_result, exec_id
         try:
-            assert container.client is not None, "Client did not load"
-            exec_id = container.client.api.exec_create(container.id, cmd)["Id"]
-            exec_stream = container.client.api.exec_start(exec_id, stream=True)
+            exec_id = container.client.api.exec_create(container=container.id, cmd=cmd)[  # pyright: ignore
+                "Id"
+            ]
+            exec_stream = container.client.api.exec_start(exec_id=exec_id, stream=True)  # pyright: ignore
             for chunk in exec_stream:
                 exec_result += chunk.decode("utf-8", errors="replace")
-        except Exception as e:
-            exception = e
+        except docker.errors.APIError as e:
+            raise Exception(f"Container {container.id} cannot execute {cmd}.\n{str(e)}")
 
     # Start the command in a separate thread
     thread = threading.Thread(target=run_command)
     start_time = time.time()
     thread.start()
     thread.join(timeout)
 
-    if exception:
-        raise exception
-
     # If the thread is still alive, the command timed out
     if thread.is_alive():
         if exec_id is not None:
-            exec_pid = container.client.api.exec_inspect(exec_id)["Pid"]
+            exec_pid = container.client.api.exec_inspect(exec_id=exec_id)["Pid"]  # pyright: ignore
             container.exec_run(f"kill -TERM {exec_pid}", detach=True)
         timed_out = True
     end_time = time.time()

diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -6,10 +6,8 @@
 from pathlib import Path
 import logging
 
-from omegaconf import DictConfig, OmegaConf
-import hydra
-
-from commit0.harness.constants import RUN_PYTEST_LOG_DIR
+from typing import Iterator
+from commit0.harness.constants import RUN_PYTEST_LOG_DIR, RepoInstance
 from commit0.harness.docker_build import (
     close_logger,
     setup_logger,
@@ -196,39 +194,46 @@ def run_modal(
                 )
 
 
-@hydra.main(version_base=None, config_path="configs", config_name="base")
-def main(config: DictConfig) -> None:
-    OmegaConf.to_yaml(config)
-    dataset = load_dataset(config.dataset_name, split="test")
+def main(
+    dataset_name: str,
+    dataset_split: str,
+    base_dir: str,
+    repo: str,
+    branch: str,
+    test_ids: str,
+    backend: str,
+    timeout: int,
+) -> None:
+    dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     spec = None
     for example in dataset:
-        if example["repo"].endswith(config.repo):
+        if example["repo"].endswith(repo):
             spec = make_spec(example)
             break
     assert spec is not None, "No spec available"
 
-    hashed_test_ids = get_hash_string(config.test_ids)
+    hashed_test_ids = get_hash_string(test_ids)
     # set up logging
-    log_dir = RUN_PYTEST_LOG_DIR / config.repo / hashed_test_ids
+    log_dir = RUN_PYTEST_LOG_DIR / repo / hashed_test_ids
     log_dir.mkdir(parents=True, exist_ok=True)
     log_file = log_dir / "run_pytest.log"
-    logger = setup_logger(config.repo, log_file)
+    logger = setup_logger(repo, log_file)
 
     # make eval file
     eval_script = spec.eval_script.format(
-        local_repo=f"{config.base_dir}/{config.repo}",
-        branch_name=config.branch,
-        test_ids=config.test_ids,
-        ip=get_ip(config.backend),
+        local_repo=f"{base_dir}/{repo}",
+        branch_name=branch,
+        test_ids=test_ids,
+        ip=get_ip(backend),
         user=get_user(),
     )
     eval_file = Path(log_dir / "eval.sh")
     eval_file.write_text(eval_script)
 
-    if ExecutionBackend(config.backend) == ExecutionBackend.LOCAL:
-        run_docker(spec, logger, eval_file, config.timeout, log_dir)
-    elif ExecutionBackend(config.backend) == ExecutionBackend.MODAL:
-        run_modal(spec, logger, eval_file, config.timeout, log_dir)
+    if ExecutionBackend(backend) == ExecutionBackend.LOCAL:
+        run_docker(spec, logger, eval_file, timeout, log_dir)
+    elif ExecutionBackend(backend) == ExecutionBackend.MODAL:
+        run_modal(spec, logger, eval_file, timeout, log_dir)
 
 
 __all__ = []
diff --git a/commit0/harness/setup.py b/commit0/harness/setup.py
@@ -2,9 +2,7 @@
 import os
 
 import docker
-import hydra
 from datasets import load_dataset
-from omegaconf import DictConfig
 
 from typing import Iterator
 from commit0.harness.utils import clone_repo
@@ -19,9 +17,8 @@
 logger = logging.getLogger(__name__)
 
 
-@hydra.main(version_base=None, config_path="configs", config_name="base")
-def main(config: DictConfig) -> None:
-    dataset: Iterator[RepoInstance] = load_dataset(hf_name, split="test")  # type: ignore
+def main(dataset_name: str, dataset_split: str, base_dir: str) -> None:
+    dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     out = dict()
     specs = []
     for example in dataset: