Skip to content

Commit

Permalink
OmegaConf integration
Browse files (browse the repository at this point in the history)
  • Loading branch information
wenting-zhao committed Sep 10, 2024
1 parent 3ed0d9a commit dbdd0e1
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 89 deletions.
70 changes: 25 additions & 45 deletions commit0/harness/run_pytest_ids.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
from datasets import load_dataset
import docker
from enum import StrEnum, auto
import modal
Expand All @@ -7,6 +8,9 @@
import yaml
from pathlib import Path

from omegaconf import DictConfig, OmegaConf
import hydra

from commit0.harness.constants import RUN_PYTEST_LOG_DIR
from commit0.harness.docker_build import (
close_logger,
Expand All @@ -32,7 +36,7 @@


class ExecutionBackend(StrEnum):
DOCKER = auto()
LOCAL = auto()
MODAL = auto()


Expand Down Expand Up @@ -188,62 +192,38 @@ def run_modal(spec, logger, eval_file, timeout, log_dir):
)


def main(
repo: str,
test_ids: list[str],
timeout: int,
branch_name: str,
backend: ExecutionBackend,
) -> None:
with open("config.yml", "r") as file:
data = yaml.safe_load(file)
spec = make_spec(data["repos"][repo])
test_ids = " ".join(test_ids)
hashed_test_ids = get_hash_string(test_ids)
@hydra.main(version_base=None, config_path="configs", config_name="base")
def main(config: DictConfig) -> None:
OmegaConf.to_yaml(config)
dataset = load_dataset(config.dataset_name, split="test")
for example in dataset:
if example["repo"].endswith(config.repo):
spec = make_spec(example)
break

hashed_test_ids = get_hash_string(config.test_ids)
# set up logging
log_dir = RUN_PYTEST_LOG_DIR / repo / hashed_test_ids
log_dir = RUN_PYTEST_LOG_DIR / config.repo / hashed_test_ids
log_dir.mkdir(parents=True, exist_ok=True)
log_file = log_dir / "run_pytest.log"
logger = setup_logger(repo, log_file)
logger = setup_logger(config.repo, log_file)

# make eval file
eval_script = spec.eval_script.format(
local_repo=f"{data['base_repo_dir']}/{repo}",
branch_name=branch_name,
test_ids=test_ids,
ip=get_ip(data["backend"]),
local_repo=f"{config.base_dir}/{config.repo}",
branch_name=config.branch,
test_ids=config.test_ids,
ip=get_ip(config.backend),
user=get_user(),
)
eval_file = Path(log_dir / "eval.sh")
eval_file.write_text(eval_script)

if ExecutionBackend(backend) == ExecutionBackend.DOCKER:
run_docker(spec, logger, eval_file, timeout, log_dir)
elif ExecutionBackend(backend) == ExecutionBackend.MODAL:
run_modal(spec, logger, eval_file, timeout, log_dir)
if ExecutionBackend(config.backend) == ExecutionBackend.LOCAL:
run_docker(spec, logger, eval_file, config.timeout, log_dir)
elif ExecutionBackend(config.backend) == ExecutionBackend.MODAL:
run_modal(spec, logger, eval_file, config.timeout, log_dir)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--repo", type=str, help="which repo to run unit tests")
parser.add_argument(
"--test_ids", type=str, nargs="+", help="which test ids / files / directories"
)
parser.add_argument(
"--branch_name", type=str, help="which git branch to run unit tests"
)
parser.add_argument(
"--timeout",
type=int,
default=1_800,
help="Timeout (in seconds) for running tests for each instance",
)
parser.add_argument(
"--backend",
choices=[backend.value for backend in ExecutionBackend],
default=ExecutionBackend.DOCKER.value,
help="Execution backend [docker, modal]",
)
args = parser.parse_args()
main(**vars(args))
main()
58 changes: 14 additions & 44 deletions commit0/harness/setup.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,50 @@
import argparse
import logging
import os

import docker
import yaml
from datasets import load_dataset

from omegaconf import DictConfig, OmegaConf
import hydra

from commit0.harness.utils import clone_repo, create_branch
from commit0.harness.constants import REPO_IMAGE_BUILD_DIR
from commit0.harness.docker_build import build_repo_images
from commit0.harness.spec import make_spec


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


def main(hf_name: str, base_dir: str, config_file: str, backend: str, repo: str) -> None:
dataset = load_dataset(hf_name, split="test")
@hydra.main(version_base=None, config_path="configs", config_name="base")
def main(config: DictConfig) -> None:
OmegaConf.to_yaml(config)
dataset = load_dataset(config.dataset_name, split="test")
out = dict()
out["backend"] = backend
out["base_repo_dir"] = base_dir
out["repos"] = dict()
out["backend"] = config.backend
out["base_repo_dir"] = config.base_dir
specs = []
for example in dataset:
repo_name = example["repo"].split("/")[-1]
if repo != "all" and repo_name != repo:
if config.build != "all" and repo_name != repo:
logger.info(f"Skipping {repo_name}")
continue
spec = make_spec(example)
specs.append(spec)
out["repos"][repo_name] = example
clone_url = f"https://github.com/{example['repo']}.git"
clone_dir = os.path.join(out["base_repo_dir"], repo_name)
repo = clone_repo(clone_url, clone_dir, example["base_commit"], logger)
create_branch(repo, "aider", logger)
create_branch(repo, config.branch, logger)

config_file = os.path.abspath(config_file)
with open(config_file, "w") as f:
yaml.dump(out, f, default_flow_style=False)
logger.info(f"Config file has been written to {config_file}")
logger.info("Start building docker images")
logger.info(f"Please check {REPO_IMAGE_BUILD_DIR} for build details")
client = docker.from_env()
build_repo_images(client, specs)
logger.info("Done building docker images")


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--hf_name", type=str, help="HF dataset name")
parser.add_argument(
"--base_dir",
type=str,
default="repos/",
help="base directory to write repos to",
)
parser.add_argument(
"--config_file",
type=str,
default="config.yml",
help="where to write config file to",
)
parser.add_argument(
"--backend",
type=str,
choices=["local", "modal"],
default="modal",
help="specify evaluation backend to be local or modal (remote)",
)
parser.add_argument(
"--repo",
type=str,
default="all",
help="which repos to setup. all or one from dataset",
)
args = parser.parse_args()
main(**vars(args))
if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
"wget",
"ruff>=0.6.4",
"pre-commit>=3.8.0",
"hydra-core>=1.3.2",
]

[tool.pyright]
Expand Down
35 changes: 35 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit dbdd0e1

Please sign in to comment.