Skip to content

Commit

Permalink
Merge branch 'main' into hydra
Browse files Browse the repository at this point in the history
  • Loading branch information
wenting-zhao authored Sep 10, 2024
2 parents dbdd0e1 + bb3213c commit 0940d07
Show file tree
Hide file tree
Showing 14 changed files with 649 additions and 102 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# CI workflow that runs the repository's pre-commit hooks.
name: pre-commit

# Triggered by every pull request and by pushes to the main branch.
on:
  pull_request:
  push:
    branches: [main]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
    # Check out the repository, set up Python, then run the pre-commit action.
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v3
    - uses: pre-commit/action@v3.0.1
41 changes: 2 additions & 39 deletions commit0/__init__.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,3 @@
__version__ = "0.0.1"


from commit0.harness.docker_build import (
build_image,
build_base_images,
build_repo_images,
close_logger,
setup_logger,
)
"""Commit0 Lib"""

from commit0.harness.docker_utils import (
cleanup_container,
copy_to_container,
copy_from_container,
delete_file_from_container,
exec_run_with_timeout,
write_to_container,
create_container,
)

from commit0.harness.utils import (
extract_test_output,
)

__all__ = [
"build_image",
"build_base_images",
"build_repo_images",
"close_logger",
"setup_logger",
"cleanup_container",
"copy_to_container",
"copy_from_container",
"delete_file_from_container",
"exec_run_with_timeout",
"write_to_container",
"create_container",
"extract_test_output",
]
__version__ = "0.0.1"
30 changes: 30 additions & 0 deletions commit0/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import argparse
import commit0.harness.run_pytest_ids
import commit0.harness.build
import commit0.harness.setup


def main() -> None:
    """Parse the command line and hand off to the chosen commit0 subcommand.

    Registers the ``clone``, ``build`` and ``test`` subcommands, letting each
    harness module attach its own arguments, then calls that module's ``run``
    with the parsed namespace. When no subcommand is given, the help text is
    printed instead.
    """
    # Insertion order is the order in which the subcommands are registered.
    handlers = {
        "clone": commit0.harness.setup,
        "build": commit0.harness.build,
        "test": commit0.harness.run_pytest_ids,
    }

    cli = argparse.ArgumentParser(description="Commit0 version control system")
    commands = cli.add_subparsers(dest="command", help="Available commands")
    for name, module in handlers.items():
        module.add_init_args(commands.add_parser(name))

    parsed = cli.parse_args()
    selected = handlers.get(parsed.command)
    if selected is None:
        # No subcommand on the command line: show usage rather than fail.
        cli.print_help()
        return
    selected.run(parsed)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

# This module exports nothing through ``from ... import *``.
__all__ = []
62 changes: 62 additions & 0 deletions commit0/harness/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import argparse
import logging

import docker
from datasets import load_dataset
from typing import Iterator
from commit0.harness.docker_build import build_repo_images
from commit0.harness.spec import make_spec

# Configure logging at INFO level; each record is rendered as
# "timestamp - logger name - level - message".
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def main(
    hf_name: str,
    base_dir: str,
    config_file: str,
) -> None:
    """Build the Docker images for every repository in a dataset.

    Loads the ``test`` split of the dataset, turns each example into a spec
    with ``make_spec`` and passes all specs to ``build_repo_images``.

    Args:
    ----
        hf_name: Name of the Hugging Face dataset to load.
        base_dir: Not used by this function; kept so the signature matches
            the options registered for the ``build`` command.
        config_file: Not used by this function; kept for the same reason.

    """
    # Imported locally so the annotation below refers to a defined name; the
    # module-level imports of this file do not bring ``RepoInstance`` in.
    from commit0.harness.constants import RepoInstance

    dataset: Iterator[RepoInstance] = load_dataset(hf_name, split="test")
    specs = [make_spec(example) for example in dataset]

    client = docker.from_env()
    build_repo_images(client, specs)
    logger.info("Done building docker images")


def add_init_args(parser: argparse.ArgumentParser) -> None:
    """Register the ``build`` command's options on ``parser``.

    Adds the string options ``--hf_name``, ``--base_dir`` and
    ``--config_file`` with their defaults, and stores ``run`` as the parser's
    ``func`` default.
    """
    # (flag, default value, help text); every option is a plain string.
    options = (
        ("--hf_name", "wentingzhao/commit0_docstring", "HF dataset name"),
        ("--base_dir", "repos/", "base directory to write repos to"),
        ("--config_file", "config.yml", "where to write config file to"),
    )
    for flag, default, description in options:
        parser.add_argument(flag, type=str, default=default, help=description)
    parser.set_defaults(func=run)


def run(args: argparse.Namespace) -> None:
    """Call ``main`` with the build options read from the parsed arguments."""
    option_names = ("hf_name", "base_dir", "config_file")
    main(**{name: getattr(args, name) for name in option_names})


__all__ = []
1 change: 1 addition & 0 deletions commit0/harness/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class RepoInstance(TypedDict):
base_commit: str
reference_commit: str
setup: dict
test: str


# Constants - Evaluation Log Directories
Expand Down
4 changes: 4 additions & 0 deletions commit0/harness/docker_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import traceback
import docker
import docker.errors
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
Expand Down Expand Up @@ -296,3 +297,6 @@ def build_repo_images(

# Return the list of (un)successfully built images
return successful, failed


__all__ = []
61 changes: 45 additions & 16 deletions commit0/harness/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pathlib import Path
from io import BytesIO
from typing import Optional, List, Union
import docker.errors

from docker.models.containers import Container

Expand Down Expand Up @@ -107,7 +108,7 @@ def safe_extract(

tar.extractall(path, members, numeric_owner=numeric_owner)

safe_extract(tar, path=dst.parent)
safe_extract(tar, path=str(dst.parent))

# Move the extracted file to desired dst path if tar extraction gives src.name
extracted_file_path = dst.parent / src.name
Expand Down Expand Up @@ -192,7 +193,7 @@ def write_to_container(container: Container, data: str, dst: Path) -> None:
def cleanup_container(
client: docker.DockerClient,
container: docker.Container,
logger: Union[str, logging.Logger],
logger: Union[None, str, logging.Logger],
) -> None:
"""Stop and remove a Docker container.
Performs this forcefully if the container cannot be stopped with the python API.
Expand All @@ -211,8 +212,12 @@ def cleanup_container(

if not logger:
# if logger is None, print to stdout
log_error = print
log_info = print
def log_error(x: str) -> None:
print(x)

def log_info(x: str) -> None:
print(x)

raise_error = True
elif logger == "quiet":
# if logger is "quiet", don't print anything
Expand All @@ -224,9 +229,15 @@ def log_error(x: str) -> None:

raise_error = True
else:
assert isinstance(logger, logging.Logger)

# if logger is a logger object, use it
log_error = logger.info
log_info = logger.info
def log_error(x: str) -> None:
logger.info(x)

def log_info(x: str) -> None:
logger.info(x)

raise_error = False

# Attempt to stop the container
Expand Down Expand Up @@ -276,11 +287,11 @@ def log_error(x: str) -> None:
def create_container(
client: docker.DockerClient,
image_name: str,
container_name: str = None,
user: str = None,
command: str = None,
nano_cpus: int = None,
logger: Union[str, logging.Logger] = None,
container_name: Optional[str] = None,
user: Optional[str] = None,
command: Optional[str] = None,
nano_cpus: Optional[int] = None,
logger: Optional[Union[str, logging.Logger]] = None,
) -> Container:
"""Start a Docker container using the specified image.
Expand Down Expand Up @@ -312,19 +323,33 @@ def create_container(

if not logger:
# if logger is None, print to stdout
log_error = print
log_info = print
def log_error(x: str) -> None:
print(x)

def log_info(x: str) -> None:
print(x)

raise_error = True
elif logger == "quiet":
# if logger is "quiet", don't print anything
def log_info(x: str) -> None:
return None

def log_error(x: str) -> None:
return None

raise_error = True
else:
assert isinstance(logger, logging.Logger)

# if logger is a logger object, use it
log_error = logger.info
log_info = logger.info
def log_error(x: str) -> None:
logger.info(x)

def log_info(x: str) -> None:
logger.info(x)

raise_error = False

container = None
try:
Expand All @@ -349,7 +374,7 @@ def log_error(x: str) -> None:

def exec_run_with_timeout(
container: Container, cmd: str, timeout: Optional[int] = 60
) -> None:
) -> tuple[str, bool, float]:
"""Run a command in a container with a timeout.
Args:
Expand All @@ -369,6 +394,7 @@ def exec_run_with_timeout(
def run_command() -> None:
nonlocal exec_result, exec_id, exception
try:
assert container.client is not None, "Client did not load"
exec_id = container.client.api.exec_create(container.id, cmd)["Id"]
exec_stream = container.client.api.exec_start(exec_id, stream=True)
for chunk in exec_stream:
Expand All @@ -393,3 +419,6 @@ def run_command() -> None:
timed_out = True
end_time = time.time()
return exec_result, timed_out, end_time - start_time


__all__ = []
7 changes: 5 additions & 2 deletions commit0/harness/dockerfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,12 @@
"""


def get_dockerfile_base(platform):
def get_dockerfile_base(platform: str) -> str:
return _DOCKERFILE_BASE.format(platform=platform)


def get_dockerfile_repo(platform):
def get_dockerfile_repo(platform: str) -> str:
return _DOCKERFILE_REPO.format(platform=platform)


__all__ = []
Loading

0 comments on commit 0940d07

Please sign in to comment.