diff --git a/.github/workflows/system.yml b/.github/workflows/system.yml
index fc1d212..0a78ee7 100644
--- a/.github/workflows/system.yml
+++ b/.github/workflows/system.yml
@@ -19,15 +19,15 @@ jobs:
       - name: Install the project
         run: uv sync
       - name: Set up commit0
-        run: uv run commit0 clone simpy
+        run: uv run commit0 setup simpy
       - name: Build docker images
         run: uv run commit0 build simpy
       - name: Get tests
         run: uv run commit0 get-tests simpy
       - name: Test
-        run: uv run commit0 test-reference simpy tests/test_event.py::test_succeed
+        run: uv run commit0 test simpy tests/test_event.py::test_succeed --reference
       - name: Evaluate
-        run: uv run commit0 evaluate-reference simpy
+        run: uv run commit0 evaluate simpy --reference
       - name: Lint
         run: uv run commit0 lint commit0/harness/lint.py
       - name: Save
diff --git a/commit0/__main__.py b/commit0/__main__.py
index c58f74e..9122a45 100644
--- a/commit0/__main__.py
+++ b/commit0/__main__.py
@@ -1,171 +1,10 @@
-import commit0.harness.run_pytest_ids
-import commit0.harness.get_pytest_ids
-import commit0.harness.build
-import commit0.harness.setup
-import commit0.harness.evaluate
-import commit0.harness.lint
-import commit0.harness.save
-import copy
-import sys
-import os
-import hydra
-from hydra.core.config_store import ConfigStore
-from commit0.configs.config_class import Commit0Config
-from commit0.harness.constants import COMMANDS, SPLIT
-from omegaconf import OmegaConf
+from commit0.cli import app as commit0_app
 
 
 def main() -> None:
-    command = sys.argv[1]
-    if command not in COMMANDS:
-        raise ValueError(
-            f"command must be from {', '.join(COMMANDS)}, but you provided {command}"
-        )
-    # type check config values
-    cs = ConfigStore.instance()
-    cs.store(name="user", group="Commit0Config", node=Commit0Config)
-    # have hydra to ignore all command-line arguments
-    sys_argv = copy.deepcopy(sys.argv)
-    cfg_arg = next((arg for arg in sys_argv if arg.startswith("--cfg=")), None)
-
-    hydra.initialize(version_base=None, config_path="configs")
-    config = hydra.compose(config_name="user")
-
-    if cfg_arg:
-        sys_argv.remove(cfg_arg)
-        config_name = cfg_arg.split("=")[1]
-        user_config = OmegaConf.load(config_name)
-        config = OmegaConf.merge(config, user_config)
-
-    # after hydra gets all configs, put command-line arguments back
-    sys.argv = sys_argv
-    # repo_split: split from command line has a higher priority than split in hydra
-    if command in [
-        "clone",
-        "build",
-        "evaluate",
-        "evaluate-reference",
-        "save",
-    ]:
-        if len(sys.argv) >= 3:
-            if sys.argv[2] not in SPLIT:
-                raise ValueError(
-                    f"repo split must be from {', '.join(SPLIT.keys())}, but you provided {sys.argv[2]}"
-                )
-            config.repo_split = sys.argv[2]
-    config.base_dir = os.path.abspath(config.base_dir)
-
-    if command == "clone":
-        if len(sys.argv) != 3:
-            raise ValueError(
-                "You provided an incorrect number of arguments.\nUsage: commit0 clone {repo_split}"
-            )
-        commit0.harness.setup.main(
-            config.dataset_name,
-            config.dataset_split,
-            config.repo_split,
-            config.base_dir,
-        )
-    elif command == "build":
-        if len(sys.argv) != 3:
-            raise ValueError(
-                "You provided an incorrect number of arguments.\nUsage: commit0 build {repo_split}"
-            )
-        commit0.harness.build.main(
-            config.dataset_name,
-            config.dataset_split,
-            config.repo_split,
-            config.num_workers,
-            config.backend,
-        )
-    elif command == "get-tests":
-        if len(sys.argv) != 3:
-            raise ValueError(
-                "You provided an incorrect number of arguments.\nUsage: commit0 get-tests {repo_name}"
-            )
-        repo = sys.argv[2]
-        commit0.harness.get_pytest_ids.main(repo, stdout=True)
-    elif command == "test" or command == "test-reference":
-        # this command assume execution in arbitrary working directory
-        repo_or_repo_path = sys.argv[2]
-        if command == "test-reference":
-            if len(sys.argv) != 4:
-                raise ValueError(
-                    "You provided an incorrect number of arguments.\nUsage: commit0 test-reference {repo_dir} {test_ids}"
-                )
-            branch = "reference"
-            test_ids = sys.argv[3]
-        else:
-            if len(sys.argv) != 5:
-                raise ValueError(
-                    "You provided an incorrect number of arguments.\nUsage: commit0 test {repo_dir} {branch} {test_ids}"
-                )
-            branch = sys.argv[3]
-            test_ids = sys.argv[4]
-        if branch.startswith("branch="):
-            branch = branch[len("branch=") :]
-        commit0.harness.run_pytest_ids.main(
-            config.dataset_name,
-            config.dataset_split,
-            config.base_dir,
-            repo_or_repo_path,
-            branch,
-            test_ids,
-            config.backend,
-            config.timeout,
-            config.num_cpus,
-            stdout=True,
-        )
-    elif command == "evaluate" or command == "evaluate-reference":
-        if command == "evaluate-reference":
-            if len(sys.argv) != 3:
-                raise ValueError(
-                    "You provided an incorrect number of arguments.\nUsage: commit0 evaluate-reference {repo_split}"
-                )
-            branch = "reference"
-        else:
-            if len(sys.argv) != 4:
-                raise ValueError(
-                    "You provided an incorrect number of arguments.\nUsage: commit0 evaluate {repo_split} {branch}"
-                )
-            branch = sys.argv[3]
-        if branch.startswith("branch="):
-            branch = branch[len("branch=") :]
-        commit0.harness.evaluate.main(
-            config.dataset_name,
-            config.dataset_split,
-            config.repo_split,
-            config.base_dir,
-            branch,
-            config.backend,
-            config.timeout,
-            config.num_cpus,
-            config.num_workers,
-        )
-    elif command == "lint":
-        files = sys.argv[1:]
-        commit0.harness.lint.main(config.base_dir, files)
-    elif command == "save":
-        if len(sys.argv) != 5:
-            raise ValueError(
-                "You provided an incorrect number of arguments.\nUsage: commit0 save {repo_split} {owner} {branch}"
-            )
-        owner = sys.argv[3]
-        branch = sys.argv[4]
-        if branch.startswith("branch="):
-            branch = branch[len("branch=") :]
-        commit0.harness.save.main(
-            config.dataset_name,
-            config.dataset_split,
-            config.repo_split,
-            config.base_dir,
-            owner,
-            branch,
-            config.github_token,
-        )
+    """Run the commit0 command-line interface (the Typer app from commit0.cli)."""
+    commit0_app()
 
 
 if __name__ == "__main__":
     main()
-
-__all__ = []
diff --git a/commit0/cli.py b/commit0/cli.py
new file mode 100644
index 0000000..688bb4d
--- /dev/null
+++ b/commit0/cli.py
@@ -0,0 +1,265 @@
+import typer
+from pathlib import Path
+from typing import List, Union
+from typing_extensions import Annotated
+import commit0.harness.run_pytest_ids
+import commit0.harness.get_pytest_ids
+import commit0.harness.build
+import commit0.harness.setup
+import commit0.harness.evaluate
+import commit0.harness.lint
+import commit0.harness.save
+from commit0.harness.constants import SPLIT, SPLIT_ALL
+
+app = typer.Typer(add_completion=False)
+
+
+class Colors:  # ANSI SGR escape sequences used by highlight() to colour CLI text
+    RESET = "\033[0m"  # SGR 0: restore the terminal's default rendition
+    RED = "\033[91m"  # SGR 91: bright red foreground
+    YELLOW = "\033[93m"  # SGR 93: bright yellow foreground
+    CYAN = "\033[96m"  # SGR 96: bright cyan foreground
+    ORANGE = "\033[95m"  # NOTE(review): SGR 95 is bright magenta, not orange — confirm intent
+
+
+def highlight(text: str, color: str) -> str:
+    """Return `text` preceded by the escape code `color` and followed by Colors.RESET."""
+    return "{}{}{}".format(color, text, Colors.RESET)
+
+
+def check_valid(one: str, total: Union[list[str], dict[str, list[str]]]) -> None:
+    """Raise typer.BadParameter unless `one` is an element (or a key) of `total`."""
+    allowed = list(total)  # iterating a dict yields its keys
+    if one not in allowed:
+        valid = ", ".join(highlight(name, Colors.ORANGE) for name in allowed)
+        raise typer.BadParameter(
+            f"Invalid {highlight('REPO_OR_REPO_SPLIT', Colors.RED)}. Must be one of: {valid}",
+            param_hint="REPO or REPO_SPLIT",
+        )
+
+
+@app.command()
+def setup(
+    repo_split: str = typer.Argument(
+        ...,
+        help=f"Split of repositories, one of: {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT)}",
+    ),
+    dataset_name: str = typer.Option(
+        "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset"
+    ),
+    dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
+    base_dir: str = typer.Option("repos/", help="Base directory to clone repos to"),
+) -> None:
+    """Commit0 clone a repo split."""
+    # This command takes over from the former `commit0 clone` entry point.
+    check_valid(repo_split, SPLIT)
+
+    for message in (
+        f"Cloning repository for split: {repo_split}",
+        f"Dataset name: {dataset_name}",
+        f"Dataset split: {dataset_split}",
+        f"Base directory: {base_dir}",
+    ):
+        typer.echo(message)
+
+    # Delegate to commit0.harness.setup with the validated split.
+    commit0.harness.setup.main(dataset_name, dataset_split, repo_split, base_dir)
+
+
+@app.command()
+def build(
+    repo_split: str = typer.Argument(
+        ...,
+        help=f"Split of repositories, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT)}",
+    ),
+    dataset_name: str = typer.Option(
+        "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset"
+    ),
+    dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
+    num_workers: int = typer.Option(8, help="Number of workers"),
+) -> None:
+    """Commit0 build a repository."""
+    # Typer renders the docstring above as this command's help text.
+    check_valid(repo_split, SPLIT)
+
+    for message in (
+        f"Building repository for split: {repo_split}",
+        f"Dataset name: {dataset_name}",
+        f"Dataset split: {dataset_split}",
+        f"Number of workers: {num_workers}",
+    ):
+        typer.echo(message)
+
+    # build.main takes no backend argument; it builds through the local Docker client.
+    commit0.harness.build.main(dataset_name, dataset_split, repo_split, num_workers)
+
+
+@app.command()
+def get_tests(
+    repo_name: str = typer.Argument(
+        ...,
+        help=f"Name of the repository to get tests for, one of: {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT_ALL)}",
+    ),
+) -> None:
+    """Get tests for a Commit0 repository."""
+    # Invoked as `commit0 get-tests <repo>`; the name is checked against SPLIT_ALL, not SPLIT.
+    check_valid(repo_name, SPLIT_ALL)
+    typer.echo(f"Getting tests for repository: {repo_name}")
+    # stdout=True is forwarded to the harness — presumably it prints the test IDs; confirm there.
+    commit0.harness.get_pytest_ids.main(repo_name, stdout=True)
+
+
+@app.command()
+def test(
+    repo_or_repo_path: str = typer.Argument(
+        ..., help="Directory of the repository to test"
+    ),
+    test_ids: str = typer.Argument(
+        ...,
+        help='All ways pytest supports to run and select tests. Please provide a single string. Example: "test_mod.py", "testing/", "test_mod.py::test_func", "-k \'MyClass and not method\'"',
+    ),
+    branch: Union[str, None] = typer.Option(
+        None, help="Branch to test (branch MUST be provided or use --reference)"
+    ),
+    dataset_name: str = typer.Option(
+        "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset"
+    ),
+    dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
+    base_dir: str = typer.Option("repos/", help="Base directory of repos"),
+    backend: str = typer.Option("local", help="Backend to use for testing"),
+    timeout: int = typer.Option(1800, help="Timeout for tests in seconds"),
+    num_cpus: int = typer.Option(1, help="Number of CPUs to use"),
+    reference: Annotated[
+        bool, typer.Option("--reference", help="Test the reference commit.")
+    ] = False,
+) -> None:
+    """Run tests on a Commit0 repository."""
+    # Strip every trailing slash so "repos/simpy//" still yields the name "simpy".
+    repo_or_repo_path = repo_or_repo_path.rstrip("/")
+    check_valid(repo_or_repo_path.split("/")[-1], SPLIT_ALL)
+    if not branch and not reference:
+        raise typer.BadParameter(
+            f"Invalid {highlight('BRANCH', Colors.RED)}. Either --reference or provide a branch name.",
+            param_hint="BRANCH",
+        )
+    if reference:  # --reference overrides any --branch value
+        branch = "reference"
+    assert branch is not None, "branch is not specified"  # narrows the type to str
+
+    typer.echo(f"Running tests for repository: {repo_or_repo_path}")
+    typer.echo(f"Branch: {branch}")
+    typer.echo(f"Test IDs: {test_ids}")
+
+    commit0.harness.run_pytest_ids.main(
+        dataset_name,
+        dataset_split,
+        base_dir,
+        repo_or_repo_path,
+        branch,
+        test_ids,
+        backend,
+        timeout,
+        num_cpus,
+        stdout=True,
+    )
+
+
+@app.command()
+def evaluate(
+    repo_split: str = typer.Argument(
+        ...,
+        help=f"Split of repositories, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT)}",
+    ),
+    branch: Union[str, None] = typer.Option(
+        None, help="Branch to evaluate (branch MUST be provided or use --reference)"
+    ),
+    dataset_name: str = typer.Option(
+        "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset"
+    ),
+    dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
+    base_dir: str = typer.Option("repos/", help="Base directory of repos"),
+    backend: str = typer.Option("local", help="Backend to use for evaluation"),
+    timeout: int = typer.Option(1800, help="Timeout for evaluation in seconds"),
+    num_cpus: int = typer.Option(1, help="Number of CPUs to use"),
+    num_workers: int = typer.Option(8, help="Number of workers to use"),
+    reference: Annotated[
+        bool, typer.Option("--reference", help="Evaluate the reference commit.")
+    ] = False,
+) -> None:
+    """Evaluate a Commit0 repository."""
+    if not branch and not reference:
+        raise typer.BadParameter(
+            f"Invalid {highlight('BRANCH', Colors.RED)}. Either --reference or provide a branch name",
+            param_hint="BRANCH",
+        )
+    if reference:  # --reference overrides any --branch value
+        branch = "reference"
+    assert branch is not None, "branch is not specified"  # narrows the type to str
+    check_valid(repo_split, SPLIT)
+
+    typer.echo(f"Evaluating repository split: {repo_split}")
+    typer.echo(f"Branch: {branch}")
+
+    commit0.harness.evaluate.main(
+        dataset_name,
+        dataset_split,
+        repo_split,
+        base_dir,
+        branch,
+        backend,
+        timeout,
+        num_cpus,
+        num_workers,
+    )
+
+
+@app.command()
+def lint(
+    files: List[Path] = typer.Argument(
+        ..., help="Files to lint. At least one existing file must be given."
+    ),
+) -> None:
+    """Lint the given files; each path must point to an existing file."""
+    if not files:  # explicit check: a bare assert would vanish under `python -O`
+        raise typer.BadParameter("No files to lint.", param_hint="FILES")
+    if (missing := next((p for p in files if not p.is_file()), None)) is not None:
+        raise FileNotFoundError(f"File not found: {missing}")
+    typer.echo(
+        f"Linting specific files: {', '.join(highlight(str(file), Colors.ORANGE) for file in files)}"
+    )
+    commit0.harness.lint.main(files)
+
+
+@app.command()
+def save(
+    repo_split: str = typer.Argument(
+        ...,
+        help=f"Split of the repository, one of {', '.join(highlight(key, Colors.ORANGE) for key in SPLIT)}",
+    ),
+    owner: str = typer.Argument(..., help="Owner of the repository"),
+    branch: str = typer.Argument(..., help="Branch to save"),
+    dataset_name: str = typer.Option(
+        "wentingzhao/commit0_docstring", help="Name of the Huggingface dataset"
+    ),
+    dataset_split: str = typer.Option("test", help="Split of the Huggingface dataset"),
+    base_dir: str = typer.Option("repos/", help="Base directory of repos"),
+    github_token: str = typer.Option(None, help="GitHub token for authentication"),
+) -> None:
+    """Save a Commit0 repository to GitHub."""
+    check_valid(repo_split, SPLIT)  # rejects anything that is not a key of SPLIT
+
+    typer.echo(f"Saving repository split: {repo_split}")
+    typer.echo(f"Owner: {owner}")
+    typer.echo(f"Branch: {branch}")
+    commit0.harness.save.main(
+        dataset_name,
+        dataset_split,
+        repo_split,
+        base_dir,
+        owner,
+        branch,
+        github_token,
+    )
+
+
+__all__ = []
diff --git a/commit0/harness/build.py b/commit0/harness/build.py
index 37c4d1f..addcc7d 100644
--- a/commit0/harness/build.py
+++ b/commit0/harness/build.py
@@ -19,7 +19,6 @@ def main(
     dataset_split: str,
     repo_split: str,
     num_workers: int,
-    backend: str,
 ) -> None:
     dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     specs = []
@@ -30,9 +29,8 @@ def main(
         spec = make_spec(example)
         specs.append(spec)
 
-    if backend == "local":
-        client = docker.from_env()
-        build_repo_images(client, specs, num_workers)
+    client = docker.from_env()
+    build_repo_images(client, specs, num_workers)
 
 
 __all__ = []
diff --git a/commit0/harness/lint.py b/commit0/harness/lint.py
index edea70a..ff2bd34 100644
--- a/commit0/harness/lint.py
+++ b/commit0/harness/lint.py
@@ -1,6 +1,7 @@
 import subprocess
 import sys
 from pathlib import Path
+from typing import List
 
 
 config = """repos:
@@ -27,7 +28,7 @@
     - id: pyright"""
 
 
-def main(base_dir: str, files: list[str]) -> None:
+def main(files: List[Path]) -> None:
     config_file = Path(".commit0.pre-commit-config.yaml")
     if not config_file.is_file():
         config_file.write_text(config)
diff --git a/pyproject.toml b/pyproject.toml
index 8c7fb6f..acefbb3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = [
     "pre-commit>=3.8.0",
     "hydra-core>=1.3.2",
     "modal>=0.64.95",
+    "typer>=0.12.0",
     "aider-chat",
     "datasets>=3.0.0",
     "docker>=7.1.0",
diff --git a/uv.lock b/uv.lock
index 4c6b612..f18b588 100644
--- a/uv.lock
+++ b/uv.lock
@@ -441,6 +441,7 @@ dependencies = [
     { name = "pre-commit" },
     { name = "pytest" },
     { name = "ruff" },
+    { name = "typer" },
 ]
 
 [package.metadata]
@@ -456,6 +457,7 @@ requires-dist = [
     { name = "pre-commit", specifier = ">=3.8.0" },
     { name = "pytest", specifier = ">=8.3.3" },
     { name = "ruff", specifier = ">=0.6.4" },
+    { name = "typer", specifier = ">=0.12.0" },
 ]
 
 [[package]]