From 5b278b00730968dc8ef79bda6f39629e3864290e Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 22 Sep 2024 16:16:57 -0400
Subject: [PATCH 01/16] Build lint command from repo name; raise agent pool to 5 workers

---
 agent/commit0_utils.py | 8 ++++----
 agent/run_agent.py     | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/agent/commit0_utils.py b/agent/commit0_utils.py
index 08477b9..69e63a7 100644
--- a/agent/commit0_utils.py
+++ b/agent/commit0_utils.py
@@ -288,12 +288,12 @@ def get_changed_files(repo: git.Repo) -> list[str]:
     return files_changed
 
 
-def get_lint_cmd(repo: git.Repo, use_lint_info: bool) -> str:
-    """Generate a linting command based on whether to include files changed in the latest commit.
+def get_lint_cmd(repo_name: str, use_lint_info: bool) -> str:
+    """Generate a linting command based on whether to include files.
 
     Args:
     ----
-        repo (git.Repo): An instance of GitPython's Repo object representing the Git repository.
+        repo_name (str): The name of the repository.
         use_lint_info (bool): A flag indicating whether to include changed files in the lint command.
 
     Returns:
@@ -304,7 +304,7 @@ def get_lint_cmd(repo: git.Repo, use_lint_info: bool) -> str:
     """
     lint_cmd = "python -m commit0 lint "
     if use_lint_info:
-        lint_cmd += " ".join(get_changed_files(repo))
+        lint_cmd += repo_name + " --files "
     else:
         lint_cmd = ""
     return lint_cmd
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 8227e1d..a29f5c4 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -101,7 +101,7 @@ def run_agent_for_repo(
                 )
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name
-                lint_cmd = get_lint_cmd(local_repo, agent_config.use_lint_info)
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
                 message = get_message(agent_config, repo_path, test_file=test_file)
                 agent.run(
                     message,
@@ -126,7 +126,7 @@ def run_agent_for_repo(
             for f in target_edit_files:
                 file_name = f.replace(".py", "").replace("/", "__")
                 log_dir = RUN_AIDER_LOG_DIR / "no_tests" / file_name
-                lint_cmd = get_lint_cmd(local_repo, agent_config.use_lint_info)
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
                 agent.run(message, "", lint_cmd, [f], log_dir)
 
 
@@ -164,7 +164,7 @@ def run_agent(agent_config_file: str) -> None:
     with tqdm(
         total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
     ) as pbar:
-        with multiprocessing.Pool(processes=2) as pool:
+        with multiprocessing.Pool(processes=5) as pool:
             results = []
 
             # Use apply_async to submit jobs and add progress bar updates

From ea78b9b24da7999a480dbc28d9b60e2899d838c5 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 14:39:15 -0400
Subject: [PATCH 02/16] Add experiment-named agent runs, APIError retry, terminal display, and run-test command

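---
Review notes (text below the three-dash line is not part of the commit message):
 * agent/agents.py, AiderAgents.run: a leftover debug statement
   `raise Exception("test")` sits directly before `coder.run(message)`, so the
   agent never runs. The exception has no `status_code`, so it is re-raised
   as-is and is not retried by `retry_if_exception_type(APIError)`.
 * agent/agents.py, AiderAgents.run: the `finally` block always closes
   `sys.stdout` / `sys.stderr`. If anything fails before they are redirected
   to the log file (e.g. `log_dir.mkdir`), this closes the real streams.
 * agent/agents.py, handle_logging: every call adds a new `FileHandler` to the
   named logger, and `run` calls it twice per invocation, so handlers (and
   open file handles) accumulate and log lines are duplicated across runs.
 * agent/run_agent.py, run_agent: the multiprocessing pool is commented out
   and only `filtered_dataset[0]` is processed, although the docstring still
   says "Will run in parallel for each repo."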
 agent/agents.py         | 149 ++++++++++++++----------
 agent/class_types.py    |   1 +
 agent/cli.py            |  43 ++++++-
 agent/display.py        | 166 +++++++++++++++++++++++++++
 agent/run_agent.py      | 105 +++++++++--------
 agent/run_agent_test.py | 243 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 597 insertions(+), 110 deletions(-)
 create mode 100644 agent/display.py
 create mode 100644 agent/run_agent_test.py

diff --git a/agent/agents.py b/agent/agents.py
index 4c777f6..ff4d32e 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -7,13 +7,34 @@
 from aider.coders import Coder
 from aider.models import Model
 from aider.io import InputOutput
-from tenacity import retry, wait_exponential
+from tenacity import retry, wait_exponential, RetryCallState, retry_if_exception_type
 
 
+class APIError(Exception):
+    def __init__(self, status_code: int, message: str):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(f"API Error: {status_code} - {message}")
+
+def handle_logging(logging_name: str, log_file: Path):
+    logger = logging.getLogger(logging_name)
+    logger.setLevel(logging.INFO)
+    logger.propagate = False
+    logger_handler = logging.FileHandler(log_file)
+    logger_handler.setFormatter(
+        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    )
+    logger.addHandler(logger_handler)
+
 class Agents(ABC):
-    def __init__(self, max_iteration: int):
+    def __init__(self, max_iteration: int, retry_if_api_error_codes: tuple[int, ...] = (429, 503, 529)):
         self.max_iteration = max_iteration
 
+        # error code 429 is rate limit exceeded for openai and anthropic
+        # error code 503 is service overloaded for openai
+        # error code 529 is service overloaded for anthropic
+        self.retry_if_api_error_codes = retry_if_api_error_codes
+
     @abstractmethod
     def run(self) -> None:
         """Start agent"""
@@ -27,6 +48,7 @@ def __init__(self, max_iteration: int, model_name: str):
 
     @retry(
         wait=wait_exponential(multiplier=1, min=4, max=10),
+        retry=retry_if_exception_type(APIError)
     )
     def run(
         self,
@@ -37,68 +59,71 @@ def run(
         log_dir: Path,
     ) -> None:
         """Start aider agent"""
-        if test_cmd:
-            auto_test = True
-        else:
-            auto_test = False
-        if lint_cmd:
-            auto_lint = True
-        else:
-            auto_lint = False
-        log_dir = log_dir.resolve()
-        log_dir.mkdir(parents=True, exist_ok=True)
-        input_history_file = log_dir / ".aider.input.history"
-        chat_history_file = log_dir / ".aider.chat.history.md"
-
-        print(
-            f"check {os.path.abspath(chat_history_file)} for prompts and lm generations",
-            file=sys.stderr,
-        )
-        # Set up logging
-        log_file = log_dir / "aider.log"
-        logging.basicConfig(
-            filename=log_file,
-            level=logging.INFO,
-            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-        )
+        try:
+            if test_cmd:
+                auto_test = True
+            else:
+                auto_test = False
+            if lint_cmd:
+                auto_lint = True
+            else:
+                auto_lint = False
+            log_dir = log_dir.resolve()
+            log_dir.mkdir(parents=True, exist_ok=True)
+            input_history_file = log_dir / ".aider.input.history"
+            chat_history_file = log_dir / ".aider.chat.history.md"
 
-        # Redirect print statements to the log file
-        sys.stdout = open(log_file, "a")
-        sys.stderr = open(log_file, "a")
+            print(
+                f"check {os.path.abspath(chat_history_file)} for prompts and lm generations",
+                file=sys.stderr,
+            )
+            # Set up logging
+            log_file = log_dir / "aider.log"
+            logging.basicConfig(
+                filename=log_file,
+                level=logging.INFO,
+                format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+            )
 
-        # Configure httpx logging
-        httpx_logger = logging.getLogger("httpx")
-        httpx_logger.setLevel(logging.INFO)
-        httpx_logger.propagate = False  # Prevent propagation to root logger
-        httpx_handler = logging.FileHandler(log_file)
-        httpx_handler.setFormatter(
-            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-        )
-        httpx_logger.addHandler(httpx_handler)
+            # Redirect print statements to the log file
+            sys.stdout = open(log_file, "a")
+            sys.stderr = open(log_file, "a")
 
-        io = InputOutput(
-            yes=True,
-            input_history_file=input_history_file,
-            chat_history_file=chat_history_file,
-        )
-        coder = Coder.create(
-            main_model=self.model,
-            fnames=fnames,
-            auto_lint=auto_lint,
-            auto_test=auto_test,
-            lint_cmds={"python": lint_cmd},
-            test_cmd=test_cmd,
-            io=io,
-        )
-        coder.max_reflection = self.max_iteration
-        coder.stream = False
+            # Configure httpx and backoff logging
+            handle_logging("httpx", log_file)
+            handle_logging("backoff", log_file)
 
-        # Run the agent
-        coder.run(message)
+            io = InputOutput(
+                yes=True,
+                input_history_file=input_history_file,
+                chat_history_file=chat_history_file,
+            )
+            coder = Coder.create(
+                main_model=self.model,
+                fnames=fnames,
+                auto_lint=auto_lint,
+                auto_test=auto_test,
+                lint_cmds={"python": lint_cmd},
+                test_cmd=test_cmd,
+                io=io,
+            )
+            coder.max_reflection = self.max_iteration
+            coder.stream = False
 
-        # Close redirected stdout and stderr
-        sys.stdout.close()
-        sys.stderr.close()
-        # Restore original stdout and stderr
-        sys.stdout = sys.__stdout__
-        sys.stderr = sys.__stderr__
+            # Run the agent
+            raise Exception("test")
+            coder.run(message)
+        
+        except Exception as e:
+            # If the exception is related to API errors, raise an APIError
+            if hasattr(e, 'status_code') and e.status_code in self.retry_if_api_error_codes:
+                raise APIError(e.status_code, str(e))
+            # For other exceptions, re-raise them
+            raise
+        finally:
+            # Close redirected stdout and stderr
+            sys.stdout.close()
+            sys.stderr.close()
+            # Restore original stdout and stderr
+            sys.stdout = sys.__stdout__
+            sys.stderr = sys.__stderr__
diff --git a/agent/class_types.py b/agent/class_types.py
index 03debfa..13b9385 100644
--- a/agent/class_types.py
+++ b/agent/class_types.py
@@ -5,6 +5,7 @@
 class AgentConfig:
     agent_name: str
     model_name: str
+    backend: str
     use_user_prompt: bool
     user_prompt: str
     use_repo_info: bool
diff --git a/agent/cli.py b/agent/cli.py
index eceb05d..dac159d 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -2,6 +2,9 @@
 import subprocess
 import yaml
 from agent.run_agent import run_agent
+from agent.run_agent_test import run_agent_test
+from commit0.harness.constants import RUN_AIDER_LOG_DIR
+from pathlib import Path
 
 agent_app = typer.Typer(
     no_args_is_help=True,
@@ -14,6 +17,7 @@
     """,
 )
 
+dot_file_dir = Path(__file__).parent.parent
 
 class Colors:
     RESET = "\033[0m"
@@ -131,7 +135,7 @@ def config(
         help="Path to the pre-commit config file",
     ),
     agent_config_file: str = typer.Option(
-        ".agent.yaml",
+        str(dot_file_dir / ".agent.yaml"),
         help="Path to the agent config file",
     ),
 ) -> None:
@@ -167,10 +171,45 @@ def config(
 
 @agent_app.command()
 def run(
+    experiment_name: str = typer.Argument(
+        ...,
+        help="Experiment name of current run",
+    ),
+    override_previous_changes: bool = typer.Option(
+        False,
+        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
+    ),
+    backend: str = typer.Option(
+        "modal",
+        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
+    ),
+    agent_config_file: str = typer.Option(
+        str(dot_file_dir / ".agent.yaml"),
+        help="Path to the agent config file",
+    ),
+    log_dir: str = typer.Option(
+        str(dot_file_dir / RUN_AIDER_LOG_DIR),
+        help="Log directory to store the logs",
+    ),
+) -> None:
+    """Run the agent on the repository."""
+    run_agent(experiment_name, override_previous_changes, backend, agent_config_file, log_dir)
+
+
+@agent_app.command()
+def run_test(
+    experiment_name: str = typer.Argument(
+        ...,
+        help="Experiment name to run the agent on",
+    ),
+    backend: str = typer.Option(
+        "modal",
+        help="Backend to run the agent on",
+    ),
     agent_config_file: str = typer.Argument(
         ".agent.yaml",
         help="Path to the agent config file",
     ),
 ) -> None:
     """Run the agent on the repository."""
-    run_agent(agent_config_file)
+    run_agent_test(experiment_name, backend, agent_config_file)
diff --git a/agent/display.py b/agent/display.py
new file mode 100644
index 0000000..cd176d5
--- /dev/null
+++ b/agent/display.py
@@ -0,0 +1,166 @@
+from rich.console import Console, Group
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, ProgressColumn, Task
+from rich.layout import Layout
+from rich.live import Live
+from rich.text import Text
+from rich.columns import Columns
+from rich.style import Style
+from rich.rule import Rule
+from rich.align import Align
+from collections import deque, OrderedDict
+from types import TracebackType
+
+class RepoBox:
+    def __init__(self, name: str, style: str):
+        self.name = name
+        self.style = style
+
+    def __rich__(self):
+        return Panel(Text(self.name, style=self.style), expand=False, border_style=self.style)
+
+class RepoProgressColumn(ProgressColumn):
+    def render(self, task: Task) -> Text:
+        return Text(f"{int(task.completed or 0)}/{int(task.total or 1)}")
+
+class OngoingRepo:
+    def __init__(self, name: str, current_file: str, finished_files: list[str], total_files: int):
+        self.name = name
+        self.current_file = current_file
+        self.finished_files = finished_files
+        self.total_files = total_files
+
+    def __rich__(self):
+        progress = Progress(
+            SpinnerColumn(),
+            BarColumn(bar_width=None),
+            RepoProgressColumn(),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        )
+        task_id = progress.add_task("", total=self.total_files, completed=len(self.finished_files))
+
+        content = [
+            Text(f"Current working file:", style="bold"),
+            Text(self.current_file, style="green"),
+            Rule(style="dim"),
+            Text("Finished files (recent 5):", style="bold"),
+        ] + [Text(file, style="dim green") for file in self.finished_files[-6:-1]]
+        return Panel(Group(progress, *content), title=self.name, border_style="yellow", expand=True)
+
+class TerminalDisplay:
+    def __init__(self, total_repos: int):
+        self.console = Console()
+        self.total_repos = total_repos
+        self.unstarted_repos = []
+        self.finished_repos = []
+        self.ongoing_repos = OrderedDict()
+        self.finished_files = {}
+        self.total_files_per_repo = {}
+
+        self.overall_progress = Progress(
+            SpinnerColumn(),
+            BarColumn(bar_width=None),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        )
+        self.overall_task = self.overall_progress.add_task("[green]Processing", total=total_repos)
+        
+        self.layout = Layout()
+        self.layout.split_column(
+            Layout(name="progress", ratio=1),
+            Layout(name="main", ratio=14),
+        )
+        self.layout["progress"].split_row(
+            Layout(name="pbar", ratio=4),
+            Layout(name="backend", ratio=1),
+            Layout(name="money", ratio=1),
+        )
+        self.layout["progress"]["pbar"].update(Panel(self.overall_progress, title="Overall Progress", border_style="blue"))
+        self.backend_display = Text(f"Backend Using: ", justify="center")
+        self.layout["progress"]["backend"].update(Panel(self.backend_display, title="Backend", border_style="cyan"))
+        self.money_display = Text(f"Money Spent So Far: $0.00", justify="center")
+        self.layout["progress"]["money"].update(Panel(self.money_display, title="$$$$", border_style="cyan"))
+        self.layout["main"].split_row(
+            Layout(name="left", ratio=1),
+            Layout(name="right", ratio=1),
+        )
+        self.layout["main"]["left"].split_column(
+            Layout(name="unstarted", ratio=1),
+            Layout(name="finished", ratio=1),
+        )
+        self.layout["right"].update(Panel(Layout(name="ongoing"), title="Ongoing", border_style="yellow"))
+        
+        # Initialize panels with empty content
+        self.layout["left"]["unstarted"].update(Panel(Text(""), title="Unstarted Repos", border_style="red"))
+        self.layout["left"]["finished"].update(Panel(Text(""), title="Finished Repos", border_style="green"))
+        
+    def update_backend_display(self, backend: str):
+        self.backend_display = Text(f"Backend Using: {backend}", justify="center")
+        self.layout["progress"]["backend"].update(Panel(self.backend_display, title="Backend", border_style="green"))
+
+    def update_money_display(self, money: float):
+        self.money_display = Text(f"Money Spent So Far: ${money:.2f}", justify="center")
+        self.layout["progress"]["money"].update(Panel(Align.center(self.money_display), title="$$$$"))
+
+    def set_current_file(self, repo_name: str, file_name: str):
+        if repo_name not in self.ongoing_repos:
+            # Start the repo if it's not yet tracked, but don't move it to the start
+            self.start_repo(repo_name)
+        
+        # Just update the file name without reordering the repos
+        self.ongoing_repos[repo_name] = file_name
+        
+        # Append the new file to finished files, keep the order intact
+        self.finished_files.setdefault(repo_name, []).append(file_name)
+
+        # Update the display
+        self.update()
+    
+    def update(self):
+        # Update unstarted repos
+        unstarted_boxes = [RepoBox(repo, "red") for repo in self.unstarted_repos]
+        self.layout["left"]["unstarted"].update(Panel(Columns(unstarted_boxes), title="Not Started Repos", border_style="red"))
+
+        # Update finished repos
+        finished_boxes = [RepoBox(repo, "green") for repo in self.finished_repos]
+        self.layout["left"]["finished"].update(Panel(Columns(finished_boxes), title="Finished Repos", border_style="green"))
+
+        # Update ongoing repos with progress bars
+        ongoing_panels = [OngoingRepo(repo, self.ongoing_repos[repo], self.finished_files.get(repo, []), self.total_files_per_repo.get(repo, 1)) for repo in self.ongoing_repos]
+
+        if ongoing_panels:
+            ongoing_layout = Layout()
+            for i, panel in enumerate(ongoing_panels):
+                ongoing_layout.add_split(Layout(panel, name=f"repo_{i}"))
+            ongoing_layout.split_column(*[ongoing_layout[f"repo_{i}"] for i in range(len(ongoing_panels))])
+            self.layout["right"].update(Panel(ongoing_layout, title="Ongoing", border_style="yellow"))
+        else:
+            self.layout["right"].update(Panel(Text("No ongoing repos"), title="Ongoing", border_style="yellow"))
+
+    def start_repo(self, repo_name: str, total_files: int = 0):
+        if repo_name in self.unstarted_repos:
+            self.unstarted_repos.remove(repo_name)
+        self.ongoing_repos[repo_name] = ""
+        self.finished_files[repo_name] = []
+        self.total_files_per_repo[repo_name] = total_files
+        self.update()
+        
+    def finish_repo(self, repo_name: str):
+        self.finished_repos.append(repo_name)
+        if repo_name in self.ongoing_repos:
+            del self.ongoing_repos[repo_name]
+        if repo_name in self.finished_files:
+            del self.finished_files[repo_name]
+        self.overall_progress.update(self.overall_task, advance=1)
+        self.update()
+        
+    def set_unstarted_repos(self, repos: list[str]):
+        self.unstarted_repos = repos
+        self.update()
+        
+    def __enter__(self):
+        self.live = Live(self.layout, console=self.console, screen=True, refresh_per_second=4)
+        self.live.start()
+        return self
+        
+    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None):
+        self.live.stop()
\ No newline at end of file
diff --git a/agent/run_agent.py b/agent/run_agent.py
index a29f5c4..ca949da 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -20,7 +20,8 @@
 from commit0.harness.get_pytest_ids import main as get_tests
 from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
 from commit0.cli import read_commit0_dot_file
-
+from pathlib import Path
+from datetime import datetime
 
 class DirContext:
     def __init__(self, d: str):
@@ -51,6 +52,10 @@ def run_agent_for_repo(
     repo_base_dir: str,
     agent_config: AgentConfig,
     example: RepoInstance,
+    experiment_name: Optional[str] = None,
+    override_previous_changes: bool = False,
+    backend: str = "modal",
+    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
 ) -> None:
     """Run Aider for a given repository."""
     # get repo info
@@ -58,13 +63,10 @@ def run_agent_for_repo(
 
     repo_name = repo_name.lower()
     repo_name = repo_name.replace(".", "-")
-
-    # Call the commit0 get-tests command to retrieve test files
-    test_files_str = get_tests(repo_name, verbose=0)
-    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
+    
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
+
     try:
         local_repo = Repo(repo_path)
     except Exception:
@@ -79,28 +81,45 @@ def run_agent_for_repo(
             f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
         )
 
-    run_id = args2string(agent_config)
-    print(f"Agent is coding on branch: {run_id}", file=sys.stderr)
-    create_branch(local_repo, run_id, example["base_commit"])
-    latest_commit = local_repo.commit(run_id)
+    # if branch_name is not provided, create a new branch name based on agent_config
+    if experiment_name is None:
+        experiment_name = args2string(agent_config)
+
+    create_branch(local_repo, experiment_name, example["base_commit"])
+
     # in cases where the latest commit of branch is not commit 0
     # set it back to commit 0
-    # TODO: ask user for permission
-    if latest_commit.hexsha != example["base_commit"]:
+    latest_commit = local_repo.commit(experiment_name)
+    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
-    target_edit_files = get_target_edit_files(repo_path)
+
+    # prepare the log dir
+    experiment_log_dir = Path(log_dir) / repo_name / experiment_name / datetime.now().strftime("%Y-%m-%d")
+    experiment_log_dir.mkdir(parents=True, exist_ok=True)
+
+    # write agent_config to .agent.yaml in the log_dir for record
+    agent_config_log_file = experiment_log_dir / ".agent.yaml"
+    with open(agent_config_log_file, "w") as agent_config_file:
+        yaml.dump(agent_config, agent_config_file)
+
     with DirContext(repo_path):
         if agent_config is None:
             raise ValueError("Invalid input")
 
+        target_edit_files = get_target_edit_files(repo_path)
+
         if agent_config.run_tests:
+            # Call the commit0 get-tests command to retrieve test files
+            test_files_str = get_tests(repo_name, verbose=0)
+            test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
                 test_cmd = (
-                    f"python -m commit0 test {repo_path} {test_file} --branch {run_id}"
+                    f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend}"
                 )
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
-                log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name
+                test_log_dir = experiment_log_dir / test_file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
                 message = get_message(agent_config, repo_path, test_file=test_file)
                 agent.run(
@@ -108,29 +127,22 @@ def run_agent_for_repo(
                     test_cmd,
                     lint_cmd,
                     target_edit_files,
-                    log_dir,
+                    test_log_dir,
                 )
         else:
             # when unit test feedback is not available, iterate over target files to edit
             message = get_message(
                 agent_config, repo_path, test_dir=example["test"]["test_dir"]
             )
-            agent_config_log_file = os.path.abspath(
-                RUN_AIDER_LOG_DIR / "no_tests" / ".agent.yaml"
-            )
-            os.makedirs(os.path.dirname(agent_config_log_file), exist_ok=True)
-            # write agent_config to .agent.yaml
-            with open(agent_config_log_file, "w") as agent_config_file:
-                yaml.dump(agent_config, agent_config_file)
-
+            
             for f in target_edit_files:
                 file_name = f.replace(".py", "").replace("/", "__")
-                log_dir = RUN_AIDER_LOG_DIR / "no_tests" / file_name
+                file_log_dir = experiment_log_dir / file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                agent.run(message, "", lint_cmd, [f], log_dir)
+                agent.run(message, "", lint_cmd, [f], file_log_dir)
 
 
-def run_agent(agent_config_file: str) -> None:
+def run_agent(experiment_name: str, override_previous_changes: bool, backend: str, agent_config_file: str, log_dir: str) -> None:
     """Main function to run Aider for a given repository.
 
     Will run in parallel for each repo.
@@ -161,22 +173,23 @@ def run_agent(agent_config_file: str) -> None:
     if len(filtered_dataset) > 1:
         sys.stdout = open(os.devnull, "w")
 
-    with tqdm(
-        total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
-    ) as pbar:
-        with multiprocessing.Pool(processes=5) as pool:
-            results = []
-
-            # Use apply_async to submit jobs and add progress bar updates
-            for example in filtered_dataset:
-                result = pool.apply_async(
-                    run_agent_for_repo,
-                    args=(commit0_config["base_dir"], agent_config, example),
-                    callback=lambda _: pbar.update(
-                        1
-                    ),  # Update progress bar on task completion
-                )
-                results.append(result)
-
-            for result in results:
-                result.wait()
+    run_agent_for_repo(commit0_config["base_dir"], agent_config, filtered_dataset[0], experiment_name=experiment_name, override_previous_changes=override_previous_changes, backend=backend, log_dir=log_dir)
+    # with tqdm(
+    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
+    # ) as pbar:
+    #     with multiprocessing.Pool(processes=3) as pool:
+    #         results = []
+
+    #         # Use apply_async to submit jobs and add progress bar updates
+    #         for example in filtered_dataset:
+    #             result = pool.apply_async(
+    #                 run_agent_for_repo,
+    #                 args=(commit0_config["base_dir"], agent_config, example),
+    #                 callback=lambda _: pbar.update(
+    #                     1
+    #                 ),  # Update progress bar on task completion
+    #             )
+    #             results.append(result)
+
+    #         for result in results:
+    #             result.wait()
diff --git a/agent/run_agent_test.py b/agent/run_agent_test.py
new file mode 100644
index 0000000..c310f34
--- /dev/null
+++ b/agent/run_agent_test.py
@@ -0,0 +1,243 @@
+import os
+import sys
+import yaml
+import multiprocessing
+from tqdm import tqdm
+import queue  # Add this import
+from datasets import load_dataset
+from git import Repo
+from agent.commit0_utils import (
+    args2string,
+    create_branch,
+    get_message,
+    get_target_edit_files,
+    get_lint_cmd,
+)
+from agent.agents import AiderAgents
+from typing import Optional, Type
+from types import TracebackType
+from agent.class_types import AgentConfig
+from commit0.harness.constants import SPLIT
+from commit0.harness.get_pytest_ids import main as get_tests
+from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
+from commit0.cli import read_commit0_dot_file
+import time
+import random
+import multiprocessing
+from agent.display import TerminalDisplay
+
+class DirContext:
+    def __init__(self, d: str):
+        self.dir = d
+        self.cwd = os.getcwd()
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(
+        self,
+        exctype: Optional[Type[BaseException]],
+        excinst: Optional[BaseException],
+        exctb: Optional[TracebackType],
+    ) -> None:
+        os.chdir(self.cwd)
+
+
+def read_yaml_config(config_file: str) -> dict:
+    """Read the yaml config from the file."""
+    if not os.path.exists(config_file):
+        raise FileNotFoundError(f"The config file '{config_file}' does not exist.")
+    with open(config_file, "r") as f:
+        return yaml.load(f, Loader=yaml.FullLoader)
+
+
+def run_agent_for_repo(
+    repo_base_dir: str,
+    agent_config: AgentConfig,
+    example: RepoInstance,
+    update_queue: multiprocessing.Queue
+) -> None:
+    """Run Aider for a given repository."""
+    # get repo info
+    _, repo_name = example["repo"].split("/")
+    update_queue.put(("start_repo", (repo_name, 0)))
+    repo_name = repo_name.lower()
+    if repo_name != "web3.py":
+        repo_name = repo_name.replace(".", "-")
+
+    # Call the commit0 get-tests command to retrieve test files
+    test_files_str = get_tests(repo_name, verbose=0)
+    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
+    repo_path = os.path.join(repo_base_dir, repo_name)
+    repo_path = os.path.abspath(repo_path)
+    try:
+        local_repo = Repo(repo_path)
+    except Exception:
+        raise Exception(
+            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
+        )
+
+    if agent_config.agent_name == "aider":
+        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
+    else:
+        raise NotImplementedError(
+            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
+        )
+
+    run_id = args2string(agent_config)
+    run_id = run_id.replace("run_tests-1", "run_tests-0")
+    # print(f"Agent is coding on branch: {run_id}", file=sys.stderr)
+    create_branch(local_repo, run_id, example["base_commit"])
+    latest_commit = local_repo.commit(run_id)
+    # in cases where the latest commit of branch is not commit 0
+    # set it back to commit 0
+    # TODO: ask user for permission
+    if latest_commit.hexsha != example["base_commit"]:
+        local_repo.git.reset("--hard", example["base_commit"])
+    target_edit_files = get_target_edit_files(repo_path)
+
+    # Determine the total number of files (either test files or target files)
+    total_files = len(test_files) if agent_config.run_tests else len(target_edit_files)
+
+    # Notify the display to start tracking this repo, pass the total number of files
+    update_queue.put(("start_repo", (repo_name, total_files)))
+    
+    with DirContext(repo_path):
+        if agent_config is None:
+            raise ValueError("Invalid input")
+
+        if agent_config.run_tests:
+            # when unit test feedback is available, iterate over test files
+            for test_file in test_files:
+                update_queue.put(("set_current_file", (repo_name, test_file)))
+                sleep_time = random.randint(1,3) # Random sleep time between 1 and 5 seconds
+                time.sleep(sleep_time)
+                update_queue.put(("update_money_display", random.random()))
+                continue
+                test_cmd = (
+                    f"python -m commit0 test {repo_path} {test_file} --branch {run_id} --backend {agent_config.backend}"
+                )
+                test_file_name = test_file.replace(".py", "").replace("/", "__")
+                log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                message = get_message(agent_config, repo_path, test_file=test_file)
+                agent.run(
+                    message,
+                    test_cmd,
+                    lint_cmd,
+                    target_edit_files,
+                    log_dir,
+                )
+        else:
+            # when unit test feedback is not available, iterate over target files to edit
+            message = get_message(
+                agent_config, repo_path, test_dir=example["test"]["test_dir"]
+            )
+            agent_config_log_file = os.path.abspath(
+                RUN_AIDER_LOG_DIR / "no_tests" / ".agent.yaml"
+            )
+            os.makedirs(os.path.dirname(agent_config_log_file), exist_ok=True)
+            # write agent_config to .agent.yaml
+            with open(agent_config_log_file, "w") as agent_config_file:
+                yaml.dump(agent_config, agent_config_file)
+
+            for f in target_edit_files:
+                update_queue.put(("set_current_file", (repo_name, f)))
+                sleep_time = random.randint(1,3) # Random sleep time between 1 and 5 seconds
+                time.sleep(sleep_time)
+                update_queue.put(("update_money_display", random.random()))
+                continue
+                file_name = f.replace(".py", "").replace("/", "__")
+                log_dir = RUN_AIDER_LOG_DIR / "no_tests" / file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                agent.run(message, "", lint_cmd, [f], log_dir)
+    update_queue.put(("finish_repo", repo_name))
+
+def run_agent_test(agent_config_file: str) -> None:
+    """Main function to run Aider for a given repository."""
+    config = read_yaml_config(agent_config_file)
+    agent_config = AgentConfig(**config)
+    commit0_config = read_commit0_dot_file(".commit0.yaml")
+
+    dataset = load_dataset(
+        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
+    )
+    filtered_dataset = [
+        example
+        for example in dataset
+        if commit0_config["repo_split"] == "all"
+        or (
+            isinstance(example, dict)
+            and "repo" in example
+            and isinstance(example["repo"], str)
+            and example["repo"].split("/")[-1]
+            in SPLIT.get(commit0_config["repo_split"], [])
+        )
+    ]
+
+    assert len(filtered_dataset) > 0, "No examples available"
+
+    with TerminalDisplay(len(filtered_dataset)) as display:
+        unstarted_repos = [example["repo"].split("/")[-1] for example in filtered_dataset]
+        display.set_unstarted_repos(unstarted_repos)
+
+        display.update_backend_display(agent_config.backend)
+        total_money_spent = 0
+        
+        with multiprocessing.Manager() as manager:
+            update_queue = manager.Queue()
+
+            
+
+            update_queue.put(("update_backend_display", agent_config.backend))
+            with multiprocessing.Pool(processes=3) as pool:
+                results = []
+
+                for example in filtered_dataset:
+                    result = pool.apply_async(
+                        run_agent_for_repo,
+                        args=(commit0_config["base_dir"], agent_config, example, update_queue)
+                    )
+                    results.append(result)
+
+                while any(not r.ready() for r in results):
+                    try:
+                        while not update_queue.empty():
+                            action, data = update_queue.get_nowait()
+                            if action == "start_repo":
+                                repo_name, total_files = data
+                                display.start_repo(repo_name, total_files)
+                            elif action == "finish_repo":
+                                repo_name = data
+                                display.finish_repo(repo_name)
+                            elif action == "set_current_file":
+                                repo_name, file_name = data
+                                display.set_current_file(repo_name, file_name)
+                            elif action == "update_money_display":
+                                money_spent = data
+                                total_money_spent += money_spent
+                                display.update_money_display(total_money_spent)
+                    except queue.Empty:
+                        pass
+                    time.sleep(0.1)  # Small delay to prevent busy-waiting
+
+                # Final update after all repos are processed
+                while not update_queue.empty():
+                    action, data = update_queue.get()
+                    if action == "start_repo":
+                        repo_name, total_files = data
+                        display.start_repo(repo_name, total_files)
+                    elif action == "finish_repo":
+                        repo_name = data
+                        display.finish_repo(repo_name)
+                    elif action == "set_current_file":
+                        repo_name, file_name = data
+                        display.set_current_file(repo_name, file_name)
+                    elif action == "update_money_display":
+                        money_spent = data
+                        total_money_spent += money_spent
+                        display.update_money_display(total_money_spent)
+
+                for result in results:
+                    result.get()
\ No newline at end of file

From e3cca57a4a382c5f5bfcd029e14d53ec7881dee6 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 14:40:49 -0400
Subject: [PATCH 03/16] cli add stdin

---
 commit0/cli.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/commit0/cli.py b/commit0/cli.py
index 3ad212f..e0e61ea 100644
--- a/commit0/cli.py
+++ b/commit0/cli.py
@@ -14,7 +14,7 @@
 import subprocess
 import yaml
 import os
-
+import sys
 commit0_app = typer.Typer(
     no_args_is_help=True,
     add_completion=False,
@@ -209,7 +209,7 @@ def test(
         ..., help="Directory of the repository to test"
     ),
     test_ids: str = typer.Argument(
-        ...,
+        None,
         help='All ways pytest supports to run and select tests. Please provide a single string. Example: "test_mod.py", "testing/", "test_mod.py::test_func", "-k \'MyClass and not method\'"',
     ),
     branch: Union[str, None] = typer.Option(
@@ -238,6 +238,7 @@ def test(
         help="Set this to 2 for more logging information",
         count=True,
     ),
+    stdin: bool = typer.Option(False, "--stdin", help="Read test names from stdin"),
 ) -> None:
     """Run tests on a Commit0 repository."""
     check_commit0_path()
@@ -255,6 +256,13 @@ def test(
         )
         branch = get_active_branch(git_path)
 
+    if stdin:
+        # Read test names from stdin
+        test_ids = sys.stdin.read().strip()
+    elif test_ids is None:
+        typer.echo("Error: test_ids must be provided or use --stdin option", err=True)
+        raise typer.Exit(code=1)
+
     if verbose == 2:
         typer.echo(f"Running tests for repository: {repo_or_repo_path}")
         typer.echo(f"Branch: {branch}")
@@ -276,6 +284,7 @@ def test(
     )
 
 
+
 @commit0_app.command()
 def evaluate(
     branch: Union[str, None] = typer.Option(

From 3a5321b1c287a79afb16f6ef9ab2ca5d7d594436 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 18:20:40 -0400
Subject: [PATCH 04/16] agent with rich display

---
 agent/{commit0_utils.py => agent_utils.py} |  19 +-
 agent/agents.py                            | 176 ++++++-------
 agent/class_types.py                       |   1 -
 agent/cli.py                               | 101 ++++++--
 agent/display.py                           | 274 ++++++++++++++++-----
 agent/run_agent.py                         | 191 ++++++++++----
 agent/run_agent_no_rich.py                 | 212 ++++++++++++++++
 agent/run_agent_test.py                    | 243 ------------------
 commit0/cli.py                             |   2 +-
 9 files changed, 761 insertions(+), 458 deletions(-)
 rename agent/{commit0_utils.py => agent_utils.py} (93%)
 create mode 100644 agent/run_agent_no_rich.py
 delete mode 100644 agent/run_agent_test.py

diff --git a/agent/commit0_utils.py b/agent/agent_utils.py
similarity index 93%
rename from agent/commit0_utils.py
rename to agent/agent_utils.py
index 69e63a7..3df8360 100644
--- a/agent/commit0_utils.py
+++ b/agent/agent_utils.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 from typing import List
 import fitz
+import yaml
 
 from agent.class_types import AgentConfig
 
@@ -118,7 +119,7 @@ def get_file_info(file_path: Path, prefix: str = "") -> str:
     return "\n".join(filter(None, tree_string))
 
 
-def get_target_edit_files(target_dir: str) -> list[str]:
+def get_target_edit_files(target_dir: str, src_prefix: str) -> list[str]:
     """Find the files with functions with the pass statement."""
     files = []
     for root, _, filenames in os.walk(target_dir):
@@ -131,7 +132,7 @@ def get_target_edit_files(target_dir: str) -> list[str]:
 
     # Remove the base_dir prefix
     files = [file.replace(target_dir, "").lstrip("/") for file in files]
-
+    files = [src_prefix + file for file in files]
     # Only keep python files
     files = [file for file in files if file.endswith(".py")]
 
@@ -308,3 +309,17 @@ def get_lint_cmd(repo_name: str, use_lint_info: bool) -> str:
     else:
         lint_cmd = ""
     return lint_cmd
+
+
+def write_agent_config(agent_config_file: str, agent_config: dict) -> None:
+    """Write the agent config to the file."""
+    with open(agent_config_file, "w") as f:
+        yaml.dump(agent_config, f)
+
+
+def read_yaml_config(config_file: str) -> dict:
+    """Read the yaml config from the file."""
+    if not os.path.exists(config_file):
+        raise FileNotFoundError(f"The config file '{config_file}' does not exist.")
+    with open(config_file, "r") as f:
+        return yaml.load(f, Loader=yaml.FullLoader)
diff --git a/agent/agents.py b/agent/agents.py
index ff4d32e..053e12c 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -1,5 +1,4 @@
 import sys
-import os
 from abc import ABC, abstractmethod
 from pathlib import Path
 import logging
@@ -7,16 +6,11 @@
 from aider.coders import Coder
 from aider.models import Model
 from aider.io import InputOutput
-from tenacity import retry, wait_exponential, RetryCallState, retry_if_exception_type
+import re
 
 
-class APIError(Exception):
-    def __init__(self, status_code: int, message: str):
-        self.status_code = status_code
-        self.message = message
-        super().__init__(f"API Error: {status_code} - {message}")
-
-def handle_logging(logging_name: str, log_file: Path):
+def handle_logging(logging_name: str, log_file: Path) -> None:
+    """Handle logging for agent"""
     logger = logging.getLogger(logging_name)
     logger.setLevel(logging.INFO)
     logger.propagate = False
@@ -26,17 +20,32 @@ def handle_logging(logging_name: str, log_file: Path):
     )
     logger.addHandler(logger_handler)
 
+
+class AgentReturn(ABC):
+    def __init__(self, log_file: Path):
+        self.log_file = log_file
+        self.last_cost = self.get_money_cost()
+
+    def get_money_cost(self) -> float:
+        """Get accumulated money cost from log file"""
+        last_cost = 0.0
+        with open(self.log_file, "r") as file:
+            for line in file:
+                if "Tokens:" in line and "Cost:" in line:
+                    match = re.search(
+                        r"Cost: \$\d+\.\d+ message, \$(\d+\.\d+) session", line
+                    )
+                    if match:
+                        last_cost = float(match.group(1))
+        return last_cost
+
+
 class Agents(ABC):
-    def __init__(self, max_iteration: int, retry_if_api_error_codes: tuple[int, ...] = (429, 503, 529)):
+    def __init__(self, max_iteration: int):
         self.max_iteration = max_iteration
 
-        # error code 429 is rate limit exceeded for openai and anthropic
-        # error code 503 is service overloaded for openai
-        # error code 529 is service overloaded for anthropic
-        self.retry_if_api_error_codes = retry_if_api_error_codes
-
     @abstractmethod
-    def run(self) -> None:
+    def run(self) -> AgentReturn:
         """Start agent"""
         raise NotImplementedError
 
@@ -46,10 +55,6 @@ def __init__(self, max_iteration: int, model_name: str):
         super().__init__(max_iteration)
         self.model = Model(model_name)
 
-    @retry(
-        wait=wait_exponential(multiplier=1, min=4, max=10),
-        retry=retry_if_exception_type(APIError)
-    )
     def run(
         self,
         message: str,
@@ -57,73 +62,74 @@ def run(
         lint_cmd: str,
         fnames: list[str],
         log_dir: Path,
-    ) -> None:
+    ) -> AgentReturn:
         """Start aider agent"""
-        try:
-            if test_cmd:
-                auto_test = True
-            else:
-                auto_test = False
-            if lint_cmd:
-                auto_lint = True
-            else:
-                auto_lint = False
-            log_dir = log_dir.resolve()
-            log_dir.mkdir(parents=True, exist_ok=True)
-            input_history_file = log_dir / ".aider.input.history"
-            chat_history_file = log_dir / ".aider.chat.history.md"
-
-            print(
-                f"check {os.path.abspath(chat_history_file)} for prompts and lm generations",
-                file=sys.stderr,
-            )
-            # Set up logging
-            log_file = log_dir / "aider.log"
-            logging.basicConfig(
-                filename=log_file,
-                level=logging.INFO,
-                format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-            )
+        if test_cmd:
+            auto_test = True
+        else:
+            auto_test = False
+        if lint_cmd:
+            auto_lint = True
+        else:
+            auto_lint = False
+        log_dir = log_dir.resolve()
+        log_dir.mkdir(parents=True, exist_ok=True)
+        input_history_file = log_dir / ".aider.input.history"
+        chat_history_file = log_dir / ".aider.chat.history.md"
 
-            # Redirect print statements to the log file
-            sys.stdout = open(log_file, "a")
-            sys.stderr = open(log_file, "a")
+        # Set up logging
+        log_file = log_dir / "aider.log"
+        logging.basicConfig(
+            filename=log_file,
+            level=logging.INFO,
+            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        )
 
-            # Configure httpx and backoff logging
-            handle_logging("httpx", log_file)
-            handle_logging("backoff", log_file)
+        # Redirect print statements to the log file
+        sys.stdout = open(log_file, "a")
+        sys.stderr = open(log_file, "a")
 
-            io = InputOutput(
-                yes=True,
-                input_history_file=input_history_file,
-                chat_history_file=chat_history_file,
-            )
-            coder = Coder.create(
-                main_model=self.model,
-                fnames=fnames,
-                auto_lint=auto_lint,
-                auto_test=auto_test,
-                lint_cmds={"python": lint_cmd},
-                test_cmd=test_cmd,
-                io=io,
+        # Configure httpx and backoff logging
+        handle_logging("httpx", log_file)
+        handle_logging("backoff", log_file)
+
+        io = InputOutput(
+            yes=True,
+            input_history_file=input_history_file,
+            chat_history_file=chat_history_file,
+        )
+        coder = Coder.create(
+            main_model=self.model,
+            fnames=fnames,
+            auto_lint=auto_lint,
+            auto_test=auto_test,
+            lint_cmds={"python": lint_cmd},
+            test_cmd=test_cmd,
+            io=io,
+        )
+        coder.max_reflection = self.max_iteration
+        coder.stream = True
+
+        # Run the agent
+        # coder.run(message)
+
+        #### TMP
+        import time
+        import random
+
+        time.sleep(random.random() * 5)
+        n = random.random() / 10
+        with open(log_file, "a") as f:
+            f.write(
+                f"> Tokens: 33k sent, 1.3k received. Cost: $0.12 message, ${n} session.  \n"
             )
-            coder.max_reflection = self.max_iteration
-            coder.stream = False
-
-            # Run the agent
-            raise Exception("test")
-            coder.run(message)
-        
-        except Exception as e:
-            # If the exception is related to API errors, raise an APIError
-            if hasattr(e, 'status_code') and e.status_code in self.retry_if_api_error_codes:
-                raise APIError(e.status_code, str(e))
-            # For other exceptions, re-raise them
-            raise
-        finally:
-            # Close redirected stdout and stderr
-            sys.stdout.close()
-            sys.stderr.close()
-            # Restore original stdout and stderr
-            sys.stdout = sys.__stdout__
-            sys.stderr = sys.__stderr__
+        #### TMP
+
+        # Close redirected stdout and stderr
+        sys.stdout.close()
+        sys.stderr.close()
+        # Restore original stdout and stderr
+        sys.stdout = sys.__stdout__
+        sys.stderr = sys.__stderr__
+
+        return AgentReturn(log_file)
diff --git a/agent/class_types.py b/agent/class_types.py
index 13b9385..03debfa 100644
--- a/agent/class_types.py
+++ b/agent/class_types.py
@@ -5,7 +5,6 @@
 class AgentConfig:
     agent_name: str
     model_name: str
-    backend: str
     use_user_prompt: bool
     user_prompt: str
     use_repo_info: bool
diff --git a/agent/cli.py b/agent/cli.py
index dac159d..82d9d40 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -1,15 +1,15 @@
 import typer
-import subprocess
-import yaml
+from agent.run_agent_no_rich import run_agent as run_agent_no_rich
 from agent.run_agent import run_agent
-from agent.run_agent_test import run_agent_test
 from commit0.harness.constants import RUN_AIDER_LOG_DIR
-from pathlib import Path
+import subprocess
+from agent.agent_utils import write_agent_config
 
 agent_app = typer.Typer(
     no_args_is_help=True,
     add_completion=False,
     context_settings={"help_option_names": ["-h", "--help"]},
+    pretty_exceptions_show_locals=False,
     help="""
     This is the command for running agent on Commit-0.
 
@@ -17,7 +17,6 @@
     """,
 )
 
-dot_file_dir = Path(__file__).parent.parent
 
 class Colors:
     RESET = "\033[0m"
@@ -27,12 +26,6 @@ class Colors:
     ORANGE = "\033[95m"
 
 
-def write_agent_config(agent_config_file: str, agent_config: dict) -> None:
-    """Write the agent config to the file."""
-    with open(agent_config_file, "w") as f:
-        yaml.dump(agent_config, f)
-
-
 def check_aider_path() -> None:
     """Code adapted from https://github.com/modal-labs/modal-client/blob/a8ddd418f8c65b7e168a9125451eeb70da2b6203/modal/cli/entry_point.py#L55
 
@@ -135,7 +128,7 @@ def config(
         help="Path to the pre-commit config file",
     ),
     agent_config_file: str = typer.Option(
-        str(dot_file_dir / ".agent.yaml"),
+        ".agent.yaml",
         help="Path to the agent config file",
     ),
 ) -> None:
@@ -147,6 +140,8 @@ def config(
             f"Invalid {highlight('AGENT', Colors.RED)}. We only support aider for now",
             param_hint="AGENT",
         )
+    if use_user_prompt:
+        user_prompt = typer.prompt("Please enter your user prompt")
 
     agent_config = {
         "agent_name": agent_name,
@@ -184,32 +179,100 @@ def run(
         help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
     ),
     agent_config_file: str = typer.Option(
-        str(dot_file_dir / ".agent.yaml"),
+        ".agent.yaml",
         help="Path to the agent config file",
     ),
     log_dir: str = typer.Option(
-        str(dot_file_dir / RUN_AIDER_LOG_DIR),
+        str(RUN_AIDER_LOG_DIR.resolve()),
         help="Log directory to store the logs",
     ),
+    max_parallel_repos: int = typer.Option(
+        1,
+        help="Maximum number of repositories for agent to run in parallel",
+    ),
 ) -> None:
     """Run the agent on the repository."""
-    run_agent(experiment_name, override_previous_changes, backend, agent_config_file, log_dir)
+    run_agent(
+        experiment_name,
+        override_previous_changes,
+        backend,
+        agent_config_file,
+        log_dir,
+        max_parallel_repos,
+    )
+
+
+@agent_app.command()
+def run_test_no_rich(
+    experiment_name: str = typer.Argument(
+        ...,
+        help="Experiment name of current run",
+    ),
+    override_previous_changes: bool = typer.Option(
+        False,
+        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
+    ),
+    backend: str = typer.Option(
+        "modal",
+        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
+    ),
+    agent_config_file: str = typer.Option(
+        ".agent.yaml",
+        help="Path to the agent config file",
+    ),
+    log_dir: str = typer.Option(
+        str(RUN_AIDER_LOG_DIR.resolve()),
+        help="Log directory to store the logs",
+    ),
+    max_parallel_repos: int = typer.Option(
+        1,
+        help="Maximum number of repositories for agent to run in parallel",
+    ),
+) -> None:
+    """Run the agent on the repository."""
+    run_agent_no_rich(
+        experiment_name,
+        override_previous_changes,
+        backend,
+        agent_config_file,
+        log_dir,
+        max_parallel_repos,
+    )
 
 
 @agent_app.command()
 def run_test(
     experiment_name: str = typer.Argument(
         ...,
-        help="Experiment name to run the agent on",
+        help="Experiment name of current run",
+    ),
+    override_previous_changes: bool = typer.Option(
+        False,
+        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
     ),
     backend: str = typer.Option(
         "modal",
-        help="Backend to run the agent on",
+        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
     ),
-    agent_config_file: str = typer.Argument(
+    agent_config_file: str = typer.Option(
         ".agent.yaml",
         help="Path to the agent config file",
     ),
+    log_dir: str = typer.Option(
+        str(RUN_AIDER_LOG_DIR.resolve()),
+        help="Log directory to store the logs",
+    ),
+    max_parallel_repos: int = typer.Option(
+        1,
+        help="Maximum number of repositories for agent to run in parallel",
+    ),
 ) -> None:
     """Run the agent on the repository."""
-    run_agent_test(experiment_name, backend, agent_config_file)
+    run_agent(
+        experiment_name,
+        override_previous_changes,
+        backend,
+        agent_config_file,
+        log_dir,
+        max_parallel_repos,
+    )
diff --git a/agent/display.py b/agent/display.py
index cd176d5..8b219be 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -1,30 +1,46 @@
 from rich.console import Console, Group
 from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, ProgressColumn, Task
+from rich.progress import (
+    Progress,
+    SpinnerColumn,
+    BarColumn,
+    TextColumn,
+    ProgressColumn,
+    Task,
+)
 from rich.layout import Layout
 from rich.live import Live
 from rich.text import Text
 from rich.columns import Columns
-from rich.style import Style
 from rich.rule import Rule
 from rich.align import Align
-from collections import deque, OrderedDict
+from collections import OrderedDict
 from types import TracebackType
 
+
 class RepoBox:
     def __init__(self, name: str, style: str):
         self.name = name
         self.style = style
 
     def __rich__(self):
-        return Panel(Text(self.name, style=self.style), expand=False, border_style=self.style)
+        return Panel(
+            Text(self.name, style=self.style), expand=False, border_style=self.style
+        )
+
 
 class RepoProgressColumn(ProgressColumn):
+    """Custom progress column for displaying the progress of a repository."""
+
     def render(self, task: Task) -> Text:
+        """Render the progress of a repository."""
         return Text(f"{int(task.completed or 0)}/{int(task.total or 1)}")
 
+
 class OngoingRepo:
-    def __init__(self, name: str, current_file: str, finished_files: list[str], total_files: int):
+    def __init__(
+        self, name: str, current_file: str, finished_files: list[str], total_files: int
+    ):
         self.name = name
         self.current_file = current_file
         self.finished_files = finished_files
@@ -37,114 +53,247 @@ def __rich__(self):
             RepoProgressColumn(),
             TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
         )
-        task_id = progress.add_task("", total=self.total_files, completed=len(self.finished_files))
+        _ = progress.add_task(
+            "", total=self.total_files, completed=len(self.finished_files)
+        )
 
         content = [
-            Text(f"Current working file:", style="bold"),
+            Text("Current working file:", style="bold"),
             Text(self.current_file, style="green"),
             Rule(style="dim"),
             Text("Finished files (recent 5):", style="bold"),
-        ] + [Text(file, style="dim green") for file in self.finished_files[-6:-1]]
-        return Panel(Group(progress, *content), title=self.name, border_style="yellow", expand=True)
+        ] + [Text(file, style="dim green") for file in self.finished_files[-6:-1][::-1]]
+        return Panel(
+            Group(progress, *content),
+            title=self.name,
+            border_style="yellow",
+            expand=True,
+        )
+
 
 class TerminalDisplay:
     def __init__(self, total_repos: int):
         self.console = Console()
         self.total_repos = total_repos
-        self.unstarted_repos = []
+        self.not_started_repos = []
         self.finished_repos = []
         self.ongoing_repos = OrderedDict()
         self.finished_files = {}
         self.total_files_per_repo = {}
+        self.repo_money_spent = {}
 
         self.overall_progress = Progress(
             SpinnerColumn(),
             BarColumn(bar_width=None),
             TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
         )
-        self.overall_task = self.overall_progress.add_task("[green]Processing", total=total_repos)
-        
+        self.overall_task = self.overall_progress.add_task(
+            "[green]Processing", total=total_repos
+        )
+
         self.layout = Layout()
         self.layout.split_column(
-            Layout(name="progress", ratio=1),
+            Layout(name="progress", size=3),
+            Layout(name="info", size=6),
             Layout(name="main", ratio=14),
         )
         self.layout["progress"].split_row(
             Layout(name="pbar", ratio=4),
-            Layout(name="backend", ratio=1),
             Layout(name="money", ratio=1),
         )
-        self.layout["progress"]["pbar"].update(Panel(self.overall_progress, title="Overall Progress", border_style="blue"))
-        self.backend_display = Text(f"Backend Using: ", justify="center")
-        self.layout["progress"]["backend"].update(Panel(self.backend_display, title="Backend", border_style="cyan"))
-        self.money_display = Text(f"Money Spent So Far: $0.00", justify="center")
-        self.layout["progress"]["money"].update(Panel(self.money_display, title="$$$$", border_style="cyan"))
+
+        self.layout["progress"]["pbar"].update(
+            Panel(self.overall_progress, title="Overall Progress", border_style="blue")
+        )
+        self.money_display = Text("Money Spent So Far: $0.00", justify="center")
+        self.layout["progress"]["money"].update(
+            Panel(self.money_display, title="$$$$", border_style="blue")
+        )
+
+        self.layout["info"].split_column(
+            Layout(name="other_info", ratio=1),
+            Layout(name="agent_info", ratio=1),
+        )
+        self.layout["info"]["other_info"].split_row(
+            Layout(name="backend", ratio=1),
+            Layout(name="log_dir", ratio=1),
+        )
+        self.layout["info"]["agent_info"].split_row(
+            Layout(name="agent_name", ratio=1),
+            Layout(name="model_name", ratio=1),
+            Layout(name="run_tests", ratio=1),
+            Layout(name="use_repo_info", ratio=1),
+            Layout(name="use_unit_tests_info", ratio=1),
+            Layout(name="use_spec_info", ratio=1),
+            Layout(name="use_lint_info", ratio=1),
+        )
+
+        self.backend_display = Text("Using: ", justify="center")
+        self.layout["info"]["other_info"]["backend"].update(
+            Panel(self.backend_display, title="Backend", border_style="blue")
+        )
+        self.log_dir_display = Text("", justify="center")
+        self.layout["info"]["other_info"]["log_dir"].update(
+            Panel(self.log_dir_display, title="Log Directory", border_style="blue")
+        )
+
         self.layout["main"].split_row(
             Layout(name="left", ratio=1),
-            Layout(name="right", ratio=1),
+            Layout(name="right", ratio=2),
         )
         self.layout["main"]["left"].split_column(
-            Layout(name="unstarted", ratio=1),
+            Layout(name="not_started", ratio=1),
             Layout(name="finished", ratio=1),
         )
-        self.layout["right"].update(Panel(Layout(name="ongoing"), title="Ongoing", border_style="yellow"))
-        
         # Initialize panels with empty content
-        self.layout["left"]["unstarted"].update(Panel(Text(""), title="Unstarted Repos", border_style="red"))
-        self.layout["left"]["finished"].update(Panel(Text(""), title="Finished Repos", border_style="green"))
-        
-    def update_backend_display(self, backend: str):
+        self.layout["main"]["left"]["not_started"].update(
+            Panel(Text(""), title="Not Started Repos", border_style="red")
+        )
+        self.layout["main"]["left"]["finished"].update(
+            Panel(Text(""), title="Finished Repos", border_style="green")
+        )
+
+        self.layout["main"]["right"].update(
+            Panel(Layout(name="ongoing"), title="Ongoing", border_style="yellow")
+        )
+
+    def update_agent_display(
+        self,
+        agent_name: str,
+        model_name: str,
+        run_tests: bool,
+        use_repo_info: bool,
+        use_unit_tests_info: bool,
+        use_spec_info: bool,
+        use_lint_info: bool,
+    ) -> None:
+        """Update the agent display with the given agent information."""
+        info_items = [
+            ("agent_name", "Agent", agent_name),
+            ("model_name", "Model", model_name),
+            ("run_tests", "Run Tests", run_tests),
+            ("use_repo_info", "Use Repo Info", use_repo_info),
+            ("use_unit_tests_info", "Use Unit Tests", use_unit_tests_info),
+            ("use_spec_info", "Use Spec", use_spec_info),
+            ("use_lint_info", "Use Lint", use_lint_info),
+        ]
+
+        for attr_name, title, value in info_items:
+            text = Text(f"{value}", justify="center")
+            setattr(self, attr_name, text)
+            self.layout["info"]["agent_info"][attr_name].update(
+                Panel(text, title=title, border_style="blue")
+            )
+
+    def update_backend_display(self, backend: str) -> None:
+        """Update the backend display with the given backend."""
         self.backend_display = Text(f"Backend Using: {backend}", justify="center")
-        self.layout["progress"]["backend"].update(Panel(self.backend_display, title="Backend", border_style="green"))
+        self.layout["info"]["other_info"]["backend"].update(
+            Panel(self.backend_display, title="Backend", border_style="blue")
+        )
 
-    def update_money_display(self, money: float):
-        self.money_display = Text(f"Money Spent So Far: ${money:.2f}", justify="center")
-        self.layout["progress"]["money"].update(Panel(Align.center(self.money_display), title="$$$$"))
+    def update_log_dir_display(self, log_dir: str) -> None:
+        """Update the log directory display with the given log directory."""
+        self.log_dir_display = Text(f"{log_dir}", justify="center")
+        self.layout["info"]["other_info"]["log_dir"].update(
+            Panel(self.log_dir_display, title="Log Directory", border_style="blue")
+        )
 
-    def set_current_file(self, repo_name: str, file_name: str):
+    def update_money_display(
+        self, repo_name: str, file_name: str, money: float
+    ) -> None:
+        """Update the money display with the given money spent."""
+        self.repo_money_spent.setdefault(repo_name, {}).setdefault(file_name, 0.0)
+        self.repo_money_spent[repo_name][file_name] = money
+        total_money_spent_for_all_repos = sum(
+            sum(repo_money.values()) for repo_money in self.repo_money_spent.values()
+        )
+        self.money_display = Text(
+            f"Money Spent So Far: ${total_money_spent_for_all_repos:.2f}",
+            justify="center",
+        )
+        self.layout["progress"]["money"].update(
+            Panel(Align.center(self.money_display), title="$$$$", border_style="blue")
+        )
+
+    def set_current_file(self, repo_name: str, file_name: str) -> None:
+        """Set the current file for the given repository."""
         if repo_name not in self.ongoing_repos:
             # Start the repo if it's not yet tracked, but don't move it to the start
             self.start_repo(repo_name)
-        
+
         # Just update the file name without reordering the repos
         self.ongoing_repos[repo_name] = file_name
-        
+
         # Append the new file to finished files, keep the order intact
         self.finished_files.setdefault(repo_name, []).append(file_name)
 
         # Update the display
         self.update()
-    
-    def update(self):
-        # Update unstarted repos
-        unstarted_boxes = [RepoBox(repo, "red") for repo in self.unstarted_repos]
-        self.layout["left"]["unstarted"].update(Panel(Columns(unstarted_boxes), title="Not Started Repos", border_style="red"))
+
+    def update(self) -> None:
+        """Update the display with the current state of the repositories."""
+        # Update not_started repos
+        not_started_boxes = [RepoBox(repo, "red") for repo in self.not_started_repos]
+        self.layout["main"]["left"]["not_started"].update(
+            Panel(
+                Columns(not_started_boxes),
+                title="Not Started Repos",
+                border_style="red",
+            )
+        )
 
         # Update finished repos
         finished_boxes = [RepoBox(repo, "green") for repo in self.finished_repos]
-        self.layout["left"]["finished"].update(Panel(Columns(finished_boxes), title="Finished Repos", border_style="green"))
+        self.layout["main"]["left"]["finished"].update(
+            Panel(Columns(finished_boxes), title="Finished Repos", border_style="green")
+        )
 
         # Update ongoing repos with progress bars
-        ongoing_panels = [OngoingRepo(repo, self.ongoing_repos[repo], self.finished_files.get(repo, []), self.total_files_per_repo.get(repo, 1)) for repo in self.ongoing_repos]
+        ongoing_panels = [
+            OngoingRepo(
+                repo,
+                self.ongoing_repos[repo],
+                self.finished_files.get(repo, []),
+                self.total_files_per_repo.get(repo, 1),
+            )
+            for repo in self.ongoing_repos
+        ]
 
         if ongoing_panels:
             ongoing_layout = Layout()
-            for i, panel in enumerate(ongoing_panels):
+            for i, panel in enumerate(ongoing_panels[:5]):
                 ongoing_layout.add_split(Layout(panel, name=f"repo_{i}"))
-            ongoing_layout.split_column(*[ongoing_layout[f"repo_{i}"] for i in range(len(ongoing_panels))])
-            self.layout["right"].update(Panel(ongoing_layout, title="Ongoing", border_style="yellow"))
+            ongoing_layout.split_row(
+                *[ongoing_layout[f"repo_{i}"] for i in range(len(ongoing_panels[:5]))]
+            )
+            self.layout["main"]["right"].update(
+                Panel(
+                    ongoing_layout,
+                    title="Ongoing(Only first 5 shown if more than 5)",
+                    border_style="yellow",
+                )
+            )
         else:
-            self.layout["right"].update(Panel(Text("No ongoing repos"), title="Ongoing", border_style="yellow"))
+            self.layout["main"]["right"].update(
+                Panel(
+                    Text("Preparing to run repos..."),
+                    title="Ongoing",
+                    border_style="yellow",
+                )
+            )
 
-    def start_repo(self, repo_name: str, total_files: int = 0):
-        if repo_name in self.unstarted_repos:
-            self.unstarted_repos.remove(repo_name)
+    def start_repo(self, repo_name: str, total_files: int = 0) -> None:
+        """Start a repository."""
+        if repo_name in self.not_started_repos:
+            self.not_started_repos.remove(repo_name)
         self.ongoing_repos[repo_name] = ""
         self.finished_files[repo_name] = []
         self.total_files_per_repo[repo_name] = total_files
         self.update()
-        
-    def finish_repo(self, repo_name: str):
+
+    def finish_repo(self, repo_name: str) -> None:
+        """Finish a repository."""
         self.finished_repos.append(repo_name)
         if repo_name in self.ongoing_repos:
             del self.ongoing_repos[repo_name]
@@ -152,15 +301,24 @@ def finish_repo(self, repo_name: str):
             del self.finished_files[repo_name]
         self.overall_progress.update(self.overall_task, advance=1)
         self.update()
-        
-    def set_unstarted_repos(self, repos: list[str]):
-        self.unstarted_repos = repos
+
+    def set_not_started_repos(self, repos: list[str]) -> None:
+        """Set the repositories that have not started."""
+        self.not_started_repos = repos
         self.update()
-        
+
     def __enter__(self):
-        self.live = Live(self.layout, console=self.console, screen=True, refresh_per_second=4)
+        self.live = Live(
+            self.layout, console=self.console, screen=True, refresh_per_second=4
+        )
         self.live.start()
         return self
-        
-    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None):
-        self.live.stop()
\ No newline at end of file
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ):
+        self.live.stop()
+        print("Agent finished running")
diff --git a/agent/run_agent.py b/agent/run_agent.py
index ca949da..232f023 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -1,19 +1,18 @@
 import os
-import sys
 import yaml
 import multiprocessing
-from tqdm import tqdm
 from datasets import load_dataset
 from git import Repo
-from agent.commit0_utils import (
+from agent.agent_utils import (
     args2string,
     create_branch,
     get_message,
     get_target_edit_files,
     get_lint_cmd,
+    read_yaml_config,
 )
 from agent.agents import AiderAgents
-from typing import Optional, Type
+from typing import Optional, Type, cast
 from types import TracebackType
 from agent.class_types import AgentConfig
 from commit0.harness.constants import SPLIT
@@ -22,6 +21,10 @@
 from commit0.cli import read_commit0_dot_file
 from pathlib import Path
 from datetime import datetime
+from agent.display import TerminalDisplay
+import queue
+import time
+
 
 class DirContext:
     def __init__(self, d: str):
@@ -40,18 +43,11 @@ def __exit__(
         os.chdir(self.cwd)
 
 
-def read_yaml_config(config_file: str) -> dict:
-    """Read the yaml config from the file."""
-    if not os.path.exists(config_file):
-        raise FileNotFoundError(f"The config file '{config_file}' does not exist.")
-    with open(config_file, "r") as f:
-        return yaml.load(f, Loader=yaml.FullLoader)
-
-
 def run_agent_for_repo(
     repo_base_dir: str,
     agent_config: AgentConfig,
     example: RepoInstance,
+    update_queue: multiprocessing.Queue,
     experiment_name: Optional[str] = None,
     override_previous_changes: bool = False,
     backend: str = "modal",
@@ -63,10 +59,15 @@ def run_agent_for_repo(
 
     repo_name = repo_name.lower()
     repo_name = repo_name.replace(".", "-")
-    
+
+    # before starting, display all information to terminal
+    update_queue.put(("start_repo", (repo_name, 0)))
+
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
+    src_dir = os.path.join(repo_path, example["src_dir"])
+
     try:
         local_repo = Repo(repo_path)
     except Exception:
@@ -94,7 +95,12 @@ def run_agent_for_repo(
         local_repo.git.reset("--hard", example["base_commit"])
 
     # prepare the log dir
-    experiment_log_dir = Path(log_dir) / repo_name / experiment_name / datetime.now().strftime("%Y-%m-%d")
+    experiment_log_dir = (
+        Path(log_dir)
+        / repo_name
+        / experiment_name
+        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
 
     # write agent_config to .agent.yaml in the log_dir for record
@@ -102,51 +108,78 @@ def run_agent_for_repo(
     with open(agent_config_log_file, "w") as agent_config_file:
         yaml.dump(agent_config, agent_config_file)
 
+    # TODO: make this path more general
+    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
+
     with DirContext(repo_path):
         if agent_config is None:
             raise ValueError("Invalid input")
 
-        target_edit_files = get_target_edit_files(repo_path)
+        target_edit_files = get_target_edit_files(
+            src_dir, src_prefix=example["src_dir"]
+        )
 
         if agent_config.run_tests:
             # Call the commit0 get-tests command to retrieve test files
             test_files_str = get_tests(repo_name, verbose=0)
             test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
 
+            update_queue.put(("start_repo", (repo_name, len(test_files))))
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
-                test_cmd = (
-                    f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend}"
-                )
+                update_queue.put(("set_current_file", (repo_name, test_file)))
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 test_log_dir = experiment_log_dir / test_file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
                 message = get_message(agent_config, repo_path, test_file=test_file)
-                agent.run(
+
+                # display the test file to terminal
+                agent_return = agent.run(
                     message,
                     test_cmd,
                     lint_cmd,
                     target_edit_files,
                     test_log_dir,
                 )
+                # after running the agent, update the money display
+                update_queue.put(
+                    (
+                        "update_money_display",
+                        (repo_name, test_file, agent_return.last_cost),
+                    )
+                )
         else:
             # when unit test feedback is not available, iterate over target files to edit
             message = get_message(
                 agent_config, repo_path, test_dir=example["test"]["test_dir"]
             )
-            
+
+            update_queue.put(("start_repo", (repo_name, len(target_edit_files))))
             for f in target_edit_files:
+                update_queue.put(("set_current_file", (repo_name, f)))
                 file_name = f.replace(".py", "").replace("/", "__")
                 file_log_dir = experiment_log_dir / file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                agent.run(message, "", lint_cmd, [f], file_log_dir)
-
+                agent_return = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                update_queue.put(
+                    (
+                        "update_money_display",
+                        (repo_name, file_name, agent_return.last_cost),
+                    )
+                )
+    update_queue.put(("finish_repo", repo_name))
 
-def run_agent(experiment_name: str, override_previous_changes: bool, backend: str, agent_config_file: str, log_dir: str) -> None:
-    """Main function to run Aider for a given repository.
 
-    Will run in parallel for each repo.
-    """
+def run_agent(
+    experiment_name: str,
+    override_previous_changes: bool,
+    backend: str,
+    agent_config_file: str,
+    log_dir: str,
+    max_parallel_repos: int,
+) -> None:
+    """Main function to run Aider for a given repository."""
     config = read_yaml_config(agent_config_file)
 
     agent_config = AgentConfig(**config)
@@ -170,26 +203,86 @@ def run_agent(experiment_name: str, override_previous_changes: bool, backend: st
     ]
     assert len(filtered_dataset) > 0, "No examples available"
 
-    if len(filtered_dataset) > 1:
-        sys.stdout = open(os.devnull, "w")
-
-    run_agent_for_repo(commit0_config["base_dir"], agent_config, filtered_dataset[0], experiment_name=experiment_name, override_previous_changes=override_previous_changes, backend=backend, log_dir=log_dir)
-    # with tqdm(
-    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
-    # ) as pbar:
-    #     with multiprocessing.Pool(processes=3) as pool:
-    #         results = []
-
-    #         # Use apply_async to submit jobs and add progress bar updates
-    #         for example in filtered_dataset:
-    #             result = pool.apply_async(
-    #                 run_agent_for_repo,
-    #                 args=(commit0_config["base_dir"], agent_config, example),
-    #                 callback=lambda _: pbar.update(
-    #                     1
-    #                 ),  # Update progress bar on task completion
-    #             )
-    #             results.append(result)
-
-    #         for result in results:
-    #             result.wait()
+    # if len(filtered_dataset) > 1:
+    #     sys.stdout = open(os.devnull, "w")
+
+    with TerminalDisplay(len(filtered_dataset)) as display:
+        not_started_repos = [
+            getattr(example, "repo", "").split("/")[-1] for example in filtered_dataset
+        ]
+        display.set_not_started_repos(not_started_repos)
+
+        display.update_backend_display(backend)
+        display.update_log_dir_display(log_dir)
+        display.update_agent_display(
+            agent_config.agent_name,
+            agent_config.model_name,
+            agent_config.run_tests,
+            agent_config.use_repo_info,
+            agent_config.use_unit_tests_info,
+            agent_config.use_spec_info,
+            agent_config.use_lint_info,
+        )
+
+        with multiprocessing.Manager() as manager:
+            update_queue = manager.Queue()
+            with multiprocessing.Pool(processes=max_parallel_repos) as pool:
+                results = []
+
+                # Use apply_async to submit jobs and add progress bar updates
+                for example in filtered_dataset:
+                    result = pool.apply_async(
+                        run_agent_for_repo,
+                        args=(
+                            commit0_config["base_dir"],
+                            agent_config,
+                            cast(RepoInstance, example),
+                            update_queue,
+                            experiment_name,
+                            override_previous_changes,
+                            backend,
+                            log_dir,
+                        ),
+                    )
+                    results.append(result)
+
+                while any(not r.ready() for r in results):
+                    try:
+                        while not update_queue.empty():
+                            action, data = update_queue.get_nowait()
+                            if action == "start_repo":
+                                repo_name, total_files = data
+                                display.start_repo(repo_name, total_files)
+                            elif action == "finish_repo":
+                                repo_name = data
+                                display.finish_repo(repo_name)
+                            elif action == "set_current_file":
+                                repo_name, file_name = data
+                                display.set_current_file(repo_name, file_name)
+                            elif action == "update_money_display":
+                                repo_name, file_name, money_spent = data
+                                display.update_money_display(
+                                    repo_name, file_name, money_spent
+                                )
+                    except queue.Empty:
+                        pass
+                    time.sleep(0.1)  # Small delay to prevent busy-waiting
+
+                # Final update after all repos are processed
+                while not update_queue.empty():
+                    action, data = update_queue.get()
+                    if action == "start_repo":
+                        repo_name, total_files = data
+                        display.start_repo(repo_name, total_files)
+                    elif action == "finish_repo":
+                        repo_name = data
+                        display.finish_repo(repo_name)
+                    elif action == "set_current_file":
+                        repo_name, file_name = data
+                        display.set_current_file(repo_name, file_name)
+                    elif action == "update_money_display":
+                        repo_name, file_name, money_spent = data
+                        display.update_money_display(repo_name, file_name, money_spent)
+
+                for result in results:
+                    result.get()
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
new file mode 100644
index 0000000..aadc99d
--- /dev/null
+++ b/agent/run_agent_no_rich.py
@@ -0,0 +1,212 @@
+import os
+import yaml
+import multiprocessing
+from tqdm import tqdm
+from datasets import load_dataset
+from git import Repo
+from agent.agent_utils import (
+    args2string,
+    create_branch,
+    get_message,
+    get_target_edit_files,
+    get_lint_cmd,
+    read_yaml_config,
+)
+from agent.agents import AiderAgents
+from typing import Optional, Type, cast
+from types import TracebackType
+from agent.class_types import AgentConfig
+from commit0.harness.constants import SPLIT
+from commit0.harness.get_pytest_ids import main as get_tests
+from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
+from commit0.cli import read_commit0_dot_file
+from pathlib import Path
+from datetime import datetime
+
+
+class DirContext:
+    def __init__(self, d: str):
+        self.dir = d
+        self.cwd = os.getcwd()
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(
+        self,
+        exctype: Optional[Type[BaseException]],
+        excinst: Optional[BaseException],
+        exctb: Optional[TracebackType],
+    ) -> None:
+        os.chdir(self.cwd)
+
+
+def run_agent_for_repo(
+    repo_base_dir: str,
+    agent_config: AgentConfig,
+    example: RepoInstance,
+    experiment_name: Optional[str] = None,
+    override_previous_changes: bool = False,
+    backend: str = "modal",
+    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
+) -> None:
+    """Run Aider for a given repository."""
+    # get repo info
+    _, repo_name = example["repo"].split("/")
+
+    repo_name = repo_name.lower()
+    repo_name = repo_name.replace(".", "-")
+
+    repo_path = os.path.join(repo_base_dir, repo_name)
+    repo_path = os.path.abspath(repo_path)
+
+    src_dir = os.path.join(repo_path, example["src_dir"])
+
+    try:
+        local_repo = Repo(repo_path)
+    except Exception:
+        raise Exception(
+            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
+        )
+
+    if agent_config.agent_name == "aider":
+        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
+    else:
+        raise NotImplementedError(
+            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
+        )
+
+    # if branch_name is not provided, create a new branch name based on agent_config
+    if experiment_name is None:
+        experiment_name = args2string(agent_config)
+
+    create_branch(local_repo, experiment_name, example["base_commit"])
+
+    # in cases where the latest commit of branch is not commit 0
+    # set it back to commit 0
+    latest_commit = local_repo.commit(experiment_name)
+    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
+        local_repo.git.reset("--hard", example["base_commit"])
+
+    # prepare the log dir
+    experiment_log_dir = (
+        Path(log_dir)
+        / repo_name
+        / experiment_name
+        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    )
+    experiment_log_dir.mkdir(parents=True, exist_ok=True)
+
+    # write agent_config to .agent.yaml in the log_dir for record
+    agent_config_log_file = experiment_log_dir / ".agent.yaml"
+    with open(agent_config_log_file, "w") as agent_config_file:
+        yaml.dump(agent_config, agent_config_file)
+
+    # TODO: make this path more general
+    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
+    with DirContext(repo_path):
+        if agent_config is None:
+            raise ValueError("Invalid input")
+
+        target_edit_files = get_target_edit_files(
+            src_dir, src_prefix=example["src_dir"]
+        )
+
+        if agent_config.run_tests:
+            # Call the commit0 get-tests command to retrieve test files
+            test_files_str = get_tests(repo_name, verbose=0)
+            test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
+            # when unit test feedback is available, iterate over test files
+            for test_file in test_files:
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
+                test_file_name = test_file.replace(".py", "").replace("/", "__")
+                test_log_dir = experiment_log_dir / test_file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                message = get_message(agent_config, repo_path, test_file=test_file)
+                _ = agent.run(
+                    message,
+                    test_cmd,
+                    lint_cmd,
+                    target_edit_files,
+                    test_log_dir,
+                )
+                # cost = agent_return.last_cost
+        else:
+            # when unit test feedback is not available, iterate over target files to edit
+            message = get_message(
+                agent_config, repo_path, test_dir=example["test"]["test_dir"]
+            )
+            for f in target_edit_files:
+                file_name = f.replace(".py", "").replace("/", "__")
+                file_log_dir = experiment_log_dir / file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                # cost = agent_return.last_cost
+
+
+def run_agent(
+    experiment_name: str,
+    override_previous_changes: bool,
+    backend: str,
+    agent_config_file: str,
+    log_dir: str,
+    max_parallel_repos: int,
+) -> None:
+    """Main function to run Aider for a given repository.
+
+    Will run in parallel for each repo.
+    """
+    config = read_yaml_config(agent_config_file)
+
+    agent_config = AgentConfig(**config)
+
+    commit0_config = read_commit0_dot_file(".commit0.yaml")
+
+    dataset = load_dataset(
+        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
+    )
+    filtered_dataset = [
+        example
+        for example in dataset
+        if commit0_config["repo_split"] == "all"
+        or (
+            isinstance(example, dict)
+            and "repo" in example
+            and isinstance(example["repo"], str)
+            and example["repo"].split("/")[-1]
+            in SPLIT.get(commit0_config["repo_split"], [])
+        )
+    ]
+    assert len(filtered_dataset) > 0, "No examples available"
+
+    # if len(filtered_dataset) > 1:
+    #     sys.stdout = open(os.devnull, "w")
+
+    with tqdm(
+        total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
+    ) as pbar:
+        with multiprocessing.Pool(processes=max_parallel_repos) as pool:
+            results = []
+
+            # Use apply_async to submit jobs and add progress bar updates
+            for example in filtered_dataset:
+                result = pool.apply_async(
+                    run_agent_for_repo,
+                    args=(
+                        commit0_config["base_dir"],
+                        agent_config,
+                        cast(RepoInstance, example),
+                        experiment_name,
+                        override_previous_changes,
+                        backend,
+                        log_dir,
+                    ),
+                    callback=lambda _: pbar.update(
+                        1
+                    ),  # Update progress bar on task completion
+                )
+                results.append(result)
+
+            for result in results:
+                result.wait()
diff --git a/agent/run_agent_test.py b/agent/run_agent_test.py
deleted file mode 100644
index c310f34..0000000
--- a/agent/run_agent_test.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import os
-import sys
-import yaml
-import multiprocessing
-from tqdm import tqdm
-import queue  # Add this import
-from datasets import load_dataset
-from git import Repo
-from agent.commit0_utils import (
-    args2string,
-    create_branch,
-    get_message,
-    get_target_edit_files,
-    get_lint_cmd,
-)
-from agent.agents import AiderAgents
-from typing import Optional, Type
-from types import TracebackType
-from agent.class_types import AgentConfig
-from commit0.harness.constants import SPLIT
-from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
-from commit0.cli import read_commit0_dot_file
-import time
-import random
-import multiprocessing
-from agent.display import TerminalDisplay
-
-class DirContext:
-    def __init__(self, d: str):
-        self.dir = d
-        self.cwd = os.getcwd()
-
-    def __enter__(self):
-        os.chdir(self.dir)
-
-    def __exit__(
-        self,
-        exctype: Optional[Type[BaseException]],
-        excinst: Optional[BaseException],
-        exctb: Optional[TracebackType],
-    ) -> None:
-        os.chdir(self.cwd)
-
-
-def read_yaml_config(config_file: str) -> dict:
-    """Read the yaml config from the file."""
-    if not os.path.exists(config_file):
-        raise FileNotFoundError(f"The config file '{config_file}' does not exist.")
-    with open(config_file, "r") as f:
-        return yaml.load(f, Loader=yaml.FullLoader)
-
-
-def run_agent_for_repo(
-    repo_base_dir: str,
-    agent_config: AgentConfig,
-    example: RepoInstance,
-    update_queue: multiprocessing.Queue
-) -> None:
-    """Run Aider for a given repository."""
-    # get repo info
-    _, repo_name = example["repo"].split("/")
-    update_queue.put(("start_repo", (repo_name, 0)))
-    repo_name = repo_name.lower()
-    if repo_name != "web3.py":
-        repo_name = repo_name.replace(".", "-")
-
-    # Call the commit0 get-tests command to retrieve test files
-    test_files_str = get_tests(repo_name, verbose=0)
-    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
-    repo_path = os.path.join(repo_base_dir, repo_name)
-    repo_path = os.path.abspath(repo_path)
-    try:
-        local_repo = Repo(repo_path)
-    except Exception:
-        raise Exception(
-            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
-        )
-
-    if agent_config.agent_name == "aider":
-        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
-    else:
-        raise NotImplementedError(
-            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
-        )
-
-    run_id = args2string(agent_config)
-    run_id = run_id.replace("run_tests-1", "run_tests-0")
-    # print(f"Agent is coding on branch: {run_id}", file=sys.stderr)
-    create_branch(local_repo, run_id, example["base_commit"])
-    latest_commit = local_repo.commit(run_id)
-    # in cases where the latest commit of branch is not commit 0
-    # set it back to commit 0
-    # TODO: ask user for permission
-    if latest_commit.hexsha != example["base_commit"]:
-        local_repo.git.reset("--hard", example["base_commit"])
-    target_edit_files = get_target_edit_files(repo_path)
-
-    # Determine the total number of files (either test files or target files)
-    total_files = len(test_files) if agent_config.run_tests else len(target_edit_files)
-
-    # Notify the display to start tracking this repo, pass the total number of files
-    update_queue.put(("start_repo", (repo_name, total_files)))
-    
-    with DirContext(repo_path):
-        if agent_config is None:
-            raise ValueError("Invalid input")
-
-        if agent_config.run_tests:
-            # when unit test feedback is available, iterate over test files
-            for test_file in test_files:
-                update_queue.put(("set_current_file", (repo_name, test_file)))
-                sleep_time = random.randint(1,3) # Random sleep time between 1 and 5 seconds
-                time.sleep(sleep_time)
-                update_queue.put(("update_money_display", random.random()))
-                continue
-                test_cmd = (
-                    f"python -m commit0 test {repo_path} {test_file} --branch {run_id} --backend {agent_config.backend}"
-                )
-                test_file_name = test_file.replace(".py", "").replace("/", "__")
-                log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                message = get_message(agent_config, repo_path, test_file=test_file)
-                agent.run(
-                    message,
-                    test_cmd,
-                    lint_cmd,
-                    target_edit_files,
-                    log_dir,
-                )
-        else:
-            # when unit test feedback is not available, iterate over target files to edit
-            message = get_message(
-                agent_config, repo_path, test_dir=example["test"]["test_dir"]
-            )
-            agent_config_log_file = os.path.abspath(
-                RUN_AIDER_LOG_DIR / "no_tests" / ".agent.yaml"
-            )
-            os.makedirs(os.path.dirname(agent_config_log_file), exist_ok=True)
-            # write agent_config to .agent.yaml
-            with open(agent_config_log_file, "w") as agent_config_file:
-                yaml.dump(agent_config, agent_config_file)
-
-            for f in target_edit_files:
-                update_queue.put(("set_current_file", (repo_name, f)))
-                sleep_time = random.randint(1,3) # Random sleep time between 1 and 5 seconds
-                time.sleep(sleep_time)
-                update_queue.put(("update_money_display", random.random()))
-                continue
-                file_name = f.replace(".py", "").replace("/", "__")
-                log_dir = RUN_AIDER_LOG_DIR / "no_tests" / file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                agent.run(message, "", lint_cmd, [f], log_dir)
-    update_queue.put(("finish_repo", repo_name))
-
-def run_agent_test(agent_config_file: str) -> None:
-    """Main function to run Aider for a given repository."""
-    config = read_yaml_config(agent_config_file)
-    agent_config = AgentConfig(**config)
-    commit0_config = read_commit0_dot_file(".commit0.yaml")
-
-    dataset = load_dataset(
-        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
-    )
-    filtered_dataset = [
-        example
-        for example in dataset
-        if commit0_config["repo_split"] == "all"
-        or (
-            isinstance(example, dict)
-            and "repo" in example
-            and isinstance(example["repo"], str)
-            and example["repo"].split("/")[-1]
-            in SPLIT.get(commit0_config["repo_split"], [])
-        )
-    ]
-
-    assert len(filtered_dataset) > 0, "No examples available"
-
-    with TerminalDisplay(len(filtered_dataset)) as display:
-        unstarted_repos = [example["repo"].split("/")[-1] for example in filtered_dataset]
-        display.set_unstarted_repos(unstarted_repos)
-
-        display.update_backend_display(agent_config.backend)
-        total_money_spent = 0
-        
-        with multiprocessing.Manager() as manager:
-            update_queue = manager.Queue()
-
-            
-
-            update_queue.put(("update_backend_display", agent_config.backend))
-            with multiprocessing.Pool(processes=3) as pool:
-                results = []
-
-                for example in filtered_dataset:
-                    result = pool.apply_async(
-                        run_agent_for_repo,
-                        args=(commit0_config["base_dir"], agent_config, example, update_queue)
-                    )
-                    results.append(result)
-
-                while any(not r.ready() for r in results):
-                    try:
-                        while not update_queue.empty():
-                            action, data = update_queue.get_nowait()
-                            if action == "start_repo":
-                                repo_name, total_files = data
-                                display.start_repo(repo_name, total_files)
-                            elif action == "finish_repo":
-                                repo_name = data
-                                display.finish_repo(repo_name)
-                            elif action == "set_current_file":
-                                repo_name, file_name = data
-                                display.set_current_file(repo_name, file_name)
-                            elif action == "update_money_display":
-                                money_spent = data
-                                total_money_spent += money_spent
-                                display.update_money_display(total_money_spent)
-                    except queue.Empty:
-                        pass
-                    time.sleep(0.1)  # Small delay to prevent busy-waiting
-
-                # Final update after all repos are processed
-                while not update_queue.empty():
-                    action, data = update_queue.get()
-                    if action == "start_repo":
-                        repo_name, total_files = data
-                        display.start_repo(repo_name, total_files)
-                    elif action == "finish_repo":
-                        repo_name = data
-                        display.finish_repo(repo_name)
-                    elif action == "set_current_file":
-                        repo_name, file_name = data
-                        display.set_current_file(repo_name, file_name)
-                    elif action == "update_money_display":
-                        money_spent = data
-                        total_money_spent += money_spent
-                        display.update_money_display(total_money_spent)
-
-                for result in results:
-                    result.get()
\ No newline at end of file
diff --git a/commit0/cli.py b/commit0/cli.py
index e0e61ea..896465b 100644
--- a/commit0/cli.py
+++ b/commit0/cli.py
@@ -15,6 +15,7 @@
 import yaml
 import os
 import sys
+
 commit0_app = typer.Typer(
     no_args_is_help=True,
     add_completion=False,
@@ -284,7 +285,6 @@ def test(
     )
 
 
-
 @commit0_app.command()
 def evaluate(
     branch: Union[str, None] = typer.Option(

From 4cedc348fe3f1bb0778932833c8e7881cd14c9ae Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 18:42:34 -0400
Subject: [PATCH 05/16] minor update

---
 agent/agents.py    | 26 +++++++++++++-------------
 agent/cli.py       | 43 +++++--------------------------------------
 agent/display.py   | 18 ++++++++++++++----
 agent/run_agent.py |  6 ++++--
 4 files changed, 36 insertions(+), 57 deletions(-)

diff --git a/agent/agents.py b/agent/agents.py
index 053e12c..a52d3f4 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -111,19 +111,19 @@ def run(
         coder.stream = True
 
         # Run the agent
-        # coder.run(message)
-
-        #### TMP
-        import time
-        import random
-
-        time.sleep(random.random() * 5)
-        n = random.random() / 10
-        with open(log_file, "a") as f:
-            f.write(
-                f"> Tokens: 33k sent, 1.3k received. Cost: $0.12 message, ${n} session.  \n"
-            )
-        #### TMP
+        coder.run(message)
+
+        # #### TMP
+        # import time
+        # import random
+
+        # time.sleep(random.random() * 5)
+        # n = random.random() / 10
+        # with open(log_file, "a") as f:
+        #     f.write(
+        #         f"> Tokens: 33k sent, 1.3k received. Cost: $0.12 message, ${n} session.  \n"
+        #     )
+        # #### TMP
 
         # Close redirected stdout and stderr
         sys.stdout.close()
diff --git a/agent/cli.py b/agent/cli.py
index 82d9d40..5e29bcf 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -190,6 +190,10 @@ def run(
         1,
         help="Maximum number of repositories for agent to run in parallel",
     ),
+    display_repo_progress_num: int = typer.Option(
+        5,
+        help="Display the agent progress",
+    ),
 ) -> None:
     """Run the agent on the repository."""
     run_agent(
@@ -199,6 +203,7 @@ def run(
         agent_config_file,
         log_dir,
         max_parallel_repos,
+        display_repo_progress_num,
     )
 
 
@@ -238,41 +243,3 @@ def run_test_no_rich(
         log_dir,
         max_parallel_repos,
     )
-
-
-@agent_app.command()
-def run_test(
-    experiment_name: str = typer.Argument(
-        ...,
-        help="Experiment name of current run",
-    ),
-    override_previous_changes: bool = typer.Option(
-        False,
-        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
-    ),
-    backend: str = typer.Option(
-        "modal",
-        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
-    ),
-    agent_config_file: str = typer.Option(
-        ".agent.yaml",
-        help="Path to the agent config file",
-    ),
-    log_dir: str = typer.Option(
-        str(RUN_AIDER_LOG_DIR.resolve()),
-        help="Log directory to store the logs",
-    ),
-    max_parallel_repos: int = typer.Option(
-        1,
-        help="Maximum number of repositories for agent to run in parallel",
-    ),
-) -> None:
-    """Run the agent on the repository."""
-    run_agent(
-        experiment_name,
-        override_previous_changes,
-        backend,
-        agent_config_file,
-        log_dir,
-        max_parallel_repos,
-    )
diff --git a/agent/display.py b/agent/display.py
index 8b219be..66b2bdb 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -81,6 +81,7 @@ def __init__(self, total_repos: int):
         self.finished_files = {}
         self.total_files_per_repo = {}
         self.repo_money_spent = {}
+        self.display_repo_progress_num = 5
 
         self.overall_progress = Progress(
             SpinnerColumn(),
@@ -157,6 +158,10 @@ def __init__(self, total_repos: int):
             Panel(Layout(name="ongoing"), title="Ongoing", border_style="yellow")
         )
 
+    def update_repo_progress_num(self, display_repo_progress_num: int) -> None:
+        """Update the number of repositories to display in the ongoing section."""
+        self.display_repo_progress_num = display_repo_progress_num
+
     def update_agent_display(
         self,
         agent_name: str,
@@ -262,15 +267,20 @@ def update(self) -> None:
 
         if ongoing_panels:
             ongoing_layout = Layout()
-            for i, panel in enumerate(ongoing_panels[:5]):
+            for i, panel in enumerate(ongoing_panels[: self.display_repo_progress_num]):
                 ongoing_layout.add_split(Layout(panel, name=f"repo_{i}"))
-            ongoing_layout.split_row(
-                *[ongoing_layout[f"repo_{i}"] for i in range(len(ongoing_panels[:5]))]
+            ongoing_layout.split_column(
+                *[
+                    ongoing_layout[f"repo_{i}"]
+                    for i in range(
+                        len(ongoing_panels[: self.display_repo_progress_num])
+                    )
+                ]
             )
             self.layout["main"]["right"].update(
                 Panel(
                     ongoing_layout,
-                    title="Ongoing(Only first 5 shown if more than 5)",
+                    title=f"Ongoing(only show at most {self.display_repo_progress_num} repos, set with `--display_repo_progress_num` flag)",
                     border_style="yellow",
                 )
             )
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 232f023..bcb474f 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -178,6 +178,7 @@ def run_agent(
     agent_config_file: str,
     log_dir: str,
     max_parallel_repos: int,
+    display_repo_progress_num: int,
 ) -> None:
     """Main function to run Aider for a given repository."""
     config = read_yaml_config(agent_config_file)
@@ -208,10 +209,12 @@ def run_agent(
 
     with TerminalDisplay(len(filtered_dataset)) as display:
         not_started_repos = [
-            getattr(example, "repo", "").split("/")[-1] for example in filtered_dataset
+            cast(RepoInstance, example)["repo"].split("/")[-1]
+            for example in filtered_dataset
         ]
         display.set_not_started_repos(not_started_repos)
 
+        display.update_repo_progress_num(display_repo_progress_num)
         display.update_backend_display(backend)
         display.update_log_dir_display(log_dir)
         display.update_agent_display(
@@ -223,7 +226,6 @@ def run_agent(
             agent_config.use_spec_info,
             agent_config.use_lint_info,
         )
-
         with multiprocessing.Manager() as manager:
             update_queue = manager.Queue()
             with multiprocessing.Pool(processes=max_parallel_repos) as pool:

From 49998303a23bb9eb0481da6015d76a804437a01d Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 20:12:20 -0400
Subject: [PATCH 06/16] minor update

---
 agent/display.py   | 23 +++++++++++++++++++++++
 agent/run_agent.py | 23 ++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/agent/display.py b/agent/display.py
index 66b2bdb..dae865c 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -100,12 +100,17 @@ def __init__(self, total_repos: int):
         )
         self.layout["progress"].split_row(
             Layout(name="pbar", ratio=4),
+            Layout(name="time", ratio=1),
             Layout(name="money", ratio=1),
         )
 
         self.layout["progress"]["pbar"].update(
             Panel(self.overall_progress, title="Overall Progress", border_style="blue")
         )
+        self.time_display = Text("Time Spent So Far: 0s", justify="center")
+        self.layout["progress"]["time"].update(
+            Panel(self.time_display, title="$$$$", border_style="blue")
+        )
         self.money_display = Text("Money Spent So Far: $0.00", justify="center")
         self.layout["progress"]["money"].update(
             Panel(self.money_display, title="$$$$", border_style="blue")
@@ -190,6 +195,24 @@ def update_agent_display(
                 Panel(text, title=title, border_style="blue")
             )
 
+    def update_time_display(self, time_in_seconds: int) -> None:
+        """Update the time display with the given time."""
+        days, remainder = divmod(time_in_seconds, 86400)
+        hours, remainder = divmod(remainder, 3600)
+        minutes, seconds = divmod(remainder, 60)
+        if days > 0:
+            time_str = f"{days}d {hours:02d}h {minutes:02d}m {seconds:02d}s"
+        elif hours > 0:
+            time_str = f"{hours:02d}h {minutes:02d}m {seconds:02d}s"
+        elif minutes > 0:
+            time_str = f"{minutes:02d}m {seconds:02d}s"
+        else:
+            time_str = f"{seconds:02d}s"
+        self.time_display = Text(f"Time Spent So Far: {time_str}", justify="center")
+        self.layout["progress"]["time"].update(
+            Panel(self.time_display, title="Time", border_style="blue")
+        )
+
     def update_backend_display(self, backend: str) -> None:
         """Update the backend display with the given backend."""
         self.backend_display = Text(f"Backend Using: {backend}", justify="center")
diff --git a/agent/run_agent.py b/agent/run_agent.py
index bcb474f..da9ac17 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -66,6 +66,10 @@ def run_agent_for_repo(
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
+    # TODO: remove this to make web3.py work
+    if repo_name == "web3-py":
+        repo_path = repo_path.replace("web3-py", "web3.py")
+
     src_dir = os.path.join(repo_path, example["src_dir"])
 
     try:
@@ -214,7 +218,11 @@ def run_agent(
         ]
         display.set_not_started_repos(not_started_repos)
 
-        display.update_repo_progress_num(display_repo_progress_num)
+        start_time = time.time()
+
+        display.update_repo_progress_num(
+            min(display_repo_progress_num, max_parallel_repos)
+        )
         display.update_backend_display(backend)
         display.update_log_dir_display(log_dir)
         display.update_agent_display(
@@ -248,6 +256,7 @@ def run_agent(
                     )
                     results.append(result)
 
+                last_time_update = 0
                 while any(not r.ready() for r in results):
                     try:
                         while not update_queue.empty():
@@ -268,6 +277,14 @@ def run_agent(
                                 )
                     except queue.Empty:
                         pass
+
+                    # Update time display every second
+                    current_time = time.time()
+                    if current_time - last_time_update >= 1:
+                        elapsed_time = int(current_time - start_time)
+                        display.update_time_display(elapsed_time)
+                        last_time_update = current_time
+
                     time.sleep(0.1)  # Small delay to prevent busy-waiting
 
                 # Final update after all repos are processed
@@ -286,5 +303,9 @@ def run_agent(
                         repo_name, file_name, money_spent = data
                         display.update_money_display(repo_name, file_name, money_spent)
 
+                # Final time update
+                elapsed_time = int(time.time() - start_time)
+                display.update_time_display(elapsed_time)
+
                 for result in results:
                     result.get()

From 910e5c5a214b4d3189b5b6f25b6ba6a084aae311 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 20:17:54 -0400
Subject: [PATCH 07/16] change experiment name to branch

---
 agent/cli.py               | 16 ++++++++--------
 agent/run_agent.py         | 18 +++++++++---------
 agent/run_agent_no_rich.py | 18 +++++++++---------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/agent/cli.py b/agent/cli.py
index 5e29bcf..5e17ea3 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -166,13 +166,13 @@ def config(
 
 @agent_app.command()
 def run(
-    experiment_name: str = typer.Argument(
+    branch: str = typer.Argument(
         ...,
-        help="Experiment name of current run",
+        help="Branch name of current run",
     ),
     override_previous_changes: bool = typer.Option(
         False,
-        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
+        help="If override the previous agent changes on `branch` or run the agent continuously on the new changes",
     ),
     backend: str = typer.Option(
         "modal",
@@ -197,7 +197,7 @@ def run(
 ) -> None:
     """Run the agent on the repository."""
     run_agent(
-        experiment_name,
+        branch,
         override_previous_changes,
         backend,
         agent_config_file,
@@ -209,13 +209,13 @@ def run(
 
 @agent_app.command()
 def run_test_no_rich(
-    experiment_name: str = typer.Argument(
+    branch: str = typer.Argument(
         ...,
-        help="Experiment name of current run",
+        help="Branch name of current run",
     ),
     override_previous_changes: bool = typer.Option(
         False,
-        help="If override the previous agent changes on `experiment_name` or run the agent continuously on the new changes",
+        help="If override the previous agent changes on `branch` or run the agent continuously on the new changes",
     ),
     backend: str = typer.Option(
         "modal",
@@ -236,7 +236,7 @@ def run_test_no_rich(
 ) -> None:
     """Run the agent on the repository."""
     run_agent_no_rich(
-        experiment_name,
+        branch,
         override_previous_changes,
         backend,
         agent_config_file,
diff --git a/agent/run_agent.py b/agent/run_agent.py
index da9ac17..4ca0f05 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -48,7 +48,7 @@ def run_agent_for_repo(
     agent_config: AgentConfig,
     example: RepoInstance,
     update_queue: multiprocessing.Queue,
-    experiment_name: Optional[str] = None,
+    branch: Optional[str] = None,
     override_previous_changes: bool = False,
     backend: str = "modal",
     log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
@@ -87,14 +87,14 @@ def run_agent_for_repo(
         )
 
     # if branch_name is not provided, create a new branch name based on agent_config
-    if experiment_name is None:
-        experiment_name = args2string(agent_config)
+    if branch is None:
+        branch = args2string(agent_config)
 
-    create_branch(local_repo, experiment_name, example["base_commit"])
+    create_branch(local_repo, branch, example["base_commit"])
 
     # in cases where the latest commit of branch is not commit 0
     # set it back to commit 0
-    latest_commit = local_repo.commit(experiment_name)
+    latest_commit = local_repo.commit(branch)
     if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
@@ -102,7 +102,7 @@ def run_agent_for_repo(
     experiment_log_dir = (
         Path(log_dir)
         / repo_name
-        / experiment_name
+        / branch
         / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
@@ -132,7 +132,7 @@ def run_agent_for_repo(
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
                 update_queue.put(("set_current_file", (repo_name, test_file)))
-                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 test_log_dir = experiment_log_dir / test_file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
@@ -176,7 +176,7 @@ def run_agent_for_repo(
 
 
 def run_agent(
-    experiment_name: str,
+    branch: str,
     override_previous_changes: bool,
     backend: str,
     agent_config_file: str,
@@ -248,7 +248,7 @@ def run_agent(
                             agent_config,
                             cast(RepoInstance, example),
                             update_queue,
-                            experiment_name,
+                            branch,
                             override_previous_changes,
                             backend,
                             log_dir,
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
index aadc99d..256903a 100644
--- a/agent/run_agent_no_rich.py
+++ b/agent/run_agent_no_rich.py
@@ -45,7 +45,7 @@ def run_agent_for_repo(
     repo_base_dir: str,
     agent_config: AgentConfig,
     example: RepoInstance,
-    experiment_name: Optional[str] = None,
+    branch: Optional[str] = None,
     override_previous_changes: bool = False,
     backend: str = "modal",
     log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
@@ -77,14 +77,14 @@ def run_agent_for_repo(
         )
 
     # if branch_name is not provided, create a new branch name based on agent_config
-    if experiment_name is None:
-        experiment_name = args2string(agent_config)
+    if branch is None:
+        branch = args2string(agent_config)
 
-    create_branch(local_repo, experiment_name, example["base_commit"])
+    create_branch(local_repo, branch, example["base_commit"])
 
     # in cases where the latest commit of branch is not commit 0
     # set it back to commit 0
-    latest_commit = local_repo.commit(experiment_name)
+    latest_commit = local_repo.commit(branch)
     if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
@@ -92,7 +92,7 @@ def run_agent_for_repo(
     experiment_log_dir = (
         Path(log_dir)
         / repo_name
-        / experiment_name
+        / branch
         / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
@@ -119,7 +119,7 @@ def run_agent_for_repo(
 
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
-                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {experiment_name} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 test_log_dir = experiment_log_dir / test_file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
@@ -146,7 +146,7 @@ def run_agent_for_repo(
 
 
 def run_agent(
-    experiment_name: str,
+    branch: str,
     override_previous_changes: bool,
     backend: str,
     agent_config_file: str,
@@ -197,7 +197,7 @@ def run_agent(
                         commit0_config["base_dir"],
                         agent_config,
                         cast(RepoInstance, example),
-                        experiment_name,
+                        branch,
                         override_previous_changes,
                         backend,
                         log_dir,

From 9212dcbc0eb150638718e14f7c11fd1de1a66cec Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 21:03:05 -0400
Subject: [PATCH 08/16] fix small error

---
 agent/agent_utils.py      |   2 +-
 agent/cli.py              |  39 +++++++
 agent/run_agent.py        |  12 +-
 agent/run_agent_joblit.py | 225 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 273 insertions(+), 5 deletions(-)
 create mode 100644 agent/run_agent_joblit.py

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index 3df8360..6184d92 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -126,7 +126,7 @@ def get_target_edit_files(target_dir: str, src_prefix: str) -> list[str]:
         for filename in filenames:
             if filename.endswith(".py"):
                 file_path = os.path.join(root, filename)
-                with open(file_path, "r") as file:
+                with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
                     if "    pass" in file.read():
                         files.append(file_path)
 
diff --git a/agent/cli.py b/agent/cli.py
index 5e17ea3..531683b 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -1,6 +1,7 @@
 import typer
 from agent.run_agent_no_rich import run_agent as run_agent_no_rich
 from agent.run_agent import run_agent
+from agent.run_agent_joblit import run_agent as run_agent_joblit
 from commit0.harness.constants import RUN_AIDER_LOG_DIR
 import subprocess
 from agent.agent_utils import write_agent_config
@@ -243,3 +244,41 @@ def run_test_no_rich(
         log_dir,
         max_parallel_repos,
     )
+
+
+@agent_app.command()
+def run_test_joblit(
+    branch: str = typer.Argument(
+        ...,
+        help="Branch name of current run",
+    ),
+    override_previous_changes: bool = typer.Option(
+        False,
+        help="If override the previous agent changes on `branch` or run the agent continuously on the new changes",
+    ),
+    backend: str = typer.Option(
+        "modal",
+        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
+    ),
+    agent_config_file: str = typer.Option(
+        ".agent.yaml",
+        help="Path to the agent config file",
+    ),
+    log_dir: str = typer.Option(
+        str(RUN_AIDER_LOG_DIR.resolve()),
+        help="Log directory to store the logs",
+    ),
+    max_parallel_repos: int = typer.Option(
+        1,
+        help="Maximum number of repositories for agent to run in parallel",
+    ),
+) -> None:
+    """Run the agent on the repository."""
+    run_agent_joblit(
+        branch,
+        override_previous_changes,
+        backend,
+        agent_config_file,
+        log_dir,
+        max_parallel_repos,
+    )
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 4ca0f05..4b613bd 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -57,11 +57,13 @@ def run_agent_for_repo(
     # get repo info
     _, repo_name = example["repo"].split("/")
 
+    original_repo_name = repo_name
+
     repo_name = repo_name.lower()
     repo_name = repo_name.replace(".", "-")
 
     # before starting, display all information to terminal
-    update_queue.put(("start_repo", (repo_name, 0)))
+    update_queue.put(("start_repo", (original_repo_name, 0)))
 
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
@@ -128,7 +130,7 @@ def run_agent_for_repo(
             test_files_str = get_tests(repo_name, verbose=0)
             test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
 
-            update_queue.put(("start_repo", (repo_name, len(test_files))))
+            update_queue.put(("start_repo", (original_repo_name, len(test_files))))
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
                 update_queue.put(("set_current_file", (repo_name, test_file)))
@@ -159,7 +161,9 @@ def run_agent_for_repo(
                 agent_config, repo_path, test_dir=example["test"]["test_dir"]
             )
 
-            update_queue.put(("start_repo", (repo_name, len(target_edit_files))))
+            update_queue.put(
+                ("start_repo", (original_repo_name, len(target_edit_files)))
+            )
             for f in target_edit_files:
                 update_queue.put(("set_current_file", (repo_name, f)))
                 file_name = f.replace(".py", "").replace("/", "__")
@@ -172,7 +176,7 @@ def run_agent_for_repo(
                         (repo_name, file_name, agent_return.last_cost),
                     )
                 )
-    update_queue.put(("finish_repo", repo_name))
+    update_queue.put(("finish_repo", original_repo_name))
 
 
 def run_agent(
diff --git a/agent/run_agent_joblit.py b/agent/run_agent_joblit.py
new file mode 100644
index 0000000..61d89a1
--- /dev/null
+++ b/agent/run_agent_joblit.py
@@ -0,0 +1,225 @@
+import os
+import yaml
+import multiprocessing
+from tqdm import tqdm
+from datasets import load_dataset
+from git import Repo
+from agent.agent_utils import (
+    args2string,
+    create_branch,
+    get_message,
+    get_target_edit_files,
+    get_lint_cmd,
+    read_yaml_config,
+)
+from agent.agents import AiderAgents
+from typing import Optional, Type, cast
+from types import TracebackType
+from agent.class_types import AgentConfig
+from commit0.harness.constants import SPLIT
+from commit0.harness.get_pytest_ids import main as get_tests
+from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
+from commit0.cli import read_commit0_dot_file
+from pathlib import Path
+from datetime import datetime
+
+
+class DirContext:
+    def __init__(self, d: str):
+        self.dir = d
+        self.cwd = os.getcwd()
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(
+        self,
+        exctype: Optional[Type[BaseException]],
+        excinst: Optional[BaseException],
+        exctb: Optional[TracebackType],
+    ) -> None:
+        os.chdir(self.cwd)
+
+
+def run_agent_for_repo(
+    repo_base_dir: str,
+    agent_config: AgentConfig,
+    example: RepoInstance,
+    branch: Optional[str] = None,
+    override_previous_changes: bool = False,
+    backend: str = "modal",
+    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
+) -> None:
+    """Run Aider for a given repository."""
+    # get repo info
+    _, repo_name = example["repo"].split("/")
+
+    repo_name = repo_name.lower()
+    repo_name = repo_name.replace(".", "-")
+
+    repo_path = os.path.join(repo_base_dir, repo_name)
+    repo_path = os.path.abspath(repo_path)
+
+    src_dir = os.path.join(repo_path, example["src_dir"])
+
+    try:
+        local_repo = Repo(repo_path)
+    except Exception:
+        raise Exception(
+            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
+        )
+
+    if agent_config.agent_name == "aider":
+        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
+    else:
+        raise NotImplementedError(
+            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
+        )
+
+    # if branch_name is not provided, create a new branch name based on agent_config
+    if branch is None:
+        branch = args2string(agent_config)
+
+    create_branch(local_repo, branch, example["base_commit"])
+
+    # in cases where the latest commit of branch is not commit 0
+    # set it back to commit 0
+    latest_commit = local_repo.commit(branch)
+    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
+        local_repo.git.reset("--hard", example["base_commit"])
+
+    # prepare the log dir
+    experiment_log_dir = (
+        Path(log_dir)
+        / repo_name
+        / branch
+        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    )
+    experiment_log_dir.mkdir(parents=True, exist_ok=True)
+
+    # write agent_config to .agent.yaml in the log_dir for record
+    agent_config_log_file = experiment_log_dir / ".agent.yaml"
+    with open(agent_config_log_file, "w") as agent_config_file:
+        yaml.dump(agent_config, agent_config_file)
+
+    # TODO: make this path more general
+    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
+    with DirContext(repo_path):
+        if agent_config is None:
+            raise ValueError("Invalid input")
+
+        target_edit_files = get_target_edit_files(
+            src_dir, src_prefix=example["src_dir"]
+        )
+
+        if agent_config.run_tests:
+            # Call the commit0 get-tests command to retrieve test files
+            test_files_str = get_tests(repo_name, verbose=0)
+            test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
+            # when unit test feedback is available, iterate over test files
+            for test_file in test_files:
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
+                test_file_name = test_file.replace(".py", "").replace("/", "__")
+                test_log_dir = experiment_log_dir / test_file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                message = get_message(agent_config, repo_path, test_file=test_file)
+                _ = agent.run(
+                    message,
+                    test_cmd,
+                    lint_cmd,
+                    target_edit_files,
+                    test_log_dir,
+                )
+                # cost = agent_return.last_cost
+        else:
+            # when unit test feedback is not available, iterate over target files to edit
+            message = get_message(
+                agent_config, repo_path, test_dir=example["test"]["test_dir"]
+            )
+            for f in target_edit_files:
+                file_name = f.replace(".py", "").replace("/", "__")
+                file_log_dir = experiment_log_dir / file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                # cost = agent_return.last_cost
+
+
+def run_agent(
+    branch: str,
+    override_previous_changes: bool,
+    backend: str,
+    agent_config_file: str,
+    log_dir: str,
+    max_parallel_repos: int,
+) -> None:
+    """Main function to run Aider for a given repository.
+
+    Will run in parallel for each repo.
+    """
+    config = read_yaml_config(agent_config_file)
+
+    agent_config = AgentConfig(**config)
+
+    commit0_config = read_commit0_dot_file(".commit0.yaml")
+
+    dataset = load_dataset(
+        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
+    )
+    filtered_dataset = [
+        example
+        for example in dataset
+        if commit0_config["repo_split"] == "all"
+        or (
+            isinstance(example, dict)
+            and "repo" in example
+            and isinstance(example["repo"], str)
+            and example["repo"].split("/")[-1]
+            in SPLIT.get(commit0_config["repo_split"], [])
+        )
+    ]
+    assert len(filtered_dataset) > 0, "No examples available"
+
+    # if len(filtered_dataset) > 1:
+    #     sys.stdout = open(os.devnull, "w")
+    print("jere")
+    print(filtered_dataset[0])
+    for example in filtered_dataset:
+        if "joblib" in example["repo"]:
+            print(example)
+            run_agent_for_repo(
+                commit0_config["base_dir"],
+                agent_config,
+                cast(RepoInstance, example),
+                branch,
+                override_previous_changes,
+                backend,
+                log_dir,
+            )
+    # with tqdm(
+    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
+    # ) as pbar:
+    #     with multiprocessing.Pool(processes=max_parallel_repos) as pool:
+    #         results = []
+
+    #         # Use apply_async to submit jobs and add progress bar updates
+    #         for example in filtered_dataset:
+    #             result = pool.apply_async(
+    #                 run_agent_for_repo,
+    #                 args=(
+    #                     commit0_config["base_dir"],
+    #                     agent_config,
+    #                     cast(RepoInstance, example),
+    #                     branch,
+    #                     override_previous_changes,
+    #                     backend,
+    #                     log_dir,
+    #                 ),
+    #                 callback=lambda _: pbar.update(
+    #                     1
+    #                 ),  # Update progress bar on task completion
+    #             )
+    #             results.append(result)
+
+    #         for result in results:
+    #             result.wait()

From 42454d222768528595065aa67dd9d0dedb0fcdef Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 21:03:34 -0400
Subject: [PATCH 09/16] emit start_repo before normalizing repo name

---
 agent/run_agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/agent/run_agent.py b/agent/run_agent.py
index 4b613bd..9b62bcf 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -57,13 +57,13 @@ def run_agent_for_repo(
     # get repo info
     _, repo_name = example["repo"].split("/")
 
+    # before starting, display all information to terminal
     original_repo_name = repo_name
+    update_queue.put(("start_repo", (original_repo_name, 0)))
 
     repo_name = repo_name.lower()
     repo_name = repo_name.replace(".", "-")
 
-    # before starting, display all information to terminal
-    update_queue.put(("start_repo", (original_repo_name, 0)))
 
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)

From 9f57083991194cfb970288b42a288c827d0c8e21 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Tue, 24 Sep 2024 21:12:11 -0400
Subject: [PATCH 10/16] remove joblit debug runner and its CLI command

---
 agent/cli.py              |  39 -------
 agent/run_agent.py        |   1 -
 agent/run_agent_joblit.py | 225 --------------------------------------
 3 files changed, 265 deletions(-)
 delete mode 100644 agent/run_agent_joblit.py

diff --git a/agent/cli.py b/agent/cli.py
index 531683b..5e17ea3 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -1,7 +1,6 @@
 import typer
 from agent.run_agent_no_rich import run_agent as run_agent_no_rich
 from agent.run_agent import run_agent
-from agent.run_agent_joblit import run_agent as run_agent_joblit
 from commit0.harness.constants import RUN_AIDER_LOG_DIR
 import subprocess
 from agent.agent_utils import write_agent_config
@@ -244,41 +243,3 @@ def run_test_no_rich(
         log_dir,
         max_parallel_repos,
     )
-
-
-@agent_app.command()
-def run_test_joblit(
-    branch: str = typer.Argument(
-        ...,
-        help="Branch name of current run",
-    ),
-    override_previous_changes: bool = typer.Option(
-        False,
-        help="If override the previous agent changes on `branch` or run the agent continuously on the new changes",
-    ),
-    backend: str = typer.Option(
-        "modal",
-        help="Test backend to run the agent on, ignore this option if you are not adding `test` option to agent",
-    ),
-    agent_config_file: str = typer.Option(
-        ".agent.yaml",
-        help="Path to the agent config file",
-    ),
-    log_dir: str = typer.Option(
-        str(RUN_AIDER_LOG_DIR.resolve()),
-        help="Log directory to store the logs",
-    ),
-    max_parallel_repos: int = typer.Option(
-        1,
-        help="Maximum number of repositories for agent to run in parallel",
-    ),
-) -> None:
-    """Run the agent on the repository."""
-    run_agent_joblit(
-        branch,
-        override_previous_changes,
-        backend,
-        agent_config_file,
-        log_dir,
-        max_parallel_repos,
-    )
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 9b62bcf..5415f92 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -64,7 +64,6 @@ def run_agent_for_repo(
     repo_name = repo_name.lower()
     repo_name = repo_name.replace(".", "-")
 
-
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
diff --git a/agent/run_agent_joblit.py b/agent/run_agent_joblit.py
deleted file mode 100644
index 61d89a1..0000000
--- a/agent/run_agent_joblit.py
+++ /dev/null
@@ -1,225 +0,0 @@
-import os
-import yaml
-import multiprocessing
-from tqdm import tqdm
-from datasets import load_dataset
-from git import Repo
-from agent.agent_utils import (
-    args2string,
-    create_branch,
-    get_message,
-    get_target_edit_files,
-    get_lint_cmd,
-    read_yaml_config,
-)
-from agent.agents import AiderAgents
-from typing import Optional, Type, cast
-from types import TracebackType
-from agent.class_types import AgentConfig
-from commit0.harness.constants import SPLIT
-from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
-from commit0.cli import read_commit0_dot_file
-from pathlib import Path
-from datetime import datetime
-
-
-class DirContext:
-    def __init__(self, d: str):
-        self.dir = d
-        self.cwd = os.getcwd()
-
-    def __enter__(self):
-        os.chdir(self.dir)
-
-    def __exit__(
-        self,
-        exctype: Optional[Type[BaseException]],
-        excinst: Optional[BaseException],
-        exctb: Optional[TracebackType],
-    ) -> None:
-        os.chdir(self.cwd)
-
-
-def run_agent_for_repo(
-    repo_base_dir: str,
-    agent_config: AgentConfig,
-    example: RepoInstance,
-    branch: Optional[str] = None,
-    override_previous_changes: bool = False,
-    backend: str = "modal",
-    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
-) -> None:
-    """Run Aider for a given repository."""
-    # get repo info
-    _, repo_name = example["repo"].split("/")
-
-    repo_name = repo_name.lower()
-    repo_name = repo_name.replace(".", "-")
-
-    repo_path = os.path.join(repo_base_dir, repo_name)
-    repo_path = os.path.abspath(repo_path)
-
-    src_dir = os.path.join(repo_path, example["src_dir"])
-
-    try:
-        local_repo = Repo(repo_path)
-    except Exception:
-        raise Exception(
-            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
-        )
-
-    if agent_config.agent_name == "aider":
-        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
-    else:
-        raise NotImplementedError(
-            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
-        )
-
-    # if branch_name is not provided, create a new branch name based on agent_config
-    if branch is None:
-        branch = args2string(agent_config)
-
-    create_branch(local_repo, branch, example["base_commit"])
-
-    # in cases where the latest commit of branch is not commit 0
-    # set it back to commit 0
-    latest_commit = local_repo.commit(branch)
-    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
-        local_repo.git.reset("--hard", example["base_commit"])
-
-    # prepare the log dir
-    experiment_log_dir = (
-        Path(log_dir)
-        / repo_name
-        / branch
-        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    )
-    experiment_log_dir.mkdir(parents=True, exist_ok=True)
-
-    # write agent_config to .agent.yaml in the log_dir for record
-    agent_config_log_file = experiment_log_dir / ".agent.yaml"
-    with open(agent_config_log_file, "w") as agent_config_file:
-        yaml.dump(agent_config, agent_config_file)
-
-    # TODO: make this path more general
-    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
-    with DirContext(repo_path):
-        if agent_config is None:
-            raise ValueError("Invalid input")
-
-        target_edit_files = get_target_edit_files(
-            src_dir, src_prefix=example["src_dir"]
-        )
-
-        if agent_config.run_tests:
-            # Call the commit0 get-tests command to retrieve test files
-            test_files_str = get_tests(repo_name, verbose=0)
-            test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
-            # when unit test feedback is available, iterate over test files
-            for test_file in test_files:
-                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
-                test_file_name = test_file.replace(".py", "").replace("/", "__")
-                test_log_dir = experiment_log_dir / test_file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                message = get_message(agent_config, repo_path, test_file=test_file)
-                _ = agent.run(
-                    message,
-                    test_cmd,
-                    lint_cmd,
-                    target_edit_files,
-                    test_log_dir,
-                )
-                # cost = agent_return.last_cost
-        else:
-            # when unit test feedback is not available, iterate over target files to edit
-            message = get_message(
-                agent_config, repo_path, test_dir=example["test"]["test_dir"]
-            )
-            for f in target_edit_files:
-                file_name = f.replace(".py", "").replace("/", "__")
-                file_log_dir = experiment_log_dir / file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
-                # cost = agent_return.last_cost
-
-
-def run_agent(
-    branch: str,
-    override_previous_changes: bool,
-    backend: str,
-    agent_config_file: str,
-    log_dir: str,
-    max_parallel_repos: int,
-) -> None:
-    """Main function to run Aider for a given repository.
-
-    Will run in parallel for each repo.
-    """
-    config = read_yaml_config(agent_config_file)
-
-    agent_config = AgentConfig(**config)
-
-    commit0_config = read_commit0_dot_file(".commit0.yaml")
-
-    dataset = load_dataset(
-        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
-    )
-    filtered_dataset = [
-        example
-        for example in dataset
-        if commit0_config["repo_split"] == "all"
-        or (
-            isinstance(example, dict)
-            and "repo" in example
-            and isinstance(example["repo"], str)
-            and example["repo"].split("/")[-1]
-            in SPLIT.get(commit0_config["repo_split"], [])
-        )
-    ]
-    assert len(filtered_dataset) > 0, "No examples available"
-
-    # if len(filtered_dataset) > 1:
-    #     sys.stdout = open(os.devnull, "w")
-    print("jere")
-    print(filtered_dataset[0])
-    for example in filtered_dataset:
-        if "joblib" in example["repo"]:
-            print(example)
-            run_agent_for_repo(
-                commit0_config["base_dir"],
-                agent_config,
-                cast(RepoInstance, example),
-                branch,
-                override_previous_changes,
-                backend,
-                log_dir,
-            )
-    # with tqdm(
-    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
-    # ) as pbar:
-    #     with multiprocessing.Pool(processes=max_parallel_repos) as pool:
-    #         results = []
-
-    #         # Use apply_async to submit jobs and add progress bar updates
-    #         for example in filtered_dataset:
-    #             result = pool.apply_async(
-    #                 run_agent_for_repo,
-    #                 args=(
-    #                     commit0_config["base_dir"],
-    #                     agent_config,
-    #                     cast(RepoInstance, example),
-    #                     branch,
-    #                     override_previous_changes,
-    #                     backend,
-    #                     log_dir,
-    #                 ),
-    #                 callback=lambda _: pbar.update(
-    #                     1
-    #                 ),  # Update progress bar on task completion
-    #             )
-    #             results.append(result)
-
-    #         for result in results:
-    #             result.wait()

From 8f2a1a75231ee451585532f1641c0608567b6c7e Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 02:02:01 -0400
Subject: [PATCH 11/16] fix small display error and update get file method

---
 agent/README.md            | 21 +++++++++
 agent/agent_utils.py       | 95 ++++++++++++++++++++++++++++++++------
 agent/run_agent.py         | 16 +++----
 agent/run_agent_no_rich.py |  4 +-
 4 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/agent/README.md b/agent/README.md
index c46fe43..e3b91cf 100644
--- a/agent/README.md
+++ b/agent/README.md
@@ -37,3 +37,24 @@ python baselines/run_aider.py
 - `max_unit_tests_info_length`: Max length of unit tests info. Default: `10000`.
 - `max_reference_info_length`: Max length of reference info. Default: `10000`.
 - `max_lint_info_length`: Max length of lint info. Default: `10000`.
+
+
+
+
+
+
+
+
+
+
+Error Section
+
+
+
+Running the agent
+
+Run with Tmux!
+
+process_max_worker set to 3....
+
+currently not handling file with more than 1500 lines...
\ No newline at end of file
diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index 6184d92..35a3352 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -119,24 +119,93 @@ def get_file_info(file_path: Path, prefix: str = "") -> str:
     return "\n".join(filter(None, tree_string))
 
 
-def get_target_edit_files(target_dir: str, src_prefix: str) -> list[str]:
+def collect_test_files(directory: str) -> list[str]:
+    """Collect all the test files in the directory."""
+    test_files = []
+    subdirs = []
+
+    # Walk through the directory
+    for root, dirs, files in os.walk(directory):
+        if root.endswith("/"):
+            root = root[:-1]
+        # Check if 'test' is part of the folder name
+        if (
+            "test" in os.path.basename(root).lower()
+            or os.path.basename(root) in subdirs
+        ):
+            for file in files:
+                # Process only Python files
+                if file.endswith(".py"):
+                    file_path = os.path.join(root, file)
+                    test_files.append(file_path)
+            for d in dirs:
+                subdirs.append(d)
+
+    return test_files
+
+
+def collect_python_files(directory: str) -> list[str]:
+    """List to store all the .py filenames"""
+    python_files = []
+
+    # Walk through the directory recursively
+    for root, _, files in os.walk(directory):
+        for file in files:
+            # Check if the file ends with '.py'
+            if file.endswith(".py"):
+                file_path = os.path.join(root, file)
+                python_files.append(file_path)
+
+    return python_files
+
+
+def _find_files_to_edit(base_dir: str, src_dir: str, test_dir: str) -> list[str]:
+    """Identify files to remove content by heuristics.
+    We assume source code is under [lib]/[lib] or [lib]/src.
+    We exclude test code. This function would not work
+    if test code doesn't have its own directory.
+
+    Args:
+    ----
+        base_dir (str): The path to local library.
+        src_dir (str): The directory containing source code.
+        test_dir (str): The directory containing test code.
+
+    Returns:
+    -------
+        list[str]: A list of files to be edited.
+
+    """
+    files = collect_python_files(os.path.join(base_dir, src_dir))
+    test_files = collect_test_files(os.path.join(base_dir, test_dir))
+    files = list(set(files) - set(test_files))
+
+    # don't edit __init__ files
+    files = [f for f in files if "__init__" not in f]
+    # don't edit __main__ files
+    files = [f for f in files if "__main__" not in f]
+    # don't edit confest.py files
+    files = [f for f in files if "conftest.py" not in f]
+    return files
+
+
+def get_target_edit_files(target_dir: str, src_dir: str, test_dir: str) -> list[str]:
     """Find the files with functions with the pass statement."""
-    files = []
-    for root, _, filenames in os.walk(target_dir):
-        for filename in filenames:
-            if filename.endswith(".py"):
-                file_path = os.path.join(root, filename)
-                with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
-                    if "    pass" in file.read():
-                        files.append(file_path)
+    files = _find_files_to_edit(target_dir, src_dir, test_dir)
+    filtered_files = []
+    for file_path in files:
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
+            content = file.read()
+            if len(content.splitlines()) < 1500:
+                filtered_files.append(file_path)
 
     # Remove the base_dir prefix
-    files = [file.replace(target_dir, "").lstrip("/") for file in files]
-    files = [src_prefix + file for file in files]
+    filtered_files = [
+        file.replace(target_dir, "").lstrip("/") for file in filtered_files
+    ]
     # Only keep python files
-    files = [file for file in files if file.endswith(".py")]
 
-    return files
+    return filtered_files
 
 
 def get_message(
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 5415f92..758fd42 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -61,17 +61,15 @@ def run_agent_for_repo(
     original_repo_name = repo_name
     update_queue.put(("start_repo", (original_repo_name, 0)))
 
-    repo_name = repo_name.lower()
-    repo_name = repo_name.replace(".", "-")
+    # repo_name = repo_name.lower()
+    # repo_name = repo_name.replace(".", "-")
 
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
-    # TODO: remove this to make web3.py work
-    if repo_name == "web3-py":
-        repo_path = repo_path.replace("web3-py", "web3.py")
-
-    src_dir = os.path.join(repo_path, example["src_dir"])
+    # # TODO: remove this to make web3.py work
+    # if repo_name == "web3-py":
+    #     repo_path = repo_path.replace("web3-py", "web3.py")
 
     try:
         local_repo = Repo(repo_path)
@@ -121,7 +119,7 @@ def run_agent_for_repo(
             raise ValueError("Invalid input")
 
         target_edit_files = get_target_edit_files(
-            src_dir, src_prefix=example["src_dir"]
+            repo_path, example["src_dir"], example["test"]["test_dir"]
         )
 
         if agent_config.run_tests:
@@ -133,7 +131,7 @@ def run_agent_for_repo(
             # when unit test feedback is available, iterate over test files
             for test_file in test_files:
                 update_queue.put(("set_current_file", (repo_name, test_file)))
-                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0_dot_file_path {commit0_dot_file_path}"
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0-dot-file-path {commit0_dot_file_path}"
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 test_log_dir = experiment_log_dir / test_file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
index 256903a..0b76025 100644
--- a/agent/run_agent_no_rich.py
+++ b/agent/run_agent_no_rich.py
@@ -60,8 +60,6 @@ def run_agent_for_repo(
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
-    src_dir = os.path.join(repo_path, example["src_dir"])
-
     try:
         local_repo = Repo(repo_path)
     except Exception:
@@ -109,7 +107,7 @@ def run_agent_for_repo(
             raise ValueError("Invalid input")
 
         target_edit_files = get_target_edit_files(
-            src_dir, src_prefix=example["src_dir"]
+            repo_path, example["src_dir"], example["test"]["test_dir"]
         )
 
         if agent_config.run_tests:

From feeefa1efffe54c964e8b798b1e2a4fd9cc54d4d Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 11:29:19 -0400
Subject: [PATCH 12/16] restore `pass` filter for edit files; track per-repo time

---
 agent/agent_utils.py |  4 +++-
 agent/display.py     | 30 +++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index 35a3352..fee9376 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -196,7 +196,9 @@ def get_target_edit_files(target_dir: str, src_dir: str, test_dir: str) -> list[
     for file_path in files:
         with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
             content = file.read()
-            if len(content.splitlines()) < 1500:
+            if len(content.splitlines()) > 1500:
+                continue
+            if "    pass" in content:
                 filtered_files.append(file_path)
 
     # Remove the base_dir prefix
diff --git a/agent/display.py b/agent/display.py
index dae865c..3935327 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -1,3 +1,4 @@
+import time
 from rich.console import Console, Group
 from rich.panel import Panel
 from rich.progress import (
@@ -82,6 +83,9 @@ def __init__(self, total_repos: int):
         self.total_files_per_repo = {}
         self.repo_money_spent = {}
         self.display_repo_progress_num = 5
+        self.start_time_per_repo = {}
+        self.end_time_per_repo = {}
+        self.total_time_spent = 0
 
         self.overall_progress = Progress(
             SpinnerColumn(),
@@ -208,6 +212,7 @@ def update_time_display(self, time_in_seconds: int) -> None:
             time_str = f"{minutes:02d}m {seconds:02d}s"
         else:
             time_str = f"{seconds:02d}s"
+        self.total_time_spent = time_in_seconds
         self.time_display = Text(f"Time Spent So Far: {time_str}", justify="center")
         self.layout["progress"]["time"].update(
             Panel(self.time_display, title="Time", border_style="blue")
@@ -323,6 +328,7 @@ def start_repo(self, repo_name: str, total_files: int = 0) -> None:
         self.ongoing_repos[repo_name] = ""
         self.finished_files[repo_name] = []
         self.total_files_per_repo[repo_name] = total_files
+        self.start_time_per_repo[repo_name] = time.time()
         self.update()
 
     def finish_repo(self, repo_name: str) -> None:
@@ -333,6 +339,7 @@ def finish_repo(self, repo_name: str) -> None:
         if repo_name in self.finished_files:
             del self.finished_files[repo_name]
         self.overall_progress.update(self.overall_task, advance=1)
+        self.end_time_per_repo[repo_name] = time.time()
         self.update()
 
     def set_not_started_repos(self, repos: list[str]) -> None:
@@ -354,4 +361,25 @@ def __exit__(
         exc_tb: TracebackType | None,
     ):
         self.live.stop()
-        print("Agent finished running")
+        print("\nSummary of Repository Processing:")
+        print("-" * 80)
+        print(
+            f"{'Repository':<30} {'Time Spent':<15} {'Files Processed':<20} {'Money Spent':<15}"
+        )
+        print("-" * 80)
+        total_files = 0
+        total_money = 0
+        for repo_name, end_time in self.end_time_per_repo.items():
+            time_spent = end_time - self.start_time_per_repo[repo_name]
+            files_processed = self.total_files_per_repo[repo_name]
+            money_spent = sum(self.repo_money_spent.get(repo_name, {}).values())
+            print(
+                f"{repo_name:<30} {time_spent:>13.2f}s {files_processed:>18} {money_spent:>13.2f}$"
+            )
+            total_files += files_processed
+            total_money += money_spent
+        print("-" * 80)
+        print(
+            f"{'Total':<30} {self.total_time_spent:>13.2f}s {total_files:>18} {total_money:>13.2f}$"
+        )
+        print("-" * 80)

From cee4f8a507e738b5d1581ce862c360a94d22cf67 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 12:24:42 -0400
Subject: [PATCH 13/16] add repo number in display bar

---
 agent/display.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/agent/display.py b/agent/display.py
index 3935327..e382ce2 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -38,6 +38,14 @@ def render(self, task: Task) -> Text:
         return Text(f"{int(task.completed or 0)}/{int(task.total or 1)}")
 
 
+class RepoCountColumn(ProgressColumn):
+    """Custom progress column for displaying the count of finished repositories."""
+
+    def render(self, task: Task) -> Text:
+        """Render the count of finished repositories."""
+        return Text(f"{int(task.completed or 0)}/{int(task.total or 1)}")
+
+
 class OngoingRepo:
     def __init__(
         self, name: str, current_file: str, finished_files: list[str], total_files: int
@@ -90,6 +98,7 @@ def __init__(self, total_repos: int):
         self.overall_progress = Progress(
             SpinnerColumn(),
             BarColumn(bar_width=None),
+            RepoCountColumn(),
             TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
         )
         self.overall_task = self.overall_progress.add_task(

From 88cdc032ab9e70b6e31315ae116f3aa426aed3dc Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 14:29:53 -0400
Subject: [PATCH 14/16] update agent display and log path

---
 agent/README.md            | 107 ++++++++++++++++++-------------------
 agent/cli.py               |  12 ++---
 agent/display.py           |   6 +--
 agent/run_agent.py         |   4 +-
 agent/run_agent_no_rich.py |   4 +-
 5 files changed, 62 insertions(+), 71 deletions(-)

diff --git a/agent/README.md b/agent/README.md
index e3b91cf..99425fd 100644
--- a/agent/README.md
+++ b/agent/README.md
@@ -1,60 +1,55 @@
-# How to run baseline
-
-Step 1: Go to `config/aider.yaml` and change the config
-
-Step 2: Run the following command
+# Agent for Commit0
+`agent config [OPTIONS] AGENT_NAME`: Set up the config you want the agent to run with
+`agent run [OPTIONS] BRANCH`: Run the agent on a specific branch
 
+You can also run the following commands for more information
 ```bash
-python baselines/run_aider.py
+agent -h
+agent config -h
+agent run -h
 ```
+## Configure Agent
+Here are all configs you can choose when you run `agent config [OPTIONS] AGENT_NAME`
+
+`--agent_name: str`: Agent to use, we only support [aider](https://aider.chat/) for now. [Default: `aider`]
+`--model-name: str`: Model to use, check [here](https://aider.chat/docs/llms.html) for more information. [Default: `claude-3-5-sonnet-20240620`]
+`--use-user-prompt: bool`: Use the user prompt instead of the default prompt. [Default: `False`]
+`--user-prompt: str`: The prompt sent to agent. [Default: Refer to code.]
+`--run-tests: bool`: Run the tests after the agent modified the code to get feedback. [Default: `False`]
+`--max-iteration: int`: Maximum number of iterations for agent to run. [Default: `3`]
+`--use-repo-info: bool`: Use the repository information. [Default: `False`]
+`--max-repo-info-length: int`: Maximum length of the repository information to use. [Default: `10000`]
+`--use-unit-tests-info: bool`: Use the unit tests information. [Default: `False`]
+`--max-unit-tests-info-length: int`: Maximum length of the unit tests information to use. [Default: `10000`]
+`--use-spec-info: bool`: Use the spec information. [Default: `False`]
+`--max-spec-info-length: int`: Maximum length of the spec information to use. [Default: `10000`]
+`--use-lint-info: bool`: Use the lint information. [Default: `False`]
+`--max-lint-info-length: int`: Maximum length of the lint information to use. [Default: `10000`]
+`--pre-commit-config-path: str`: Path to the pre-commit config file. [Default: `.pre-commit-config.yaml`]
+`--agent-config-file: str`: Path to write the agent config. [Default: `.agent.yaml`]
+
+## Running Agent
+Here are all configs you can choose when you run `agent run [OPTIONS] BRANCH`
+
+`--branch: str`: Branch to run the agent on, you can specific the name of the branch
+`--backend: str`: Test backend to run the agent on, ignore this option if you are not adding `run_tests` option to agent. [Default: `modal`]
+`--log-dir: str`: Log directory to store the logs. [Default: `logs/agent`]
+`--max-parallel-repos: int`: Maximum number of repositories for agent to run in parallel. Running in sequential if set to 1. [Default: `1`]
+`--display-repo-progress-num: int`: Number of repo progress displayed when running. [Default: `5`]
+
+
+### Agent Example: aider
+Step 1: `agent config aider`
+Step 2: `agent run aider_branch`
+
+### Other Agent:
+Refer to `class Agents` in `agent/agents.py`. You can design your own agent by inheriting `Agents` class and implement the `run` method.
+
+## Notes
+
+### Automatically retry
+Please refer to [here](https://github.com/paul-gauthier/aider/blob/75e1d519da9b328b0eca8a73ee27278f1289eadb/aider/sendchat.py#L17) for the types of API errors that aider will automatically retry.
+
+### Large files in repo
+Currently, the agent skips files with more than 1500 lines (see `agent/agent_utils.py#L199`).
 
-## Config
-
-`commit0_config`:
-
-- `base_dir`: Repos dir. Default `repos`.
-- `dataset_name`: commit0 HF dataset name. Default: `wentingzhao/commit0_docstring`.
-- `dataset_split`: commit0 dataset split. Default: `test`.
-- `repo_split`: commit0 repo split. Default: `simpy`.
-- `num_workers`: number of workers to run in parallel. Default: `10`.
-
-`aider_config`:
-
-- `llm_name`: LLM model name. Default: `claude-3-5-sonnet-20240620`.
-- `use_user_prompt`: Whether to use user prompt. Default: `false`.
-- `user_prompt`: User prompt. Default: `""`.
-- `use_repo_info`: Whether to use repo info. Default: `false`.
-  - Repo info
-  - skeleton of the repo(filenames under each dir)
-  - function stubs
-
-- `use_unit_tests_info`: Whether to use unit tests: unit_tests that target will be tested with. Default: `false`.
-- `use_reference_info`: Whether to use reference: reference doc/pdf/website. Default: `false`.
-- `use_lint_info`: Whether to use lint: lint info. Default: `false`.
-- `pre_commit_config_path`: Path to pre-commit config. Default: `.pre-commit-config.yaml`.
-- `run_tests`: Whether to run tests. Default: `true`.
-- `max_repo_info_length`: Max length of repo info. Default: `10000`.
-- `max_unit_tests_info_length`: Max length of unit tests info. Default: `10000`.
-- `max_reference_info_length`: Max length of reference info. Default: `10000`.
-- `max_lint_info_length`: Max length of lint info. Default: `10000`.
-
-
-
-
-
-
-
-
-
-
-Error Section
-
-
-
-Running the agent
-
-Run with Tmux!
-
-process_max_worker set to 3....
-
-currently not handling file with more than 1500 lines...
\ No newline at end of file
diff --git a/agent/cli.py b/agent/cli.py
index 5e17ea3..f14ba05 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -1,7 +1,7 @@
 import typer
 from agent.run_agent_no_rich import run_agent as run_agent_no_rich
 from agent.run_agent import run_agent
-from commit0.harness.constants import RUN_AIDER_LOG_DIR
+from commit0.harness.constants import RUN_AGENT_LOG_DIR
 import subprocess
 from agent.agent_utils import write_agent_config
 
@@ -135,11 +135,7 @@ def config(
     """Configure the agent."""
     if agent_name == "aider":
         check_aider_path()
-    else:
-        raise typer.BadParameter(
-            f"Invalid {highlight('AGENT', Colors.RED)}. We only support aider for now",
-            param_hint="AGENT",
-        )
+
     if use_user_prompt:
         user_prompt = typer.prompt("Please enter your user prompt")
 
@@ -183,7 +179,7 @@ def run(
         help="Path to the agent config file",
     ),
     log_dir: str = typer.Option(
-        str(RUN_AIDER_LOG_DIR.resolve()),
+        str(RUN_AGENT_LOG_DIR.resolve()),
         help="Log directory to store the logs",
     ),
     max_parallel_repos: int = typer.Option(
@@ -226,7 +222,7 @@ def run_test_no_rich(
         help="Path to the agent config file",
     ),
     log_dir: str = typer.Option(
-        str(RUN_AIDER_LOG_DIR.resolve()),
+        str(RUN_AGENT_LOG_DIR.resolve()),
         help="Log directory to store the logs",
     ),
     max_parallel_repos: int = typer.Option(
diff --git a/agent/display.py b/agent/display.py
index e382ce2..ff36c6d 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -120,9 +120,9 @@ def __init__(self, total_repos: int):
         self.layout["progress"]["pbar"].update(
             Panel(self.overall_progress, title="Overall Progress", border_style="blue")
         )
-        self.time_display = Text("Time Spent So Far: 0s", justify="center")
+        self.time_display = Text("Time Taken So Far: 0s", justify="center")
         self.layout["progress"]["time"].update(
-            Panel(self.time_display, title="$$$$", border_style="blue")
+            Panel(self.time_display, title="Time", border_style="blue")
         )
         self.money_display = Text("Money Spent So Far: $0.00", justify="center")
         self.layout["progress"]["money"].update(
@@ -373,7 +373,7 @@ def __exit__(
         print("\nSummary of Repository Processing:")
         print("-" * 80)
         print(
-            f"{'Repository':<30} {'Time Spent':<15} {'Files Processed':<20} {'Money Spent':<15}"
+            f"{'Repository':<30} {'Time Taken':<15} {'Files Processed':<20} {'Money Spent':<15}"
         )
         print("-" * 80)
         total_files = 0
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 758fd42..f77a85b 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -17,7 +17,7 @@
 from agent.class_types import AgentConfig
 from commit0.harness.constants import SPLIT
 from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
+from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
 from commit0.cli import read_commit0_dot_file
 from pathlib import Path
 from datetime import datetime
@@ -51,7 +51,7 @@ def run_agent_for_repo(
     branch: Optional[str] = None,
     override_previous_changes: bool = False,
     backend: str = "modal",
-    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
+    log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()),
 ) -> None:
     """Run Aider for a given repository."""
     # get repo info
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
index 0b76025..0b6879d 100644
--- a/agent/run_agent_no_rich.py
+++ b/agent/run_agent_no_rich.py
@@ -18,7 +18,7 @@
 from agent.class_types import AgentConfig
 from commit0.harness.constants import SPLIT
 from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RUN_AIDER_LOG_DIR, RepoInstance
+from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
 from commit0.cli import read_commit0_dot_file
 from pathlib import Path
 from datetime import datetime
@@ -48,7 +48,7 @@ def run_agent_for_repo(
     branch: Optional[str] = None,
     override_previous_changes: bool = False,
     backend: str = "modal",
-    log_dir: str = str(RUN_AIDER_LOG_DIR.resolve()),
+    log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()),
 ) -> None:
     """Run Aider for a given repository."""
     # get repo info

From a68f94e70ee42283f14fb3c3f94e66c33f607752 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 14:30:04 -0400
Subject: [PATCH 15/16] fix test bugs

---
 commit0/harness/constants.py      | 2 +-
 commit0/harness/run_pytest_ids.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/commit0/harness/constants.py b/commit0/harness/constants.py
index 1392c3d..0a15364 100644
--- a/commit0/harness/constants.py
+++ b/commit0/harness/constants.py
@@ -23,7 +23,7 @@ class Files(TypedDict):
 BASE_IMAGE_BUILD_DIR = Path("logs/build_images/base")
 REPO_IMAGE_BUILD_DIR = Path("logs/build_images/repo")
 RUN_PYTEST_LOG_DIR = Path("logs/pytest")
-RUN_AIDER_LOG_DIR = Path("logs/aider")
+RUN_AGENT_LOG_DIR = Path("logs/agent")
 
 # Constants - Test Types, Statuses, Commands
 FAIL_TO_PASS = "FAIL_TO_PASS"
diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
index 0863796..657ac82 100644
--- a/commit0/harness/run_pytest_ids.py
+++ b/commit0/harness/run_pytest_ids.py
@@ -71,7 +71,7 @@ def main(
     try:
         local_repo = git.Repo(repo_or_repo_dir)
         logger.info(f"Loaded a git repo from {repo_or_repo_dir}")
-    except git.exc.NoSuchPathError:  # type: ignore
+    except (git.exc.NoSuchPathError, git.exc.InvalidGitRepositoryError):  # type: ignore
         repo_dir = os.path.join(base_dir, repo_name)
         logger.error(f"{repo_or_repo_dir} is not a git dir, trying {repo_dir} again")
         try:

From 3e55151bba344e9884db0106985002ae73449d93 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Wed, 25 Sep 2024 14:32:30 -0400
Subject: [PATCH 16/16] added description for stdin for test

---
 commit0/cli.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/commit0/cli.py b/commit0/cli.py
index 896465b..f0afb54 100644
--- a/commit0/cli.py
+++ b/commit0/cli.py
@@ -239,7 +239,11 @@ def test(
         help="Set this to 2 for more logging information",
         count=True,
     ),
-    stdin: bool = typer.Option(False, "--stdin", help="Read test names from stdin"),
+    stdin: bool = typer.Option(
+        False,
+        "--stdin",
+        help="Read test names from stdin. Example: `echo 'test_mod.py' | commit0 test REPO --branch BRANCH`",
+    ),
 ) -> None:
     """Run tests on a Commit0 repository."""
     check_commit0_path()