All-Hands-AI · xingyaoww · Apr 23, 2024 · Apr 20, 2024 · Apr 20, 2024 · Apr 20, 2024
diff --git a/Makefile b/Makefile
@@ -51,7 +51,7 @@ check-system:
 		echo "$(RED)Unsupported system detected. Please use macOS, Linux, or Windows Subsystem for Linux (WSL).$(RESET)"; \
 		exit 1; \
 	fi
-		
+
 check-python:
 	@echo "$(YELLOW)Checking Python installation...$(RESET)"
 	@if command -v python3.11 > /dev/null; then \
@@ -218,6 +218,12 @@ setup-config-prompts:
 	 workspace_dir=$${workspace_dir:-$(DEFAULT_WORKSPACE_DIR)}; \
 	 echo "WORKSPACE_BASE=\"$$workspace_dir\"" >> $(CONFIG_FILE).tmp
 
+# Clean up all caches (deletes opendevin/.cache, the default CACHE_DIR)
+clean:
+	@echo "$(YELLOW)Cleaning up caches...$(RESET)"
+	@rm -rf opendevin/.cache
+	@echo "$(GREEN)Caches cleaned up successfully.$(RESET)"
+
 # Help
 help:
 	@echo "$(BLUE)Usage: make [target]$(RESET)"

diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py
@@ -15,6 +15,7 @@
 )
 from opendevin.parse_commands import parse_command_file
 from opendevin.state import State
+from opendevin.sandbox.plugins import PluginRequirement, JupyterRequirement
 
 COMMAND_DOCS = parse_command_file()
 COMMAND_SEGMENT = (
@@ -69,6 +70,8 @@ class CodeActAgent(Agent):
     The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step.
     """
 
+    sandbox_plugins: List[PluginRequirement] = [JupyterRequirement()]
+
     def __init__(
         self,
         llm: LLM,

diff --git a/opendevin/agent.py b/opendevin/agent.py
@@ -6,6 +6,7 @@
     from opendevin.state import State
 from opendevin.llm.llm import LLM
 from opendevin.exceptions import AgentAlreadyRegisteredError, AgentNotRegisteredError
+from opendevin.sandbox.plugins import PluginRequirement
 
 
 class Agent(ABC):
@@ -17,6 +18,7 @@ class Agent(ABC):
     """
 
     _registry: Dict[str, Type['Agent']] = {}
+    sandbox_plugins: List[PluginRequirement] = []
 
     def __init__(
             self,

diff --git a/opendevin/config.py b/opendevin/config.py
@@ -1,7 +1,7 @@
 import os
-
 import argparse
 import toml
+import pathlib
 from dotenv import load_dotenv
 
 from opendevin.schema import ConfigType
@@ -18,6 +18,7 @@
     ConfigType.WORKSPACE_MOUNT_PATH: None,
     ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX: '/workspace',
     ConfigType.WORKSPACE_MOUNT_REWRITE: None,
+    ConfigType.CACHE_DIR: os.path.join(os.path.dirname(os.path.abspath(__file__)), '.cache'),
     ConfigType.LLM_MODEL: 'gpt-3.5-turbo-1106',
     ConfigType.SANDBOX_CONTAINER_IMAGE: 'ghcr.io/opendevin/sandbox',
     ConfigType.RUN_AS_DEVIN: 'true',
@@ -145,3 +146,8 @@ def get(key: str, required: bool = False):
     if not value and required:
         raise KeyError(f"Please set '{key}' in `config.toml` or `.env`.")
     return value
+
+
+_cache_dir = config.get('CACHE_DIR')  # module-level: executed on import; creates CACHE_DIR if one is configured
+if _cache_dir:
+    pathlib.Path(_cache_dir).mkdir(parents=True, exist_ok=True)
diff --git a/opendevin/controller/action_manager.py b/opendevin/controller/action_manager.py
@@ -14,6 +14,7 @@
     AgentErrorObservation,
     NullObservation,
 )
+from opendevin.sandbox.plugins import PluginRequirement
 
 
 class ActionManager:
@@ -41,6 +42,9 @@ def __init__(
         else:
             raise ValueError(f'Invalid sandbox type: {sandbox_type}')
 
+    def init_sandbox_plugins(self, plugins: List[PluginRequirement]):
+        self.sandbox.init_plugins(plugins)  # plugin setup is delegated to the sandbox's init_plugins
+
     async def run_action(self, action: Action, agent_controller) -> Observation:
         observation: Observation = NullObservation('')
         if not action.executable:

diff --git a/opendevin/controller/agent_controller.py b/opendevin/controller/agent_controller.py
@@ -53,6 +53,8 @@ def __init__(
         self.action_manager = ActionManager(self.id, container_image)
         self.max_chars = max_chars
         self.callbacks = callbacks
+        # Initialize agent-required plugins for sandbox (if any)
+        self.action_manager.init_sandbox_plugins(agent.sandbox_plugins)
 
     def update_state_for_step(self, i):
         if self.state is None:

diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py
@@ -4,6 +4,8 @@
 import sys
 import time
 import uuid
+import tarfile
+from glob import glob
 from collections import namedtuple
 from typing import Dict, List, Tuple
 
@@ -122,6 +124,37 @@ def run_command(container, command):
                 return -1, f'Command: "{cmd}" timed out'
         return exit_code, logs.decode('utf-8')
 
+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        """Copy `host_src` (a file, or a directory tree when `recursive`) into the sandbox.
+
+        The archive is extracted into the parent directory of `sandbox_dest`.
+        """
+        exit_code, logs = self.container.exec_run(  # make sure sandbox_dest exists
+            ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
+            workdir=SANDBOX_WORKSPACE_DIR,
+        )
+        if exit_code != 0:
+            raise Exception(
+                f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
+
+        src_parent, srcname = os.path.split(host_src)
+        if recursive:
+            assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
+            files = glob(host_src + '/**/*', recursive=True)
+        else:
+            assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
+            files = [host_src]
+        tar_filename = os.path.join(src_parent, srcname + '.tar')
+        try:  # the archive is staged next to host_src and must not outlive this call
+            with tarfile.open(tar_filename, mode='w') as tar:
+                for file in files:
+                    tar.add(file, arcname=os.path.relpath(file, src_parent))
+            with open(tar_filename, 'rb') as f:
+                self.container.put_archive(os.path.dirname(sandbox_dest), f.read())
+        finally:
+            if os.path.exists(tar_filename):
+                os.remove(tar_filename)
+
     def execute_in_background(self, cmd: str) -> Process:
         result = self.container.exec_run(
             self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR

diff --git a/opendevin/sandbox/docker/local_box.py b/opendevin/sandbox/docker/local_box.py
@@ -39,6 +39,25 @@ def execute(self, cmd: str) -> Tuple[int, str]:
         except subprocess.TimeoutExpired:
             return -1, 'Command timed out'
 
+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        """Copy `host_src` into `sandbox_dest` with `mkdir -p` and `cp` (`cp -r` when `recursive`).
+
+        Both commands run through the shell with WORKSPACE_BASE as the working directory.
+        Raises RuntimeError if either command exits with a non-zero status.
+        """
+        workspace = config.get('WORKSPACE_BASE')
+        mkdir_res = subprocess.run(f'mkdir -p {sandbox_dest}', shell=True, text=True, cwd=workspace)
+        if mkdir_res.returncode != 0:
+            raise RuntimeError(f'Failed to create directory {sandbox_dest} in sandbox')
+
+        # the two copy modes differ only in the recursive flag handed to cp
+        cp_cmd = 'cp -r' if recursive else 'cp'
+        cp_res = subprocess.run(
+            f'{cp_cmd} {host_src} {sandbox_dest}', shell=True, text=True, cwd=workspace
+        )
+        if cp_res.returncode != 0:
+            raise RuntimeError(f'Failed to copy {host_src} to {sandbox_dest} in sandbox')
+
     def execute_in_background(self, cmd: str) -> Process:
         process = subprocess.Popen(
             cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,

diff --git a/opendevin/sandbox/docker/ssh_box.py b/opendevin/sandbox/docker/ssh_box.py
@@ -4,6 +4,8 @@
 import sys
 import time
 import uuid
+import tarfile
+from glob import glob
 from collections import namedtuple
 from typing import Dict, List, Tuple, Union
 
@@ -15,6 +17,7 @@
 from opendevin.sandbox.sandbox import Sandbox
 from opendevin.sandbox.process import Process
 from opendevin.sandbox.docker.process import DockerProcess
+from opendevin.sandbox.plugins.jupyter import JupyterRequirement
 from opendevin.schema import ConfigType
 from opendevin.utils import find_available_tcp_port
 from opendevin.exceptions import SandboxInvalidBackgroundCommandError
@@ -58,10 +61,10 @@ class DockerSSHBox(Sandbox):
     background_commands: Dict[int, Process] = {}
 
     def __init__(
-            self,
-            container_image: str | None = None,
-            timeout: int = 120,
-            sid: str | None = None,
+        self,
+        container_image: str | None = None,
+        timeout: int = 120,
+        sid: str | None = None,
     ):
         # Initialize docker client. Throws an exception if Docker is not reachable.
         try:
@@ -137,6 +140,22 @@ def setup_user(self):
             )
             if exit_code != 0:
                 raise Exception(f'Failed to set password in sandbox: {logs}')
+
+            # chown the home directory
+            exit_code, logs = self.container.exec_run(
+                ['/bin/bash', '-c', 'chown opendevin:root /home/opendevin'],
+                workdir=SANDBOX_WORKSPACE_DIR,
+            )
+            if exit_code != 0:
+                raise Exception(
+                    f'Failed to chown home directory for opendevin in sandbox: {logs}')
+            exit_code, logs = self.container.exec_run(
+                ['/bin/bash', '-c', f'chown opendevin:root {SANDBOX_WORKSPACE_DIR}'],
+                workdir=SANDBOX_WORKSPACE_DIR,
+            )
+            if exit_code != 0:
+                raise Exception(
+                    f'Failed to chown workspace directory for opendevin in sandbox: {logs}')
         else:
             exit_code, logs = self.container.exec_run(
                 # change password for root
@@ -208,6 +227,37 @@ def execute(self, cmd: str) -> Tuple[int, str]:
         exit_code = int(exit_code.lstrip('echo $?').strip())
         return exit_code, command_output
 
+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        """Copy `host_src` (a file, or a directory tree when `recursive`) into the sandbox.
+
+        The archive is extracted into the parent directory of `sandbox_dest`.
+        """
+        exit_code, logs = self.container.exec_run(  # make sure sandbox_dest exists
+            ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
+            workdir=SANDBOX_WORKSPACE_DIR,
+        )
+        if exit_code != 0:
+            raise Exception(
+                f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
+
+        src_parent, srcname = os.path.split(host_src)
+        if recursive:
+            assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
+            files = glob(host_src + '/**/*', recursive=True)
+        else:
+            assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
+            files = [host_src]
+        tar_filename = os.path.join(src_parent, srcname + '.tar')
+        try:  # the archive is staged next to host_src and must not outlive this call
+            with tarfile.open(tar_filename, mode='w') as tar:
+                for file in files:
+                    tar.add(file, arcname=os.path.relpath(file, src_parent))
+            with open(tar_filename, 'rb') as f:
+                self.container.put_archive(os.path.dirname(sandbox_dest), f.read())
+        finally:
+            if os.path.exists(tar_filename):
+                os.remove(tar_filename)
+
     def execute_in_background(self, cmd: str) -> Process:
         result = self.container.exec_run(
             self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
@@ -307,6 +357,11 @@ def restart_docker_container(self):
                         'bind': SANDBOX_WORKSPACE_DIR,
                         'mode': 'rw'
                     },
+                    # mount cache directory to /home/opendevin/.cache for pip cache reuse
+                    config.get('CACHE_DIR'): {
+                        'bind': '/home/opendevin/.cache' if RUN_AS_DEVIN else '/root/.cache',
+                        'mode': 'rw'
+                    },
                 },
             )
             logger.info('Container started')
@@ -355,8 +410,11 @@ def close(self):
     logger.info(
         "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
 
+    # Initialize required plugins
+    ssh_box.init_plugins([JupyterRequirement()])
+
     bg_cmd = ssh_box.execute_in_background(
-        "while true; do echo 'dot ' && sleep 1; done"
+        "while true; do echo 'dot ' && sleep 10; done"
     )
 
     sys.stdout.flush()

diff --git a/opendevin/sandbox/e2b/sandbox.py b/opendevin/sandbox/e2b/sandbox.py
@@ -61,6 +61,10 @@ def execute(self, cmd: str) -> Tuple[int, str]:
         assert process_output.exit_code is not None
         return process_output.exit_code, logs_str
 
+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        # FIXME: host-to-sandbox file copy is not implemented for the E2B sandbox yet
+        raise NotImplementedError('Copying files to E2B sandbox is not implemented yet')
+
     def execute_in_background(self, cmd: str) -> Process:
         process = self.sandbox.process.start(cmd)
         e2b_process = E2BProcess(process, cmd)

diff --git a/opendevin/sandbox/plugins/__init__.py b/opendevin/sandbox/plugins/__init__.py
@@ -0,0 +1,7 @@
+from .mixin import PluginMixin
+from .requirement import PluginRequirement
+
+# Requirements
+from .jupyter import JupyterRequirement
+
+__all__ = ['PluginMixin', 'PluginRequirement', 'JupyterRequirement']
diff --git a/opendevin/sandbox/plugins/jupyter/__init__.py b/opendevin/sandbox/plugins/jupyter/__init__.py
@@ -0,0 +1,11 @@
+import os
+from dataclasses import dataclass
+from opendevin.sandbox.plugins.requirement import PluginRequirement
+
+
+@dataclass
+class JupyterRequirement(PluginRequirement):
+    name: str = 'jupyter'
+    host_src: str = os.path.dirname(os.path.abspath(__file__))  # The directory of this file (sandbox/plugins/jupyter)
+    sandbox_dest: str = '/opendevin/plugins/jupyter'  # presumably the copy_to() destination inside the sandbox -- confirm in PluginMixin
+    bash_script_path: str = 'setup.sh'  # presumably resolved relative to sandbox_dest -- confirm in PluginMixin
diff --git a/opendevin/sandbox/plugins/jupyter/execute_cli b/opendevin/sandbox/plugins/jupyter/execute_cli
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Send Python code read from STDIN to the Jupyter execution server and print the reply."""
+import os
+import sys
+import time
+import requests
+
+# Read the Python code from STDIN
+code = sys.stdin.read()
+
+# Set the default kernel ID
+kernel_id = 'default'
+
+PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
+if not PORT:
+    # fail early instead of posting to 'http://localhost:None/execute'
+    sys.exit('JUPYTER_EXEC_SERVER_PORT is not set')
+POST_URL = f'http://localhost:{PORT}/execute'
+
+# try 5 times until success
+response = None
+last_error = None
+for i in range(5):
+    try:
+        response = requests.post(POST_URL, json={'kernel_id': kernel_id, 'code': code})
+    except requests.exceptions.ConnectionError as err:
+        # the server may not be accepting connections yet; count it as a failed attempt
+        last_error = err
+    else:
+        # if "500: Internal Server Error" is not in the response, break the loop
+        if '500: Internal Server Error' not in response.text:
+            break
+    time.sleep(1)
+
+if response is None:
+    sys.exit(f'Failed to reach {POST_URL}: {last_error}')
+
+# Print the response
+print(str(response.text))