feat(sandbox): Candidate Implementation of Sandbox Plugin to Support Jupyter #1255
Changes from all commits
7c76759
ba07415
fdc5109
1ada575
9bed95f
abf0fbb
bbac1a6
18cf4ba
b7f2f60
cf33615
3908ae7
fadb609
d3c829c
9481c7b
456d9cb
0d788ef
a436c8b
bac492a
3f5030d
a7b4f2e
6be6ac8
c1a4e56
76b5cfa
da1b3ba
@@ -4,6 +4,8 @@
 import sys
 import time
 import uuid
+import tarfile
+from glob import glob
 from collections import namedtuple
 from typing import Dict, List, Tuple

@@ -122,6 +124,37 @@ def run_command(container, command):
             return -1, f'Command: "{cmd}" timed out'
         return exit_code, logs.decode('utf-8')

+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        # mkdir -p sandbox_dest if it doesn't exist
+        exit_code, logs = self.container.exec_run(
+            ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
+            workdir=SANDBOX_WORKSPACE_DIR,
+        )
+        if exit_code != 0:
+            raise Exception(
+                f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
+
+        if recursive:
+            assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
+            files = glob(host_src + '/**/*', recursive=True)
+            srcname = os.path.basename(host_src)
+            tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
+            with tarfile.open(tar_filename, mode='w') as tar:
+                for file in files:
+                    tar.add(file, arcname=os.path.relpath(file, os.path.dirname(host_src)))
+        else:
+            assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
+            srcname = os.path.basename(host_src)
+            tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
+            with tarfile.open(tar_filename, mode='w') as tar:
+                tar.add(host_src, arcname=srcname)
+
+        with open(tar_filename, 'rb') as f:
+            data = f.read()
+
+        self.container.put_archive(os.path.dirname(sandbox_dest), data)
+        os.remove(tar_filename)
+
     def execute_in_background(self, cmd: str) -> Process:
         result = self.container.exec_run(
             self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR

Review comment (on the new copy_to): Code duplication from
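A quick usage sketch of the copy_to method added above. The box variable and the host paths are made up for illustration; the destination behavior follows from put_archive being called on os.path.dirname(sandbox_dest).

# `box` is assumed to be an already-constructed sandbox instance.
# Recursive copy: the directory's contents end up under /opendevin/plugins/jupyter,
# since the arcnames are relative to os.path.dirname(host_src).
box.copy_to('/host/checkout/opendevin/sandbox/plugins/jupyter',
            '/opendevin/plugins/jupyter', recursive=True)
# Single file: it is extracted into os.path.dirname(sandbox_dest),
# i.e. /opendevin/plugins/setup.sh in this example.
box.copy_to('/host/checkout/setup.sh', '/opendevin/plugins/jupyter', recursive=False)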
------------------------------------------------------------------------
@@ -4,6 +4,8 @@
 import sys
 import time
 import uuid
+import tarfile
+from glob import glob
 from collections import namedtuple
 from typing import Dict, List, Tuple, Union

@@ -15,6 +17,7 @@
 from opendevin.sandbox.sandbox import Sandbox
 from opendevin.sandbox.process import Process
 from opendevin.sandbox.docker.process import DockerProcess
+from opendevin.sandbox.plugins.jupyter import JupyterRequirement
 from opendevin.schema import ConfigType
 from opendevin.utils import find_available_tcp_port
 from opendevin.exceptions import SandboxInvalidBackgroundCommandError

@@ -58,10 +61,10 @@ class DockerSSHBox(Sandbox):
     background_commands: Dict[int, Process] = {}

     def __init__(
-            self,
-            container_image: str | None = None,
-            timeout: int = 120,
-            sid: str | None = None,
+        self,
+        container_image: str | None = None,
+        timeout: int = 120,
+        sid: str | None = None,
     ):
         # Initialize docker client. Throws an exception if Docker is not reachable.
         try:

@@ -137,6 +140,22 @@ def setup_user(self):
             )
             if exit_code != 0:
                 raise Exception(f'Failed to set password in sandbox: {logs}')
+            # chown the home directory
+            exit_code, logs = self.container.exec_run(
+                ['/bin/bash', '-c', 'chown opendevin:root /home/opendevin'],
+                workdir=SANDBOX_WORKSPACE_DIR,
+            )
+            if exit_code != 0:
+                raise Exception(
+                    f'Failed to chown home directory for opendevin in sandbox: {logs}')
+
+            exit_code, logs = self.container.exec_run(
+                ['/bin/bash', '-c', f'chown opendevin:root {SANDBOX_WORKSPACE_DIR}'],
+                workdir=SANDBOX_WORKSPACE_DIR,
+            )
+            if exit_code != 0:
+                raise Exception(
+                    f'Failed to chown workspace directory for opendevin in sandbox: {logs}')
         else:
             exit_code, logs = self.container.exec_run(
                 # change password for root

Review thread on lines +144 to +151:
Comment: Maybe we want to pull this out into the core Sandbox implementation, as (TBH there's probably a lot of logic we could deduplicate between SSH, Exec, and Local)
Reply: I agree! I was also thinking heavily about this while implementing this; maybe we should make a BaseClass to keep all the common logic (including this one!)
Reply: well... just realized this might just work on
Reply: Hmm good point. I could see putting this logic in the base class, and then having a
Reply: I guess once we deprecate
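Following the thread above, a rough sketch of what a shared base class could look like. This is purely illustrative and not part of this PR; the class name, the container attribute, and the workspace_dir default are assumptions.

import os
import tarfile
import tempfile


class BaseDockerSandbox:
    # Assumed to be set by subclasses (the exec and SSH boxes) to a
    # docker-py Container object.
    container = None
    workspace_dir = '/workspace'  # assumed default working directory

    def _run_in_sandbox(self, cmd: str) -> bytes:
        # Shared wrapper around docker-py's exec_run with uniform error handling.
        exit_code, logs = self.container.exec_run(
            ['/bin/bash', '-c', cmd], workdir=self.workspace_dir)
        if exit_code != 0:
            raise RuntimeError(f'Command failed in sandbox: {cmd!r}: {logs}')
        return logs

    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
        # Single home for the copy_to logic currently duplicated across the
        # exec and SSH boxes in this diff.
        self._run_in_sandbox(f'mkdir -p {sandbox_dest}')
        if recursive:
            assert os.path.isdir(host_src), 'recursive=True expects a directory'
        else:
            assert os.path.isfile(host_src), 'recursive=False expects a file'
        with tempfile.TemporaryDirectory() as tmp:
            tar_path = os.path.join(tmp, os.path.basename(host_src) + '.tar')
            with tarfile.open(tar_path, mode='w') as tar:
                # tarfile adds directories recursively by default
                tar.add(host_src, arcname=os.path.basename(host_src))
            with open(tar_path, 'rb') as f:
                self.container.put_archive(os.path.dirname(sandbox_dest), f.read())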
@@ -208,6 +227,37 @@ def execute(self, cmd: str) -> Tuple[int, str]:
         exit_code = int(exit_code.lstrip('echo $?').strip())
         return exit_code, command_output

+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        # mkdir -p sandbox_dest if it doesn't exist
+        exit_code, logs = self.container.exec_run(
+            ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
+            workdir=SANDBOX_WORKSPACE_DIR,
+        )
+        if exit_code != 0:
+            raise Exception(
+                f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
+
+        if recursive:
+            assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
+            files = glob(host_src + '/**/*', recursive=True)
+            srcname = os.path.basename(host_src)
+            tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
+            with tarfile.open(tar_filename, mode='w') as tar:
+                for file in files:
+                    tar.add(file, arcname=os.path.relpath(file, os.path.dirname(host_src)))
+        else:
+            assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
+            srcname = os.path.basename(host_src)
+            tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
+            with tarfile.open(tar_filename, mode='w') as tar:
+                tar.add(host_src, arcname=srcname)
+
+        with open(tar_filename, 'rb') as f:
+            data = f.read()
+
+        self.container.put_archive(os.path.dirname(sandbox_dest), data)
+        os.remove(tar_filename)
+
     def execute_in_background(self, cmd: str) -> Process:
         result = self.container.exec_run(
             self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
@@ -307,6 +357,11 @@ def restart_docker_container(self):
                     'bind': SANDBOX_WORKSPACE_DIR,
                     'mode': 'rw'
                 },
+                # mount cache directory to /home/opendevin/.cache for pip cache reuse
+                config.get('CACHE_DIR'): {
+                    'bind': '/home/opendevin/.cache' if RUN_AS_DEVIN else '/root/.cache',
+                    'mode': 'rw'
+                },
             },
         )
         logger.info('Container started')
@@ -355,8 +410,11 @@ def close(self):
     logger.info(
         "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")

+    # Initialize required plugins
+    ssh_box.init_plugins([JupyterRequirement()])
+
     bg_cmd = ssh_box.execute_in_background(
-        "while true; do echo 'dot ' && sleep 1; done"
+        "while true; do echo 'dot ' && sleep 10; done"
     )

     sys.stdout.flush()
------------------------------------------------------------------------
@@ -61,6 +61,10 @@ def execute(self, cmd: str) -> Tuple[int, str]:
         assert process_output.exit_code is not None
         return process_output.exit_code, logs_str

+    def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
+        # FIXME
+        raise NotImplementedError('Copying files to E2B sandbox is not implemented yet')
+
     def execute_in_background(self, cmd: str) -> Process:
         process = self.sandbox.process.start(cmd)
         e2b_process = E2BProcess(process, cmd)

Review thread on the new copy_to:
Comment: I guess this shouldn't be so hard to implement for E2B since we already have
Reply: @xingyaoww yes, shouldn't be hard. It's possible to upload files (up to 100MB) to the sandbox, or one can just write to a file
Reply: @rbren let me quickly implement this before merging
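In the spirit of the "just write to a file" reply above, a possible shape for the E2B copy_to. This is a sketch only: self.sandbox.filesystem.write_bytes is an assumed E2B SDK call and should be checked against the actual SDK before use.

import os
from glob import glob


def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
    # Sketch: mirror host_src beneath sandbox_dest by writing each file
    # through the (assumed) E2B filesystem API instead of a tar archive.
    if recursive:
        files = [p for p in glob(host_src + '/**/*', recursive=True) if os.path.isfile(p)]
        rel_paths = [os.path.relpath(p, host_src) for p in files]
    else:
        files = [host_src]
        rel_paths = [os.path.basename(host_src)]
    for path, rel in zip(files, rel_paths):
        with open(path, 'rb') as f:
            data = f.read()
        # assumed SDK call; the real method name/signature may differ
        self.sandbox.filesystem.write_bytes(os.path.join(sandbox_dest, rel), data)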
------------------------------------------------------------------------
@@ -0,0 +1,7 @@
+from .mixin import PluginMixin
+from .requirement import PluginRequirement
+
+# Requirements
+from .jupyter import JupyterRequirement
+
+__all__ = ['PluginMixin', 'PluginRequirement', 'JupyterRequirement']
------------------------------------------------------------------------
@@ -0,0 +1,11 @@
+import os
+from dataclasses import dataclass
+from opendevin.sandbox.plugins.requirement import PluginRequirement
+
+
+@dataclass
+class JupyterRequirement(PluginRequirement):
+    name: str = 'jupyter'
+    host_src: str = os.path.dirname(os.path.abspath(__file__))  # the directory of this file (sandbox/plugins/jupyter)
+    sandbox_dest: str = '/opendevin/plugins/jupyter'
+    bash_script_path: str = 'setup.sh'
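The PluginMixin.init_plugins implementation is not part of this excerpt; the fields above suggest roughly the following flow. Everything in this sketch is an assumption about how the pieces fit together, not code from the PR.

from typing import List

from opendevin.sandbox.plugins.requirement import PluginRequirement


class PluginMixinSketch:
    # Assumes the sandbox provides copy_to() and execute(), as in the diffs above.
    def init_plugins(self, requirements: List[PluginRequirement]):
        for req in requirements:
            # ship the plugin directory (host_src) into the sandbox (sandbox_dest)...
            self.copy_to(req.host_src, req.sandbox_dest, recursive=True)
            # ...then run its setup script inside the sandbox
            exit_code, output = self.execute(
                f'bash {req.sandbox_dest}/{req.bash_script_path}')
            if exit_code != 0:
                raise RuntimeError(f'Failed to initialize plugin {req.name}: {output}')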
------------------------------------------------------------------------
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+import os
+import sys
+import time
+import requests
+
+# Read the Python code from STDIN
+code = sys.stdin.read()
+
+# Set the default kernel ID
+kernel_id = 'default'
+
+# try 5 times until success
+PORT = os.environ.get('JUPYTER_EXEC_SERVER_PORT')
+POST_URL = f'http://localhost:{PORT}/execute'
+
+for i in range(5):
+    response = requests.post(POST_URL, json={'kernel_id': kernel_id, 'code': code})
+    # if "500: Internal Server Error" is not in the response, break the loop
+    if '500: Internal Server Error' not in response.text:
+        break
+    time.sleep(1)
+
+# Print the response
+print(str(response.text))
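A hypothetical invocation of the script above from inside the sandbox. The install path and the port value are assumptions for illustration (the path is derived from the sandbox_dest of the Jupyter requirement).

import os
import subprocess

# Pipe Python source over stdin; the script POSTs it to the Jupyter execute
# server and prints the kernel's response.
result = subprocess.run(
    ['python3', '/opendevin/plugins/jupyter/execute_cli'],  # assumed install path
    input='print(1 + 1)\n',
    capture_output=True,
    text=True,
    env={**os.environ, 'JUPYTER_EXEC_SERVER_PORT': '8080'},  # port is an assumption
)
print(result.stdout)  # expected to contain the execution output, e.g. "2"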
Review thread on the CACHE_DIR mount added in restart_docker_container:
Comment: I'm not entirely certain of the purpose of the cache dir -- do we still need this?
Reply: We need this (at least for now) to cache the pip install commands in the Jupyter requirement dependencies - without this, you need to re-download the whl files for every sandbox initialization, which can be way too slow. I still felt it is a bit slow even with the cache (you still need to spend the CPU hours)... we probably need to figure out a way to cache it further in the future (e.g., save the Docker image with the requirement installed and re-use it as much as possible).
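On the "save the Docker image with the requirement installed" idea from the reply above, a sketch using docker-py's commit. The repository/tag names are illustrative and this is not part of the PR.

import docker

client = docker.from_env()


def snapshot_sandbox(container_id: str,
                     repository: str = 'opendevin-sandbox-jupyter',
                     tag: str = 'latest') -> str:
    # Commit a sandbox container that already has the Jupyter requirements
    # installed, so future sandboxes can start from the committed image and
    # skip the slow pip install step entirely.
    container = client.containers.get(container_id)
    image = container.commit(repository=repository, tag=tag)
    return image.tags[0] if image.tags else image.id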