Skip to content

Commit

Permalink
Enable users to copy both files and directories
Browse files Browse the repository at this point in the history
  • Loading branch information
mmbugua committed Jan 30, 2023
1 parent 39326fe commit d6a8eba
Show file tree
Hide file tree
Showing 3 changed files with 347 additions and 13 deletions.
233 changes: 232 additions & 1 deletion jupyter_server/services/contents/filemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import errno
import math
import mimetypes
import os
import shutil
import stat
import subprocess
import sys
import warnings
from datetime import datetime
Expand All @@ -25,7 +27,7 @@

from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
from .fileio import AsyncFileManagerMixin, FileManagerMixin
from .manager import AsyncContentsManager, ContentsManager
from .manager import AsyncContentsManager, ContentsManager, copy_pat

try:
from os.path import samefile
Expand Down Expand Up @@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None):
parent_dir = ""
return parent_dir

def copy(self, from_path: str, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    If to_path not specified, it will be the parent directory of from_path.
    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
    from_path must be a full path to a file or directory.

    Files are delegated to the parent class ``copy``. Directories are first
    checked with ``check_folder_size`` and then copied by ``_copy_dir``.

    Parameters
    ----------
    from_path : str
        API path of the file or directory to copy.
    to_path : str, optional
        API path of the destination directory.

    Returns
    -------
    The model of the new copy.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir, from_name = "", path

    # Only the "type" field is needed here, so skip loading the content.
    model = self.get(path, content=False)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    if to_path is None:
        to_path = from_dir
    # Remember the destination *directory* before the new name is appended.
    # Deriving it from the resolved value (rather than str(to_path) taken
    # up-front) avoids passing the literal string "None" to _copy_dir when
    # the caller did not specify a destination.
    to_path_original = to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(to_path):
        to_name = super().increment_filename(to_name, to_path, insert="-Copy")
    to_path = f"{to_path}/{to_name}"

    return self._copy_dir(
        from_path=from_path,
        to_path_original=to_path_original,
        to_name=to_name,
        to_path=to_path,
    )

def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str):
"""
handles copying directories
returns the model for the copied directory
"""
try:
os_from_path = self._get_os_path(from_path.strip("/"))
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
shutil.copytree(os_from_path, os_to_path)
model = self.get(to_path, content=False)
except OSError as err:
self.log.error(f"OSError in _copy_dir: {err}")
raise web.HTTPError(
400,
f"Can't copy '{from_path}' into Folder '{to_path}'",
) from err

return model

def check_folder_size(self, path: str, limit_mb: int = 100):
    """
    Limit the size of folders being copied to prevent a timeout error.

    Parameters
    ----------
    path : str
        API path of the folder to check.
    limit_mb : int, optional
        Largest allowed folder size, in units of 1024 * 1024 bytes.
        Defaults to 100, the limit that used to be hard-coded.

    Raises
    ------
    web.HTTPError
        400 if the size reported by ``_get_dir_size`` exceeds the limit.
    """
    limit_str = f"{limit_mb}MB"
    limit_bytes = limit_mb * 1024 * 1024
    size = int(self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        # The literal's lines are flush-left so the message text is unchanged.
        raise web.HTTPError(
            400,
            f"""
Can't copy folders larger than {limit_str},
"{path}" is {self._human_readable_size(size)}
""",
        )

def _get_dir_size(self, path: str = "."):
"""
calls the command line program du to get the directory size
"""
try:
result = subprocess.run(
["du", "-s", "--block-size=1", path], capture_output=True
).stdout.split()
self.log.info(f"current status of du command {result}")
size = result[0].decode("utf-8")
except Exception as err:
self.log.error(f"Error during directory copy: {err}")
raise web.HTTPError(
400,
f"""
Unexpected error during copy operation,
not able to get the size of the {path} directory
""",
) from err
return size

def _human_readable_size(self, size: int):
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])


class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
"""An async file contents manager."""
Expand Down Expand Up @@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None):
else:
parent_dir = ""
return parent_dir

async def copy(self, from_path: str, to_path=None) -> dict:
    """
    Copy an existing file or directory and return its new model.

    If to_path not specified, it will be the parent directory of from_path.
    If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
    Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
    For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
    from_path must be a full path to a file or directory.

    Files are delegated to ``AsyncContentsManager.copy``. Directories are
    first checked with ``check_folder_size`` and then copied by ``_copy_dir``.

    Parameters
    ----------
    from_path : str
        API path of the file or directory to copy.
    to_path : str, optional
        API path of the destination directory.

    Returns
    -------
    dict
        The model of the new copy.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir, from_name = "", path

    # Only the "type" field is needed here, so skip loading the content.
    model = await self.get(path, content=False)
    if model["type"] != "directory":
        # Let the async base class handle copying files. It is called
        # explicitly because the first base class, FileContentsManager,
        # defines a synchronous copy().
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    if to_path is None:
        to_path = from_dir
    # Remember the destination *directory* before the new name is appended.
    # Deriving it from the resolved value (rather than str(to_path) taken
    # up-front) avoids passing the literal string "None" to _copy_dir when
    # the caller did not specify a destination.
    to_path_original = to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(to_path):
        to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
    to_path = f"{to_path}/{to_name}"

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=to_path_original,
        to_name=to_name,
        to_path=to_path,
    )

async def _copy_dir(
self, from_path: str, to_path_original: str, to_name: str, to_path: str
) -> dict:
"""
handles copying directories
returns the model for the copied directory
"""
try:
os_from_path = self._get_os_path(from_path.strip("/"))
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
shutil.copytree(os_from_path, os_to_path)
model = await self.get(to_path, content=False)
except OSError as err:
self.log.error(f"OSError in _copy_dir: {err}")
raise web.HTTPError(
400,
f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
) from err

return model

async def check_folder_size(self, path: str, limit_mb: int = 100) -> None:
    """
    Limit the size of folders being copied to prevent a timeout error.

    Parameters
    ----------
    path : str
        API path of the folder to check.
    limit_mb : int, optional
        Largest allowed folder size, in units of 1024 * 1024 bytes.
        Defaults to 100, the limit that used to be hard-coded.

    Raises
    ------
    web.HTTPError
        400 if the size reported by ``_get_dir_size`` exceeds the limit.
    """
    limit_str = f"{limit_mb}MB"
    limit_bytes = limit_mb * 1024 * 1024
    size = int(await self._get_dir_size(self._get_os_path(path)))
    if size > limit_bytes:
        # The literal's lines are flush-left so the message text is unchanged.
        raise web.HTTPError(
            400,
            f"""
Can't copy folders larger than {limit_str},
"{path}" is {await self._human_readable_size(size)}
""",
        )

async def _get_dir_size(self, path: str = ".") -> str:
"""
calls the command line program du to get the directory size
"""
try:
result = subprocess.run(
["du", "-s", "--block-size=1", path], capture_output=True
).stdout.split()
self.log.info(f"current status of du command {result}")
size = result[0].decode("utf-8")
except Exception as err:
self.log.error(f"Error during directory copy: {err}")
raise web.HTTPError(
400,
f"""
Unexpected error during copy operation,
not able to get the size of the {path} directory
""",
) from err
return size

async def _human_readable_size(self, size: int) -> str:
"""
returns folder size in a human readable format
"""
if size == 0:
return "0 Bytes"

units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
order = int(math.log2(size) / 10) if size else 0

return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
33 changes: 21 additions & 12 deletions tests/services/contents/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
_check_created(r, str(contents_dir), path, copy3, type="notebook")


async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
    """Copy the "foo" directory into "parent" twice and check both copies."""
    source_dir = "foo"
    target_dir = "parent"
    payload = json.dumps({"copy_from": source_dir})

    # First request: creates a nested copy of the original folder.
    first = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
    _check_created(first, str(contents_dir), target_dir, source_dir, type="directory")

    # Second request: a folder with that name now exists in the target,
    # so the new copy is expected to get the "-Copy1" suffix.
    second = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
    _check_created(
        second, str(contents_dir), target_dir, f"{source_dir}-Copy1", type="directory"
    )


async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
path1 = "foo"
path2 = "å b"
Expand Down Expand Up @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
assert expected_http_error(e, 400)


async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
    """A POST to "foo" with ``copy_from`` set to "å b" must be rejected with HTTP 400."""
    request_body = json.dumps({"copy_from": "å b"})
    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
        await jp_fetch("api", "contents", "foo", method="POST", body=request_body)
    assert expected_http_error(e, 400)


@pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
async def test_copy_400_hidden(
jp_fetch,
Expand Down
Loading

0 comments on commit d6a8eba

Please sign in to comment.