From d6a8eba35e49b3a085bfa3e2c6221aa3d3ecada7 Mon Sep 17 00:00:00 2001 From: mmbugua Date: Fri, 27 Jan 2023 09:59:27 -0800 Subject: [PATCH] Enable users to copy both files and directories --- .../services/contents/filemanager.py | 233 +++++++++++++++++- tests/services/contents/test_api.py | 33 ++- tests/services/contents/test_manager.py | 94 +++++++ 3 files changed, 347 insertions(+), 13 deletions(-) diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py index de0117a558..8732a4cdb1 100644 --- a/jupyter_server/services/contents/filemanager.py +++ b/jupyter_server/services/contents/filemanager.py @@ -2,10 +2,12 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. import errno +import math import mimetypes import os import shutil import stat +import subprocess import sys import warnings from datetime import datetime @@ -25,7 +27,7 @@ from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints from .fileio import AsyncFileManagerMixin, FileManagerMixin -from .manager import AsyncContentsManager, ContentsManager +from .manager import AsyncContentsManager, ContentsManager, copy_pat try: from os.path import samefile @@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None): parent_dir = "" return parent_dir + def copy(self, from_path: str, to_path=None): + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + self.check_folder_size(path) + else: + # let the super class handle copying files + return super().copy(from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str): + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into Folder '{to_path}'", + ) from err + + return model + + def check_folder_size(self, path: str): + """ + limit the size of folders being copied to prevent a timeout error + """ + limit_mb = 100 + limit_str = f"{limit_mb}MB" + limit_bytes = limit_mb * 1024 * 1024 + size = int(self._get_dir_size(self._get_os_path(path))) + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {limit_str}, + "{path}" is {self._human_readable_size(size)} + """, + ) + + def _get_dir_size(self, path: str = "."): + """ + calls the command line program du to get the directory size + """ + try: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + def _human_readable_size(self, size: int): + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) + class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager): """An async file contents manager.""" @@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None): else: parent_dir = "" return parent_dir + + async def copy(self, from_path: str, to_path=None) -> dict: + """ + Copy an existing file or directory and return its new model. + If to_path not specified, it will be the parent directory of from_path. + If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`. + Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`. + For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot. + from_path must be a full path to a file or directory. + """ + to_path_original = str(to_path) + path = from_path.strip("/") + if to_path is not None: + to_path = to_path.strip("/") + + if "/" in path: + from_dir, from_name = path.rsplit("/", 1) + else: + from_dir = "" + from_name = path + + model = await self.get(path) + # limit the size of folders being copied to prevent a timeout error + if model["type"] == "directory": + await self.check_folder_size(path) + else: + # let the super class handle copying files + return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path) + + is_destination_specified = to_path is not None + to_name = copy_pat.sub(".", from_name) + if not is_destination_specified: + to_path = from_dir + if await self.dir_exists(to_path): + name = copy_pat.sub(".", from_name) + to_name = await super().increment_filename(name, to_path, insert="-Copy") + to_path = f"{to_path}/{to_name}" + + return await self._copy_dir( + from_path=from_path, + to_path_original=to_path_original, + to_name=to_name, + to_path=to_path, + ) + + async def _copy_dir( + self, from_path: str, to_path_original: str, to_name: str, to_path: str + ) -> dict: + """ + handles copying directories + returns the model for the copied directory + """ + try: + os_from_path = self._get_os_path(from_path.strip("/")) + os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}' + shutil.copytree(os_from_path, os_to_path) + model = await self.get(to_path, content=False) + except OSError as err: + self.log.error(f"OSError in _copy_dir: {err}") + raise web.HTTPError( + 400, + f"Can't copy '{from_path}' into read-only Folder '{to_path}'", + ) from err + + return model + + async def check_folder_size(self, path: str) -> None: + """ + limit the size of folders being copied to prevent a timeout error + + """ + limit_mb = 100 + limit_str = f"{limit_mb}MB" + limit_bytes = limit_mb * 1024 * 1024 + size = int(await self._get_dir_size(self._get_os_path(path))) + if size > limit_bytes: + raise web.HTTPError( + 400, + f""" + Can't copy folders larger than {limit_str}, + "{path}" is {await self._human_readable_size(size)} + """, + ) + + async def _get_dir_size(self, path: str = ".") -> str: + """ + calls the command line program du to get the directory size + """ + try: + result = subprocess.run( + ["du", "-s", "--block-size=1", path], capture_output=True + ).stdout.split() + self.log.info(f"current status of du command {result}") + size = result[0].decode("utf-8") + except Exception as err: + self.log.error(f"Error during directory copy: {err}") + raise web.HTTPError( + 400, + f""" + Unexpected error during copy operation, + not able to get the size of the {path} directory + """, + ) from err + return size + + async def _human_readable_size(self, size: int) -> str: + """ + returns folder size in a human readable format + """ + if size == 0: + return "0 Bytes" + + units = ["Bytes", "KB", "MB", "GB", "TB", "PB"] + order = int(math.log2(size) / 10) if size else 0 + + return "{:.4g} {}".format(size / (1 << (order * 10)), units[order]) diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py index 3201e28d2f..d8d006386e 100644 --- a/tests/services/contents/test_api.py +++ b/tests/services/contents/test_api.py @@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created): _check_created(r, str(contents_dir), path, copy3, type="notebook") +async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created): + # created a nest copy of a the original folder + dest_dir = "foo" + path = "parent" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, dest_dir, type="directory") + + # copy to a folder where a similar name exists + dest_dir = "foo" + path = "parent" + copy_dir = f"{dest_dir}-Copy1" + response = await jp_fetch( + "api", "contents", path, method="POST", body=json.dumps({"copy_from": dest_dir}) + ) + + _check_created(response, str(contents_dir), path, copy_dir, type="directory") + + async def test_copy_path(jp_fetch, contents, contents_dir, _check_created): path1 = "foo" path2 = "å b" @@ -577,18 +598,6 @@ async def test_copy_put_400_hidden( assert expected_http_error(e, 400) -async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created): - with pytest.raises(tornado.httpclient.HTTPClientError) as e: - await jp_fetch( - "api", - "contents", - "foo", - method="POST", - body=json.dumps({"copy_from": "å b"}), - ) - assert expected_http_error(e, 400) - - @pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows") async def test_copy_400_hidden( jp_fetch, diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py index 1864341f89..54b32b35c7 100644 --- a/tests/services/contents/test_manager.py +++ b/tests/services/contents/test_manager.py @@ -1,4 +1,5 @@ import os +import shutil import sys import time from itertools import combinations @@ -52,6 +53,52 @@ def _make_dir(jp_contents_manager, api_path): print("Directory already exists: %r" % os_path) +def _make_big_dir(contents_manager, api_path): + # make a directory that is over 100 MB in size + os_path = contents_manager._get_os_path(api_path) + try: + os.makedirs(os_path) + # textFile = open(f"{os_path}/demofile.txt", "a") + # textFile.write( + # """ + # Lorem ipsum dolor sit amet, consectetur adipiscing elit, + # sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + # Ut enim ad minim veniam, quis nostrud exercitation ullamco + # laboris nisi ut aliquip ex ea commodo consequat. + # Duis aute irure dolor in reprehenderit in voluptate + # velit esse cillum dolore eu fugiat nulla pariatur. + # Excepteur sint occaecat cupidatat non proident, + # sunt in culpa qui officia deserunt mollit anim id est laborum. + # """ + # ) + # textFile.close() + + with open(f"{os_path}/demofile.txt", "a") as textFile: + textFile.write( + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, + sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Ut enim ad minim veniam, quis nostrud exercitation ullamco + laboris nisi ut aliquip ex ea commodo consequat. + Duis aute irure dolor in reprehenderit in voluptate + velit esse cillum dolore eu fugiat nulla pariatur. + Excepteur sint occaecat cupidatat non proident, + sunt in culpa qui officia deserunt mollit anim id est laborum. + """ + ) + + for i in range(200): + os.makedirs(f"{os_path}/subfolder-{i}") + for j in range(200): + shutil.copy( + f"{os_path}/demofile.txt", + f"{os_path}/subfolder-{i}/testfile{j}.txt", + ) + + except OSError as err: + print("Directory already exists", err) + + def symlink(jp_contents_manager, src, dst): """Make a symlink to src from dst @@ -816,6 +863,53 @@ async def test_copy(jp_contents_manager): assert copy3["path"] == "copy 3.ipynb" +async def test_copy_dir(jp_contents_manager): + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + nonExistantDir = "FolderDoesNotExist" + + _make_dir(cm, destDir) + _make_dir(cm, sourceDir) + + nestedDir = f"{destDir}/{sourceDir}" + + # copy one folder insider another folder + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == nestedDir + + # need to test when copying in a directory where the another folder with the same name exists + _make_dir(cm, nestedDir) + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + assert copy["path"] == f"{nestedDir}-Copy1" + + # need to test for when copying in the same path as the sourceDir + copy = await ensure_async(cm.copy(from_path=sourceDir, to_path="")) + assert copy["path"] == f"{sourceDir}-Copy1" + + # ensure its still possible to copy a folder to another folder that doesn't exist + copy = await ensure_async( + cm.copy( + from_path=sourceDir, + to_path=nonExistantDir, + ) + ) + assert copy["path"] == f"{nonExistantDir}/{sourceDir}" + + +async def test_copy_big_dir(jp_contents_manager): + # this tests how the Content API limits prevents copying folders that more than 100MB in size + cm = jp_contents_manager + destDir = "Untitled Folder 1" + sourceDir = "Morningstar Notebooks" + _make_dir(cm, destDir) + _make_big_dir(contents_manager=cm, api_path=sourceDir) + with pytest.raises(HTTPError) as exc_info: + await ensure_async(cm.copy(from_path=sourceDir, to_path=destDir)) + + assert exc_info.type is HTTPError + + async def test_mark_trusted_cells(jp_contents_manager): cm = jp_contents_manager nb, name, path = await new_notebook(cm)