Enable users to copy both files and directories

jupyter-server · Jan 30, 2023 · d6a8eba · d6a8eba
1 parent 39326fe
commit d6a8eba
Show file tree

Hide file tree

Showing 3 changed files with 347 additions and 13 deletions.
diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py
@@ -2,10 +2,12 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 import errno
+import math
 import mimetypes
 import os
 import shutil
 import stat
+import subprocess
 import sys
 import warnings
 from datetime import datetime
@@ -25,7 +27,7 @@
 
 from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
 from .fileio import AsyncFileManagerMixin, FileManagerMixin
-from .manager import AsyncContentsManager, ContentsManager
+from .manager import AsyncContentsManager, ContentsManager, copy_pat
 
 try:
     from os.path import samefile
@@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None):
             parent_dir = ""
         return parent_dir
 
+    def copy(self, from_path: str, to_path=None):
+        """
+        Copy an existing file or directory and return its new model.
+
+        Files are handed to the parent class implementation, which also decides
+        the name of the copy. Directories are size-checked first and then copied
+        recursively by ``_copy_dir``.
+
+        from_path must be a full path to a file or directory.
+        If to_path is not specified, a directory is copied into the parent
+        directory of from_path.
+        If the destination directory exists, the new directory name is obtained
+        from ``increment_filename`` with the ``-Copy`` insert.
+        """
+        path = from_path.strip("/")
+        if "/" in path:
+            from_dir, from_name = path.rsplit("/", 1)
+        else:
+            from_dir = ""
+            from_name = path
+
+        # Keep the caller's destination for _copy_dir. When none was given fall
+        # back to the source's parent directory: str(None) would otherwise make
+        # the copy land in a folder literally named "None".
+        to_path_original = from_dir if to_path is None else str(to_path)
+        if to_path is not None:
+            to_path = to_path.strip("/")
+
+        model = self.get(path)
+        if model["type"] != "directory":
+            # let the super class handle copying files
+            return super().copy(from_path=from_path, to_path=to_path)
+
+        # limit the size of folders being copied to prevent a timeout error
+        self.check_folder_size(path)
+
+        if to_path is None:
+            to_path = from_dir
+
+        # Replace whatever copy_pat matches in the source name with a plain dot
+        # before asking for the next free "-Copy" name.
+        to_name = copy_pat.sub(".", from_name)
+        if self.dir_exists(to_path):
+            to_name = super().increment_filename(to_name, to_path, insert="-Copy")
+        to_path = f"{to_path}/{to_name}"
+
+        return self._copy_dir(
+            from_path=from_path,
+            to_path_original=to_path_original,
+            to_name=to_name,
+            to_path=to_path,
+        )
+
+    def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str):
+        """
+        Recursively copy a directory on disk and return the model of the copy.
+
+        from_path and to_path_original are resolved to OS paths after stripping
+        slashes; to_name is appended to the resolved destination to form the
+        target of the copy. to_path is the API path used to fetch the model of
+        the new directory and to build the error message.
+
+        Any OSError raised along the way is logged and re-raised as HTTP 400.
+        """
+        try:
+            source_os_path = self._get_os_path(from_path.strip("/"))
+            parent_os_path = self._get_os_path(to_path_original.strip("/"))
+            shutil.copytree(source_os_path, f"{parent_os_path}/{to_name}")
+            # fetching the model stays inside the try so that an OSError from
+            # it is reported the same way as a failed copy
+            return self.get(to_path, content=False)
+        except OSError as err:
+            self.log.error(f"OSError in _copy_dir: {err}")
+            raise web.HTTPError(
+                400,
+                f"Can't copy '{from_path}' into Folder '{to_path}'",
+            ) from err
+
+    def check_folder_size(self, path: str):
+        """
+        Reject folders above a fixed size so that copying them cannot time out.
+
+        The size of ``path`` is obtained from ``_get_dir_size``; when it exceeds
+        100 MB (counted as 100 * 1024 * 1024 bytes) an HTTP 400 error naming the
+        limit and the actual folder size is raised. Returns None otherwise.
+        """
+        max_mb = 100
+        max_bytes = max_mb * 1024 * 1024
+        os_path = self._get_os_path(path)
+        folder_bytes = int(self._get_dir_size(os_path))
+        if folder_bytes > max_bytes:
+            raise web.HTTPError(
+                400,
+                f"""
+                    Can't copy folders larger than {max_mb}MB,
+                    "{path}" is {self._human_readable_size(folder_bytes)}
+                """,
+            )
+
+    def _get_dir_size(self, path: str = "."):
+        """
+        Return the disk usage of ``path`` in bytes, as a decimal string.
+
+        Calls the command line program du with the POSIX options ``-s``
+        (one summary line) and ``-k`` (1024-byte units) and converts the
+        reported count to bytes. The GNU-only ``--block-size=1`` flag is
+        avoided because BSD/macOS du rejects it.
+
+        Raises HTTP 400 when du cannot be run or its output cannot be parsed.
+        """
+        try:
+            result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split()
+            self.log.info(f"current status of du command {result}")
+            # first field of the summary line is the size in KiB
+            size = str(int(result[0].decode("utf-8")) * 1024)
+        except Exception as err:
+            self.log.error(f"Error during directory copy: {err}")
+            raise web.HTTPError(
+                400,
+                f"""
+                Unexpected error during copy operation,
+                not able to get the size of the {path} directory
+                """,
+            ) from err
+        return size
+
+    def _human_readable_size(self, size: int):
+        """
+        Return ``size`` (a number of bytes) in a human readable format.
+
+        The value is scaled by powers of 1024 and printed with up to four
+        significant digits followed by the unit, e.g. ``"1.5 KB"``.
+        Sizes beyond the largest known unit are expressed in that unit
+        instead of failing.
+        """
+        if size == 0:
+            return "0 Bytes"
+
+        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
+        # bit_length gives floor(log2) exactly, without the float rounding of
+        # math.log2; the result is clamped to the units table on both ends.
+        order = (abs(int(size)).bit_length() - 1) // 10
+        order = max(0, min(order, len(units) - 1))
+
+        return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
+
 
 class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
     """An async file contents manager."""
@@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None):
         else:
             parent_dir = ""
         return parent_dir
+
+    async def copy(self, from_path: str, to_path=None) -> dict:
+        """
+        Copy an existing file or directory and return its new model.
+
+        Files are handed to ``AsyncContentsManager.copy``, which also decides
+        the name of the copy. Directories are size-checked first and then
+        copied recursively by ``_copy_dir``.
+
+        from_path must be a full path to a file or directory.
+        If to_path is not specified, a directory is copied into the parent
+        directory of from_path.
+        If the destination directory exists, the new directory name is obtained
+        from ``increment_filename`` with the ``-Copy`` insert.
+        """
+        path = from_path.strip("/")
+        if "/" in path:
+            from_dir, from_name = path.rsplit("/", 1)
+        else:
+            from_dir = ""
+            from_name = path
+
+        # Keep the caller's destination for _copy_dir. When none was given fall
+        # back to the source's parent directory: str(None) would otherwise make
+        # the copy land in a folder literally named "None".
+        to_path_original = from_dir if to_path is None else str(to_path)
+        if to_path is not None:
+            to_path = to_path.strip("/")
+
+        model = await self.get(path)
+        if model["type"] != "directory":
+            # let the super class handle copying files
+            return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)
+
+        # limit the size of folders being copied to prevent a timeout error
+        await self.check_folder_size(path)
+
+        if to_path is None:
+            to_path = from_dir
+
+        # Replace whatever copy_pat matches in the source name with a plain dot
+        # before asking for the next free "-Copy" name.
+        to_name = copy_pat.sub(".", from_name)
+        if await self.dir_exists(to_path):
+            to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
+        to_path = f"{to_path}/{to_name}"
+
+        return await self._copy_dir(
+            from_path=from_path,
+            to_path_original=to_path_original,
+            to_name=to_name,
+            to_path=to_path,
+        )
+
+    async def _copy_dir(
+        self, from_path: str, to_path_original: str, to_name: str, to_path: str
+    ) -> dict:
+        """
+        Recursively copy a directory on disk and return the model of the copy.
+
+        from_path and to_path_original are resolved to OS paths after stripping
+        slashes; to_name is appended to the resolved destination to form the
+        target of the copy. to_path is the API path used to fetch the model of
+        the new directory and to build the error message.
+
+        Any OSError raised along the way is logged and re-raised as HTTP 400.
+        """
+        try:
+            os_from_path = self._get_os_path(from_path.strip("/"))
+            os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
+            # NOTE(review): copytree runs synchronously inside this coroutine;
+            # confirm whether it should be moved to a worker thread.
+            shutil.copytree(os_from_path, os_to_path)
+            model = await self.get(to_path, content=False)
+        except OSError as err:
+            self.log.error(f"OSError in _copy_dir: {err}")
+            # An OSError here is not necessarily a permission problem, so the
+            # message does not claim the folder is read-only.
+            raise web.HTTPError(
+                400,
+                f"Can't copy '{from_path}' into Folder '{to_path}'",
+            ) from err
+
+        return model
+
+    async def check_folder_size(self, path: str) -> None:
+        """
+        Reject folders above a fixed size so that copying them cannot time out.
+
+        The size of ``path`` is obtained from ``_get_dir_size``; when it exceeds
+        100 MB (counted as 100 * 1024 * 1024 bytes) an HTTP 400 error naming the
+        limit and the actual folder size is raised.
+        """
+        max_mb = 100
+        max_bytes = max_mb * 1024 * 1024
+        os_path = self._get_os_path(path)
+        folder_bytes = int(await self._get_dir_size(os_path))
+        if folder_bytes > max_bytes:
+            raise web.HTTPError(
+                400,
+                f"""
+                    Can't copy folders larger than {max_mb}MB,
+                    "{path}" is {await self._human_readable_size(folder_bytes)}
+                """,
+            )
+
+    async def _get_dir_size(self, path: str = ".") -> str:
+        """
+        Return the disk usage of ``path`` in bytes, as a decimal string.
+
+        Calls the command line program du with the POSIX options ``-s``
+        (one summary line) and ``-k`` (1024-byte units) and converts the
+        reported count to bytes. The GNU-only ``--block-size=1`` flag is
+        avoided because BSD/macOS du rejects it.
+
+        Raises HTTP 400 when du cannot be run or its output cannot be parsed.
+        """
+        try:
+            # NOTE(review): subprocess.run is a synchronous call inside this
+            # coroutine; confirm whether that is acceptable for large folders.
+            result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split()
+            self.log.info(f"current status of du command {result}")
+            # first field of the summary line is the size in KiB
+            size = str(int(result[0].decode("utf-8")) * 1024)
+        except Exception as err:
+            self.log.error(f"Error during directory copy: {err}")
+            raise web.HTTPError(
+                400,
+                f"""
+                Unexpected error during copy operation,
+                not able to get the size of the {path} directory
+                """,
+            ) from err
+        return size
+
+    async def _human_readable_size(self, size: int) -> str:
+        """
+        Return ``size`` (a number of bytes) in a human readable format.
+
+        The value is scaled by powers of 1024 and printed with up to four
+        significant digits followed by the unit, e.g. ``"1.5 KB"``.
+        Sizes beyond the largest known unit are expressed in that unit
+        instead of failing.
+        """
+        if size == 0:
+            return "0 Bytes"
+
+        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
+        # bit_length gives floor(log2) exactly, without the float rounding of
+        # math.log2; the result is clamped to the units table on both ends.
+        order = (abs(int(size)).bit_length() - 1) // 10
+        order = max(0, min(order, len(units) - 1))
+
+        return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py
@@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
     _check_created(r, str(contents_dir), path, copy3, type="notebook")
 
 
+async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
+    """Copy a directory through the contents API twice into the same target."""
+    source_dir = "foo"
+    target_dir = "parent"
+    payload = json.dumps({"copy_from": source_dir})
+
+    # the first request creates a nested copy of the original folder
+    first = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
+    _check_created(first, str(contents_dir), target_dir, source_dir, type="directory")
+
+    # the second request copies to a folder where the same name already exists
+    expected_name = f"{source_dir}-Copy1"
+    second = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
+    _check_created(second, str(contents_dir), target_dir, expected_name, type="directory")
+
+
 async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
     path1 = "foo"
     path2 = "å b"
@@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
     assert expected_http_error(e, 400)
 
 
-async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
-    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
-        await jp_fetch(
-            "api",
-            "contents",
-            "foo",
-            method="POST",
-            body=json.dumps({"copy_from": "å b"}),
-        )
-    assert expected_http_error(e, 400)
-
-
 @pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
 async def test_copy_400_hidden(
     jp_fetch,