From d6a8eba35e49b3a085bfa3e2c6221aa3d3ecada7 Mon Sep 17 00:00:00 2001
From: mmbugua <moses.mbugua@morningstar.com>
Date: Fri, 27 Jan 2023 09:59:27 -0800
Subject: [PATCH] Enable users to copy both files and directories

---
 .../services/contents/filemanager.py          | 233 +++++++++++++++++-
 tests/services/contents/test_api.py           |  33 ++-
 tests/services/contents/test_manager.py       |  94 +++++++
 3 files changed, 347 insertions(+), 13 deletions(-)

diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py
index de0117a558..8732a4cdb1 100644
--- a/jupyter_server/services/contents/filemanager.py
+++ b/jupyter_server/services/contents/filemanager.py
@@ -2,10 +2,12 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 import errno
+import math
 import mimetypes
 import os
 import shutil
 import stat
+import subprocess
 import sys
 import warnings
 from datetime import datetime
@@ -25,7 +27,7 @@
 
 from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
 from .fileio import AsyncFileManagerMixin, FileManagerMixin
-from .manager import AsyncContentsManager, ContentsManager
+from .manager import AsyncContentsManager, ContentsManager, copy_pat
 
 try:
     from os.path import samefile
@@ -602,6 +604,119 @@ def get_kernel_path(self, path, model=None):
             parent_dir = ""
         return parent_dir
 
+    def copy(self, from_path: str, to_path=None):
+        """
+        Copy an existing file or directory and return the model of the copy.
+
+        Files are delegated to the parent class.  A directory is copied into
+        ``to_path`` (default: the directory containing ``from_path``); when that
+        destination exists the name is chosen by ``increment_filename`` with a
+        ``-Copy`` insert.  ``check_folder_size`` may reject large directories.
+
+        from_path must be a full path to a file or directory.
+        to_path, when given, is the directory that receives the copy.
+        """
+        path = from_path.strip("/")
+        if to_path is not None:
+            to_path = to_path.strip("/")
+
+        # content=False: only the model type is needed to pick a strategy
+        model = self.get(path, content=False)
+        if model["type"] != "directory":
+            # let the super class handle copying files
+            return super().copy(from_path=from_path, to_path=to_path)
+
+        # limit the size of folders being copied to prevent a timeout error
+        self.check_folder_size(path)
+
+        from_dir, _, from_name = path.rpartition("/")
+        if to_path is None:
+            # no destination given: copy next to the source directory
+            to_path = from_dir
+
+        to_name = copy_pat.sub(".", from_name)
+        if self.dir_exists(to_path):
+            # the destination may already hold this name; pick a free one
+            to_name = super().increment_filename(to_name, to_path, insert="-Copy")
+        new_path = f"{to_path}/{to_name}".strip("/")
+
+        return self._copy_dir(
+            from_path=from_path,
+            # pass the resolved parent directory, never the text "None"
+            to_path_original=to_path,
+            to_name=to_name,
+            to_path=new_path,
+        )
+
+    def _copy_dir(self, from_path: str, to_path_original: str, to_name: str, to_path: str):
+        """
+        Copy the directory ``from_path`` to the API path ``to_path`` and return
+        the new model; ``to_path_original``/``to_name`` are kept for callers only.
+        """
+        try:
+            os_from_path = self._get_os_path(from_path.strip("/"))
+            # to_path_original may be the text "None", so resolve to_path instead
+            os_to_path = self._get_os_path(to_path.strip("/"))
+            shutil.copytree(os_from_path, os_to_path)
+            model = self.get(to_path, content=False)
+        except OSError as err:
+            self.log.error(f"OSError in _copy_dir: {err}")
+            raise web.HTTPError(
+                400, f"Can't copy '{from_path}' into Folder '{to_path}'"
+            ) from err
+
+        return model
+
+    def check_folder_size(self, path: str, limit_mb: int = 100):
+        """
+        Refuse to copy a directory that is too large, to prevent a timeout error.
+
+        ``path`` is the API path of the directory and ``limit_mb`` the maximum
+        size in MB (1 MB = 1024 * 1024 bytes) that may still be copied.
+        Raises ``web.HTTPError`` (400) when the directory exceeds the limit.
+        """
+        limit_bytes = limit_mb * 1024 * 1024
+        size = int(self._get_dir_size(self._get_os_path(path)))
+        if size > limit_bytes:
+            raise web.HTTPError(
+                400,
+                f"Can't copy folders larger than {limit_mb}MB, "
+                f'"{path}" is {self._human_readable_size(size)}',
+            )
+
+    def _get_dir_size(self, path: str = "."):
+        """
+        Return the disk usage of ``path`` in bytes, as a string, using ``du -sk``.
+        Raises ``web.HTTPError`` (400) when the size can't be determined.
+        """
+        try:
+            # -k (1024-byte units) is POSIX; GNU-only --block-size=1 breaks BSD/macOS du
+            result = subprocess.run(
+                ["du", "-sk", path], capture_output=True, check=True
+            ).stdout.split()
+            self.log.info(f"current status of du command {result}")
+            size = str(int(result[0]) * 1024)
+        except Exception as err:
+            self.log.error(f"Error during directory copy: {err}")
+            raise web.HTTPError(
+                400,
+                "Unexpected error during copy operation, "
+                f"not able to get the size of the {path} directory",
+            ) from err
+        return size
+
+    def _human_readable_size(self, size: int):
+        """
+        Return ``size`` (a byte count) formatted with a binary unit, e.g. "1.5 KB".
+        """
+        if size == 0:
+            return "0 Bytes"
+
+        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
+        # clamp so sizes of 1024 PB and above don't index past the last unit
+        order = min(int(math.log2(size) / 10), len(units) - 1)
+        return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
+
 
 class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
     """An async file contents manager."""
@@ -959,3 +1074,119 @@ async def get_kernel_path(self, path, model=None):
         else:
             parent_dir = ""
         return parent_dir
+
+    async def copy(self, from_path: str, to_path=None) -> dict:
+        """
+        Copy an existing file or directory and return the model of the copy.
+
+        Files are delegated to ``AsyncContentsManager``.  A directory is copied
+        into ``to_path`` (default: the directory containing ``from_path``); when
+        that destination exists the name is chosen by ``increment_filename`` with
+        a ``-Copy`` insert.  ``check_folder_size`` may reject large directories.
+
+        from_path must be a full path to a file or directory.
+        to_path, when given, is the directory that receives the copy.
+        """
+        path = from_path.strip("/")
+        if to_path is not None:
+            to_path = to_path.strip("/")
+
+        # content=False: only the model type is needed to pick a strategy
+        model = await self.get(path, content=False)
+        if model["type"] != "directory":
+            # let the super class handle copying files
+            return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)
+
+        # limit the size of folders being copied to prevent a timeout error
+        await self.check_folder_size(path)
+
+        from_dir, _, from_name = path.rpartition("/")
+        if to_path is None:
+            # no destination given: copy next to the source directory
+            to_path = from_dir
+
+        to_name = copy_pat.sub(".", from_name)
+        if await self.dir_exists(to_path):
+            # the destination may already hold this name; pick a free one
+            to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
+        new_path = f"{to_path}/{to_name}".strip("/")
+
+        return await self._copy_dir(
+            from_path=from_path,
+            # pass the resolved parent directory, never the text "None"
+            to_path_original=to_path,
+            to_name=to_name,
+            to_path=new_path,
+        )
+
+    async def _copy_dir(
+        self, from_path: str, to_path_original: str, to_name: str, to_path: str
+    ) -> dict:
+        """
+        Copy the directory ``from_path`` to the API path ``to_path`` and return
+        the new model; ``to_path_original``/``to_name`` are kept for callers only.
+        """
+        try:
+            os_from_path = self._get_os_path(from_path.strip("/"))
+            # to_path_original may be the text "None", so resolve to_path instead
+            os_to_path = self._get_os_path(to_path.strip("/"))
+            shutil.copytree(os_from_path, os_to_path)
+            model = await self.get(to_path, content=False)
+        except OSError as err:
+            self.log.error(f"OSError in _copy_dir: {err}")
+            raise web.HTTPError(
+                400, f"Can't copy '{from_path}' into Folder '{to_path}'"
+            ) from err
+
+        return model
+
+    async def check_folder_size(self, path: str, limit_mb: int = 100) -> None:
+        """
+        Refuse to copy a directory that is too large, to prevent a timeout error.
+
+        ``path`` is the API path of the directory and ``limit_mb`` the maximum
+        size in MB (1 MB = 1024 * 1024 bytes) that may still be copied.
+        Raises ``web.HTTPError`` (400) when the directory exceeds the limit.
+        """
+        limit_bytes = limit_mb * 1024 * 1024
+        size = int(await self._get_dir_size(self._get_os_path(path)))
+        if size > limit_bytes:
+            readable_size = await self._human_readable_size(size)
+            raise web.HTTPError(
+                400,
+                f"Can't copy folders larger than {limit_mb}MB, "
+                f'"{path}" is {readable_size}',
+            )
+
+    async def _get_dir_size(self, path: str = ".") -> str:
+        """
+        Return the disk usage of ``path`` in bytes, as a string, using ``du -sk``.
+        Raises ``web.HTTPError`` (400) when the size can't be determined.
+        """
+        try:
+            # -k (1024-byte units) is POSIX; GNU-only --block-size=1 breaks BSD/macOS du
+            result = subprocess.run(
+                ["du", "-sk", path], capture_output=True, check=True
+            ).stdout.split()
+            self.log.info(f"current status of du command {result}")
+            size = str(int(result[0]) * 1024)
+        except Exception as err:
+            self.log.error(f"Error during directory copy: {err}")
+            raise web.HTTPError(
+                400,
+                "Unexpected error during copy operation, "
+                f"not able to get the size of the {path} directory",
+            ) from err
+        return size
+
+    async def _human_readable_size(self, size: int) -> str:
+        """
+        Return ``size`` (a byte count) formatted with a binary unit, e.g. "1.5 KB".
+        """
+        if size == 0:
+            return "0 Bytes"
+
+        units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
+        # clamp so sizes of 1024 PB and above don't index past the last unit
+        order = min(int(math.log2(size) / 10), len(units) - 1)
+        return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
diff --git a/tests/services/contents/test_api.py b/tests/services/contents/test_api.py
index 3201e28d2f..d8d006386e 100644
--- a/tests/services/contents/test_api.py
+++ b/tests/services/contents/test_api.py
@@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
     _check_created(r, str(contents_dir), path, copy3, type="notebook")
 
 
+async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
+    """Copy a directory through the REST API, twice into the same parent."""
+    source = "foo"
+    parent = "parent"
+    payload = json.dumps({"copy_from": source})
+    root = str(contents_dir)
+
+    # first request: expect a nested copy of the original folder
+    first = await jp_fetch(
+        "api", "contents", parent, method="POST", body=payload
+    )
+    _check_created(first, root, parent, source, type="directory")
+
+    # second request: copy to a folder where the same name already exists
+    second = await jp_fetch(
+        "api", "contents", parent, method="POST", body=payload
+    )
+    renamed = f"{source}-Copy1"
+    _check_created(second, root, parent, renamed, type="directory")
+
+
 async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
     path1 = "foo"
     path2 = "å b"
@@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
     assert expected_http_error(e, 400)
 
 
-async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
-    with pytest.raises(tornado.httpclient.HTTPClientError) as e:
-        await jp_fetch(
-            "api",
-            "contents",
-            "foo",
-            method="POST",
-            body=json.dumps({"copy_from": "å b"}),
-        )
-    assert expected_http_error(e, 400)
-
-
 @pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
 async def test_copy_400_hidden(
     jp_fetch,
diff --git a/tests/services/contents/test_manager.py b/tests/services/contents/test_manager.py
index 1864341f89..54b32b35c7 100644
--- a/tests/services/contents/test_manager.py
+++ b/tests/services/contents/test_manager.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 import sys
 import time
 from itertools import combinations
@@ -52,6 +53,52 @@ def _make_dir(jp_contents_manager, api_path):
         print("Directory already exists: %r" % os_path)
 
 
+def _make_big_dir(contents_manager, api_path):
+    """Create a directory at ``api_path`` that is over 100 MB in size.
+
+    A small text file is written once and then copied 200 times into each
+    of 200 sub-folders (40,000 files in total).
+
+    NOTE(review): each file holds well under 1 KB of text, so the 100 MB
+    presumably counts allocated disk blocks rather than file contents --
+    confirm on filesystems that pack small files.
+
+    Any ``OSError`` raised on the way (for instance because the directory
+    already exists) is printed instead of being propagated, so a failure
+    here does not by itself fail the calling test.
+    """
+    os_path = contents_manager._get_os_path(api_path)
+    try:
+        os.makedirs(os_path)
+
+        # seed file that every sub-folder receives copies of
+        with open(f"{os_path}/demofile.txt", "a") as textFile:
+            textFile.write(
+                """
+            Lorem ipsum dolor sit amet, consectetur adipiscing elit,
+            sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+            Ut enim ad minim veniam, quis nostrud exercitation ullamco
+            laboris nisi ut aliquip ex ea commodo consequat.
+            Duis aute irure dolor in reprehenderit in voluptate
+            velit esse cillum dolore eu fugiat nulla pariatur.
+            Excepteur sint occaecat cupidatat non proident,
+            sunt in culpa qui officia deserunt mollit anim id est laborum.
+            """
+            )
+
+        # 200 sub-folders x 200 copies of the seed file
+        for i in range(200):
+            os.makedirs(f"{os_path}/subfolder-{i}")
+            for j in range(200):
+                shutil.copy(
+                    f"{os_path}/demofile.txt",
+                    f"{os_path}/subfolder-{i}/testfile{j}.txt",
+                )
+
+    except OSError as err:
+        print("Directory already exists", err)
+
+
 def symlink(jp_contents_manager, src, dst):
     """Make a symlink to src from dst
 
@@ -816,6 +863,53 @@ async def test_copy(jp_contents_manager):
     assert copy3["path"] == "copy 3.ipynb"
 
 
+async def test_copy_dir(jp_contents_manager):
+    """Copy a directory with the contents manager.
+
+    Covers an existing destination, a destination that already holds the
+    name, the source's own parent and a destination that doesn't exist.
+    """
+    cm = jp_contents_manager
+    source_dir = "Morningstar Notebooks"
+    dest_dir = "Untitled Folder 1"
+    missing_dir = "FolderDoesNotExist"
+    nested_dir = f"{dest_dir}/{source_dir}"
+
+    for api_path in (dest_dir, source_dir):
+        _make_dir(cm, api_path)
+
+    # copy one folder inside another folder
+    model = await ensure_async(cm.copy(from_path=source_dir, to_path=dest_dir))
+    assert model["path"] == nested_dir
+
+    # the destination already holds a folder with the same name
+    _make_dir(cm, nested_dir)
+    model = await ensure_async(cm.copy(from_path=source_dir, to_path=dest_dir))
+    assert model["path"] == f"{nested_dir}-Copy1"
+
+    # the destination is the directory that already contains the source
+    model = await ensure_async(cm.copy(from_path=source_dir, to_path=""))
+    assert model["path"] == f"{source_dir}-Copy1"
+
+    # it is still possible to copy into a folder that doesn't exist yet
+    copy_call = cm.copy(from_path=source_dir, to_path=missing_dir)
+    model = await ensure_async(copy_call)
+    assert model["path"] == f"{missing_dir}/{source_dir}"
+
+
+async def test_copy_big_dir(jp_contents_manager):
+    """Copying a folder that is more than 100MB in size must be refused."""
+    cm = jp_contents_manager
+    source_dir, dest_dir = "Morningstar Notebooks", "Untitled Folder 1"
+    _make_dir(cm, dest_dir)
+    _make_big_dir(contents_manager=cm, api_path=source_dir)
+
+    with pytest.raises(HTTPError) as raised:
+        await ensure_async(cm.copy(from_path=source_dir, to_path=dest_dir))
+
+    assert raised.type is HTTPError
+
+
 async def test_mark_trusted_cells(jp_contents_manager):
     cm = jp_contents_manager
     nb, name, path = await new_notebook(cm)