Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Follow symlinks and exclude hidden directories #36

Merged
merged 10 commits into from
Jul 11, 2020
26 changes: 23 additions & 3 deletions jupyter_archive/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ def get(self, archive_path, include_body=False):
if archive_format not in SUPPORTED_FORMAT:
self.log.error("Unsupported format {}.".format(archive_format))
raise web.HTTPError(404)
# Query parameters arrive as strings, so compare against the literal values
# "true" and "false". Any other value is an invalid argument and results in
# an HTTP 400 error.
if self.get_argument("followSymlinks", "true") == "true":
follow_symlinks = True
elif self.get_argument("followSymlinks", "true") == "false":
follow_symlinks = False
else:
raise web.HTTPError(400)
if self.get_argument("downloadHidden", "false") == "true":
download_hidden = True
elif self.get_argument("downloadHidden", "false") == "false":
download_hidden = False
else:
raise web.HTTPError(400)

archive_path = os.path.join(cm.root_dir, url2path(archive_path))

Expand All @@ -113,7 +128,7 @@ def get(self, archive_path, include_body=False):
self.flush_cb = ioloop.PeriodicCallback(self.flush, ARCHIVE_DOWNLOAD_FLUSH_DELAY)
self.flush_cb.start()

args = (archive_path, archive_format, archive_token)
args = (archive_path, archive_format, archive_token, follow_symlinks, download_hidden)
yield ioloop.IOLoop.current().run_in_executor(None, self.archive_and_download, *args)

if self.canceled:
Expand All @@ -126,11 +141,16 @@ def get(self, archive_path, include_body=False):
self.flush_cb.stop()
self.finish()

def archive_and_download(self, archive_path, archive_format, archive_token):
def archive_and_download(self, archive_path, archive_format, archive_token, follow_symlinks, download_hidden):

with make_writer(self, archive_format) as archive:
prefix = len(str(archive_path.parent)) + len(os.path.sep)
for root, _, files in os.walk(archive_path):
for root, dirs, files in os.walk(archive_path, followlinks=follow_symlinks):
# When download_hidden is false, skip hidden files and prune hidden
# directories (names starting with '.') while walking the tree.
if not download_hidden:
files = [f for f in files if not f[0] == '.']
dirs[:] = [d for d in dirs if not d[0] == '.']
for file_ in files:
file_name = os.path.join(root, file_)
if not self.canceled:
Expand Down
68 changes: 52 additions & 16 deletions jupyter_archive/tests/test_archive_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,48 @@ def test_download(self):
f.write("hello2")
with open(pjoin(archive_dir_path, "test3.md"), "w") as f:
f.write("hello3")
with open(pjoin(archive_dir_path, ".test4.md"), "w") as f:
f.write("hello4")
os.makedirs(pjoin(archive_dir_path, ".test-hidden-folder"))
with open(pjoin(archive_dir_path, ".test-hidden-folder", "test5.md"), "w") as f:
f.write("hello5")
symlink_dir_path = pjoin(nbdir, "symlink-archive-dir")
os.makedirs(symlink_dir_path)
with open(pjoin(symlink_dir_path, "test6.md"), "w") as f:
f.write("hello6")
os.symlink(symlink_dir_path, pjoin(archive_dir_path, "symlink-test-dir"))

# Try to download the created folder.
archive_relative_path = os.path.basename(archive_dir_path)
url_template = "directories/{}?archiveToken=564646&archiveFormat={}"
url_template = "directories/{}?archiveToken=564646&archiveFormat={}&followSymlinks={}&downloadHidden={}"

file_lists = {
"download-archive-dir/test2.txt",
"download-archive-dir/test1.txt",
"download-archive-dir/test3.md",
"falsefalse": {
"download-archive-dir/test2.txt",
"download-archive-dir/test1.txt",
"download-archive-dir/test3.md",
},
"falsetrue": {
"download-archive-dir/test2.txt",
"download-archive-dir/test1.txt",
"download-archive-dir/test3.md",
"download-archive-dir/.test4.md",
"download-archive-dir/.test-hidden-folder/test5.md",
},
"truefalse": {
"download-archive-dir/test2.txt",
"download-archive-dir/test1.txt",
"download-archive-dir/test3.md",
"download-archive-dir/symlink-test-dir/test6.md"
},
"truetrue": {
"download-archive-dir/test2.txt",
"download-archive-dir/test1.txt",
"download-archive-dir/test3.md",
"download-archive-dir/.test4.md",
"download-archive-dir/.test-hidden-folder/test5.md",
"download-archive-dir/symlink-test-dir/test6.md"
}
}

format_mode = {
Expand All @@ -52,18 +85,21 @@ def test_download(self):
"tar.xz": "r|xz",
}

for format, mode in format_mode.items():
url = url_template.format(archive_relative_path, format)
r = self.request("GET", url)
assert r.status_code == 200
assert r.headers["content-type"] == "application/octet-stream"
assert r.headers["cache-control"] == "no-cache"
if format == "zip":
with zipfile.ZipFile(io.BytesIO(r.content), mode=mode) as zf:
assert set(zf.namelist()) == file_lists
else:
with tarfile.open(fileobj=io.BytesIO(r.content), mode=mode) as tf:
assert set(map(lambda m: m.name, tf.getmembers())) == file_lists
for followSymlinks in ["true", "false"]:
for download_hidden in ["true", "false"]:
file_list = file_lists[followSymlinks + download_hidden]
for format, mode in format_mode.items():
url = url_template.format(archive_relative_path, format, followSymlinks, download_hidden)
r = self.request("GET", url)
assert r.status_code == 200
assert r.headers["content-type"] == "application/octet-stream"
assert r.headers["cache-control"] == "no-cache"
if format == "zip":
with zipfile.ZipFile(io.BytesIO(r.content), mode=mode) as zf:
assert set(zf.namelist()) == file_list
else:
with tarfile.open(fileobj=io.BytesIO(r.content), mode=mode) as tf:
assert set(map(lambda m: m.name, tf.getmembers())) == file_list

def test_extract(self):

Expand Down
14 changes: 14 additions & 0 deletions schema/archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@
"title": "Archive format",
"description": "Archive format for compressing folder; one of [null (submenu), 'zip', 'tgz', 'tar.gz', 'tbz', 'tbz2', 'tar.bz', 'tar.bz2', 'txz', 'tar.xz']",
"default": "zip"
},
"followSymlinks": {
"type": ["string"],
"enum": ["true", "false"],
"title": "Follow Symlinks",
"description": "Whether or not to resolve symlinks and add resulting files to the archive; one of ['true', 'false']",
"default": "true"
},
"downloadHidden": {
"type": ["string"],
"enum": ["true", "false"],
"title": "Download Hidden Files",
"description": "Whether or not to add hidden files to the archive when downloading; one of ['true', 'false']",
"default": "false"
}
},
"additionalProperties": false,
Expand Down
20 changes: 17 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ namespace CommandIDs {

function downloadArchiveRequest(
path: string,
archiveFormat: ArchiveFormat
archiveFormat: ArchiveFormat,
followSymlinks: string,
downloadHidden: string
): Promise<void> {
const settings = ServerConnection.makeSettings();

Expand All @@ -53,6 +55,8 @@ function downloadArchiveRequest(

fullurl.searchParams.append("archiveToken", token(20));
fullurl.searchParams.append("archiveFormat", archiveFormat);
fullurl.searchParams.append("followSymlinks", followSymlinks);
fullurl.searchParams.append("downloadHidden", downloadHidden);

const xsrfTokenMatch = document.cookie.match("\\b_xsrf=([^;]*)\\b");
if (xsrfTokenMatch) {
Expand Down Expand Up @@ -133,6 +137,8 @@ const extension: JupyterFrontEndPlugin<void> = {
".tar.xz"
];
let archiveFormat: ArchiveFormat; // Default value read from settings
let followSymlinks: string; // Default value read from settings
let downloadHidden: string; // Default value read from settings

// matches anywhere on filebrowser
const selectorContent = ".jp-DirListing-content";
Expand Down Expand Up @@ -222,10 +228,14 @@ const extension: JupyterFrontEndPlugin<void> = {
settings.changed.connect(settings => {
const newFormat = settings.get("format").composite as ArchiveFormat;
updateFormat(newFormat, archiveFormat);
followSymlinks = settings.get("followSymlinks").composite as string;
downloadHidden = settings.get("downloadHidden").composite as string;
});

const newFormat = settings.get("format").composite as ArchiveFormat;
updateFormat(newFormat, archiveFormat);
followSymlinks = settings.get("followSymlinks").composite as string;
downloadHidden = settings.get("downloadHidden").composite as string;
})
.catch(reason => {
console.error(reason);
Expand All @@ -247,7 +257,9 @@ const extension: JupyterFrontEndPlugin<void> = {
item.path,
allowedArchiveExtensions.indexOf("." + format) >= 0
? format
: archiveFormat
: archiveFormat,
followSymlinks,
downloadHidden
);
}
});
Expand Down Expand Up @@ -309,7 +321,9 @@ const extension: JupyterFrontEndPlugin<void> = {
widget.model.path,
allowedArchiveExtensions.indexOf("." + format) >= 0
? format
: archiveFormat
: archiveFormat,
followSymlinks,
downloadHidden
);
}
},
Expand Down