Skip to content

Commit

Permalink
Merge pull request #1454 from dandi/bf-download-directory
Browse files Browse the repository at this point in the history
Normalize path while requesting list of assets from the server
  • Loading branch information
yarikoptic authored Jun 14, 2024
2 parents 59f5da0 + 0aabf16 commit da90c49
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 22 deletions.
39 changes: 38 additions & 1 deletion dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import os.path
from pathlib import Path, PurePosixPath
import posixpath
import re
from time import sleep, time
from types import TracebackType
Expand Down Expand Up @@ -937,6 +938,29 @@ def from_data(cls, client: DandiAPIClient, data: dict[str, Any]) -> RemoteDandis
client=client, identifier=data["identifier"], version=version, data=data
)

@staticmethod
def _normalize_path(path: str) -> str:
"""
Helper to normalize path before passing it to the server.
We and API call it "path" but it is really a "prefix" with inherent
semantics of containing directory divider '/' and referring to a
directory when terminated with '/'.
"""
# Server (now) expects path to be a proper prefix, so to account for user
# possibly specifying ./ or some other relative paths etc, let's normalize
# the path.
# Ref: https://github.com/dandi/dandi-cli/issues/1452
path_normed = posixpath.normpath(path)
if path_normed == ".":
path_normed = ""
elif path.endswith("/"):
# we need to make sure that we have a trailing slash if we had it before
path_normed += "/"
if path_normed != path:
lgr.debug("Normalized path %r to %r", path, path_normed)
return path_normed

def json_dict(self) -> dict[str, Any]:
"""
Convert to a JSONable `dict`, omitting the ``client`` attribute and
Expand Down Expand Up @@ -1154,6 +1178,9 @@ def get_assets_with_path_prefix(
Returns an iterator of all assets in this version of the Dandiset whose
`~RemoteAsset.path` attributes start with ``path``
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
Assets can be sorted by a given field by passing the name of that field
as the ``order`` parameter. The accepted field names are
``"created"``, ``"modified"``, and ``"path"``. Prepend a hyphen to the
Expand All @@ -1162,7 +1189,7 @@ def get_assets_with_path_prefix(
try:
for a in self.client.paginate(
f"{self.version_api_path}assets/",
params={"path": path, "order": order},
params={"path": self._normalize_path(path), "order": order},
):
yield RemoteAsset.from_data(self, a)
except HTTP404Error:
Expand Down Expand Up @@ -1200,7 +1227,11 @@ def get_asset_by_path(self, path: str) -> RemoteAsset:
Fetch the asset in this version of the Dandiset whose
`~RemoteAsset.path` equals ``path``. If the given asset does not
exist, a `NotFoundError` is raised.
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
"""
path = self._normalize_path(path)
try:
# Weed out any assets that happen to have the given path as a
# proper prefix:
Expand All @@ -1221,9 +1252,15 @@ def download_directory(
"""
Download all assets under the virtual directory ``assets_dirpath`` to
the directory ``dirpath``. Downloads are synchronous.
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
"""
if assets_dirpath and not assets_dirpath.endswith("/"):
assets_dirpath += "/"
# need to normalize explicitly since we do use it below also
# to deduce portion of the path to strip off.
assets_dirpath = self._normalize_path(assets_dirpath)
assets = list(self.get_assets_with_path_prefix(assets_dirpath))
for a in assets:
filepath = Path(dirpath, a.path[len(assets_dirpath) :])
Expand Down
66 changes: 45 additions & 21 deletions dandi/tests/test_dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import random
import re
from shutil import rmtree
from typing import Any

import anys
import click
Expand Down Expand Up @@ -46,7 +47,7 @@ def test_upload(
d.upload_raw_asset(simple1_nwb, {"path": "testing/simple1.nwb"})
(asset,) = d.get_assets()
assert asset.path == "testing/simple1.nwb"
d.download_directory("", tmp_path)
d.download_directory("./", tmp_path)
paths = list_paths(tmp_path)
assert paths == [tmp_path / "testing" / "simple1.nwb"]
assert paths[0].stat().st_size == simple1_nwb.stat().st_size
Expand Down Expand Up @@ -678,26 +679,49 @@ def test_get_assets_order(text_dandiset: SampleDandiset) -> None:


def test_get_assets_with_path_prefix(text_dandiset: SampleDandiset) -> None:
assert sorted(
asset.path
for asset in text_dandiset.dandiset.get_assets_with_path_prefix("subdir")
) == ["subdir1/apple.txt", "subdir2/banana.txt", "subdir2/coconut.txt"]
assert sorted(
asset.path
for asset in text_dandiset.dandiset.get_assets_with_path_prefix("subdir2")
) == ["subdir2/banana.txt", "subdir2/coconut.txt"]
assert [
asset.path
for asset in text_dandiset.dandiset.get_assets_with_path_prefix(
"subdir", order="path"
)
] == ["subdir1/apple.txt", "subdir2/banana.txt", "subdir2/coconut.txt"]
assert [
asset.path
for asset in text_dandiset.dandiset.get_assets_with_path_prefix(
"subdir", order="-path"
)
] == ["subdir2/coconut.txt", "subdir2/banana.txt", "subdir1/apple.txt"]
def _get_assets_with_path_prefix(prefix: str, **kw: Any) -> list[str]:
return [
asset.path
for asset in text_dandiset.dandiset.get_assets_with_path_prefix(
prefix, **kw
)
]

assert (
sorted(_get_assets_with_path_prefix("subdir"))
== sorted(_get_assets_with_path_prefix("./subdir"))
== sorted(_get_assets_with_path_prefix("./subdir2/../sub"))
== [
"subdir1/apple.txt",
"subdir2/banana.txt",
"subdir2/coconut.txt",
]
)
assert (
_get_assets_with_path_prefix("subdir/")
== _get_assets_with_path_prefix("./subdir/")
== _get_assets_with_path_prefix("../subdir1/")
== []
)
assert (
sorted(_get_assets_with_path_prefix("subdir2"))
== sorted(_get_assets_with_path_prefix("a/../subdir2"))
== sorted(_get_assets_with_path_prefix("./subdir2"))
== [
"subdir2/banana.txt",
"subdir2/coconut.txt",
]
)
assert _get_assets_with_path_prefix("subdir", order="path") == [
"subdir1/apple.txt",
"subdir2/banana.txt",
"subdir2/coconut.txt",
]
assert _get_assets_with_path_prefix("subdir", order="-path") == [
"subdir2/coconut.txt",
"subdir2/banana.txt",
"subdir1/apple.txt",
]


def test_get_assets_by_glob(text_dandiset: SampleDandiset) -> None:
Expand Down

0 comments on commit da90c49

Please sign in to comment.