Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use yarl to clean up some code #1415

Merged
merged 3 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
from time import sleep, time
from types import TracebackType
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from urllib.parse import quote_plus, urlparse, urlunparse

import click
from dandischema import models
from pydantic import BaseModel, Field, PrivateAttr
import requests
import tenacity
from yarl import URL

from . import get_logger
from .consts import (
Expand Down Expand Up @@ -1521,7 +1521,7 @@ def get_content_url(
else:
raise # reraise since we need to figure out how to handle such a case
if strip_query:
url = urlunparse(urlparse(url)._replace(query=""))
url = str(URL(url).with_query(None))
return url

def get_download_file_iter(
Expand Down Expand Up @@ -1970,9 +1970,10 @@ def get_download_file_iter(
Returns a function that when called (optionally with an offset into the
file to start downloading at) returns a generator of chunks of the file
"""
prefix = quote_plus(str(self))
url = self.client.get_url(
f"/zarr/{self.zarr_id}/files?prefix={prefix}&download=true"
url = str(
URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
{"prefix": str(self), "download": "true"}
)
)

def downloader(start_at: int = 0) -> Iterator[bytes]:
Expand Down
8 changes: 6 additions & 2 deletions dandi/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

import click
from yarl import URL

from .consts import DRAFT, ZARR_EXTENSIONS, DandiInstance, dandiset_metadata_file
from .dandiapi import DandiAPIClient, RemoteAsset, RemoteDandiset
Expand Down Expand Up @@ -246,5 +247,8 @@ def find_local_asset(filepath: str) -> tuple[str, str]:


def is_same_url(url1: str, url2: str) -> bool:
# TODO: Use a real URL library like furl, purl, or yarl
return url1.rstrip("/") == url2.rstrip("/")
u1 = URL(url1)
u1 = u1.with_path(u1.path.rstrip("/"))
u2 = URL(url2)
u2 = u2.with_path(u2.path.rstrip("/"))
return u1 == u2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so how is this better? will it reorder query params or smth to find equality? if so -- would be nice to add a testcase which would solidify adding such capability which was not present before.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It normalizes various URL bits, such as lowercasing the scheme & host and normalizing internationalized domain names.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests added: e58df62

32 changes: 18 additions & 14 deletions dandi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
import traceback
import types
from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
from urllib.parse import parse_qs, urlparse, urlunparse

import dateutil.parser
from multidict import MultiDict # dependency of yarl
from pydantic import BaseModel, Field
import requests
import ruamel.yaml
from semantic_version import Version
from yarl import URL

from . import __version__, get_logger
from .consts import DandiInstance, known_instances, known_instances_rev
Expand Down Expand Up @@ -567,8 +568,9 @@
else:
instance = None
is_api = False
bits = urlparse(redirector_url)
redirector_url = urlunparse((bits[0], bits[1], "", "", "", ""))
redirector_url = str(
URL(redirector_url).with_path("").with_query(None).with_fragment(None)
)
else:
dandi_id = dandi_instance_id
instance = known_instances[dandi_id]
Expand Down Expand Up @@ -619,13 +621,13 @@
if dandi_id is None:
# Don't use pydantic.AnyHttpUrl, as that sets the `port` attribute even
# if it's not present in the string.
bits = urlparse(api_url)
if bits.hostname is not None:
dandi_id = bits.hostname
if bits.port is not None:
u = URL(api_url)
if u.host is not None:
dandi_id = u.host
if (port := u.explicit_port) is not None:
if ":" in dandi_id:
dandi_id = f"[{dandi_id}]"
dandi_id += f":{bits.port}"
dandi_id += f":{port}"

Check warning on line 630 in dandi/utils.py

View check run for this annotation

Codecov / codecov/patch

dandi/utils.py#L630

Added line #L630 was not covered by tests
else:
dandi_id = api_url
return DandiInstance(
Expand Down Expand Up @@ -790,12 +792,14 @@
Tests whether the URL ``page2`` is the same as ``page1`` but with the
``page`` query parameter set to ``2``
"""
bits1 = urlparse(page1)
params1 = parse_qs(bits1.query)
params1["page"] = ["2"]
bits2 = urlparse(page2)
params2 = parse_qs(bits2.query)
return (bits1[:3], params1, bits1.fragment) == (bits2[:3], params2, bits2.fragment)
url1 = URL(page1)
params1 = MultiDict(url1.query)
params1["page"] = "2"
url1 = url1.with_query(None)
url2 = URL(page2)
params2 = url2.query
url2 = url2.with_query(None)
return (url1, sorted(params1.items())) == (url2, sorted(params2.items()))


def exclude_from_zarr(path: Path) -> bool:
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ install_requires =
ruamel.yaml >=0.15, <1
semantic-version
tenacity
yarl ~= 1.9
zarr ~= 2.10
zarr_checksum ~= 0.4.0
zip_safe = False
Expand Down