dandi · yarikoptic · Mar 6, 2024 · Mar 1, 2024 · Mar 1, 2024 · Mar 5, 2024
diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py
@@ -14,13 +14,13 @@
 from time import sleep, time
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
-from urllib.parse import quote_plus, urlparse, urlunparse
 
 import click
 from dandischema import models
 from pydantic import BaseModel, Field, PrivateAttr
 import requests
 import tenacity
+from yarl import URL
 
 from . import get_logger
 from .consts import (
@@ -1521,7 +1521,7 @@ def get_content_url(
             else:
                 raise  # reraise since we need to figure out how to handle such a case
         if strip_query:
-            url = urlunparse(urlparse(url)._replace(query=""))
+            url = str(URL(url).with_query(None))
         return url
 
     def get_download_file_iter(
@@ -1970,9 +1970,10 @@ def get_download_file_iter(
         Returns a function that when called (optionally with an offset into the
         file to start downloading at) returns a generator of chunks of the file
         """
-        prefix = quote_plus(str(self))
-        url = self.client.get_url(
-            f"/zarr/{self.zarr_id}/files?prefix={prefix}&download=true"
+        url = str(
+            URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
+                {"prefix": str(self), "download": "true"}
+            )
         )
 
         def downloader(start_at: int = 0) -> Iterator[bytes]:

diff --git a/dandi/delete.py b/dandi/delete.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 import click
+from yarl import URL
 
 from .consts import DRAFT, ZARR_EXTENSIONS, DandiInstance, dandiset_metadata_file
 from .dandiapi import DandiAPIClient, RemoteAsset, RemoteDandiset
@@ -246,5 +247,8 @@ def find_local_asset(filepath: str) -> tuple[str, str]:
 
 
 def is_same_url(url1: str, url2: str) -> bool:
-    # TODO: Use a real URL library like furl, purl, or yarl
-    return url1.rstrip("/") == url2.rstrip("/")
+    u1 = URL(url1)
+    u1 = u1.with_path(u1.path.rstrip("/"))
+    u2 = URL(url2)
+    u2 = u2.with_path(u2.path.rstrip("/"))
+    return u1 == u2
diff --git a/dandi/utils.py b/dandi/utils.py
@@ -23,13 +23,14 @@
 import traceback
 import types
 from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
-from urllib.parse import parse_qs, urlparse, urlunparse
 
 import dateutil.parser
+from multidict import MultiDict  # dependency of yarl
 from pydantic import BaseModel, Field
 import requests
 import ruamel.yaml
 from semantic_version import Version
+from yarl import URL
 
 from . import __version__, get_logger
 from .consts import DandiInstance, known_instances, known_instances_rev
@@ -567,8 +568,9 @@
         else:
             instance = None
             is_api = False
-            bits = urlparse(redirector_url)
-            redirector_url = urlunparse((bits[0], bits[1], "", "", "", ""))
+            redirector_url = str(
+                URL(redirector_url).with_path("").with_query(None).with_fragment(None)
+            )
     else:
         dandi_id = dandi_instance_id
         instance = known_instances[dandi_id]
@@ -619,13 +621,13 @@
     if dandi_id is None:
         # Don't use pydantic.AnyHttpUrl, as that sets the `port` attribute even
         # if it's not present in the string.
-        bits = urlparse(api_url)
-        if bits.hostname is not None:
-            dandi_id = bits.hostname
-            if bits.port is not None:
+        u = URL(api_url)
+        if u.host is not None:
+            dandi_id = u.host
+            if (port := u.explicit_port) is not None:
                 if ":" in dandi_id:
                     dandi_id = f"[{dandi_id}]"
-                dandi_id += f":{bits.port}"
+                dandi_id += f":{port}"
         else:
             dandi_id = api_url
     return DandiInstance(
@@ -790,12 +792,14 @@
     Tests whether the URL ``page2`` is the same as ``page1`` but with the
     ``page`` query parameter set to ``2``
     """
-    bits1 = urlparse(page1)
-    params1 = parse_qs(bits1.query)
-    params1["page"] = ["2"]
-    bits2 = urlparse(page2)
-    params2 = parse_qs(bits2.query)
-    return (bits1[:3], params1, bits1.fragment) == (bits2[:3], params2, bits2.fragment)
+    url1 = URL(page1)
+    params1 = MultiDict(url1.query)
+    params1["page"] = "2"
+    url1 = url1.with_query(None)
+    url2 = URL(page2)
+    params2 = url2.query
+    url2 = url2.with_query(None)
+    return (url1, sorted(params1.items())) == (url2, sorted(params2.items()))
 
 
 def exclude_from_zarr(path: Path) -> bool:

diff --git a/setup.cfg b/setup.cfg
@@ -55,6 +55,7 @@ install_requires =
     ruamel.yaml >=0.15, <1
     semantic-version
     tenacity
+    yarl ~= 1.9
     zarr ~= 2.10
     zarr_checksum ~= 0.4.0
 zip_safe = False