aio-libs · bdraco · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/tests/test_url.py b/tests/test_url.py
@@ -72,7 +72,11 @@ def test_str():
             "http://example.com/this/",
             "is/a/test",
         ),
-        ("http://example.com/this/is/../a//test", "http://example.com/this/", "a/test"),
+        (
+            "http://example.com/this/is/../a//test",
+            "http://example.com/this/",
+            "a//test",
+        ),
         ("http://example.com/path/to", "http://example.com/spam/", "../path/to"),
         ("http://example.com/path", "http://example.com/path/to/", ".."),
         ("http://example.com/path", "http://example.com/other/../path/to/", ".."),
@@ -98,7 +102,6 @@ def test_sub(target: str, base: str, expected: str):
     assert result_url == expected_url
 
 
-@pytest.mark.xfail(reason="Empty segments are not preserved")
 @pytest.mark.parametrize(
     ("target", "base", "expected"),
     [
@@ -110,7 +113,7 @@ def test_sub(target: str, base: str, expected: str):
         (
             "http://example.com////path/////to",
             "http://example.com/////spam",
-            "..//path/////to",
+            "../path/////to",
         ),
     ],
 )
@@ -139,9 +142,10 @@ def test_sub_with_different_anchors():
 
 
 def test_sub_with_two_dots_in_base():
-    expected_error_msg = "'..' segment in '/path/..' cannot be walked"
+    # pytest.raises(match=...) is a regex search, so escape the literal dots
+    expected_error_msg = r"'\.\.' segment in 'path/\.\.' cannot be walked"
     with pytest.raises(ValueError, match=expected_error_msg):
-        URL("path/to") - URL("/path/../from")
+        URL("path/to") - URL("path/../from")
 
 
 def test_repr():

diff --git a/yarl/_path.py b/yarl/_path.py
@@ -1,9 +1,9 @@
 """Utilities for working with paths."""
 
-from collections.abc import Sequence
+from collections.abc import Generator, Sequence
 from contextlib import suppress
 from itertools import chain
-from pathlib import PurePosixPath
+from typing import Union
 
 
 def normalize_path_segments(segments: Sequence[str]) -> list[str]:
@@ -43,29 +43,96 @@ def normalize_path(path: str) -> str:
     return prefix + "/".join(normalize_path_segments(segments))
 
 
+class URLPath:
+    """A URL path split into ``/``-separated segments.
+
+    ``parts`` holds the segments after removing one trailing slash and
+    all ``.`` segments; empty segments are kept. ``path`` is ``parts``
+    joined with ``/``, or ``.`` when that join is empty.
+    """
+
+    __slots__ = ("parts", "path")
+
+    def __init__(self, path: str, strip_tail: bool = False) -> None:
+        """Initialize a URLPath object.
+
+        When ``strip_tail`` is true and ``path`` does not end with a
+        slash, the last segment is dropped as well.
+        """
+        # endswith() also works for an empty path; path[-1] raises IndexError
+        had_trailing_slash = path.endswith("/")
+        # Strip trailing slash
+        if had_trailing_slash:
+            path = path[:-1]
+        if "." in path:
+            # Strip '.' segments
+            parts = [x for x in path.split("/") if x != "."]
+        else:
+            parts = path.split("/")
+        if strip_tail and not had_trailing_slash and parts:
+            parts.pop()
+        self.path = "/".join(parts) or "."
+        self.parts = parts
+
+    def parents(self) -> Generator["URLPath", None, None]:
+        """Yield each parent path, nearest parent first."""
+        parts = self.parts
+        for i in range(len(parts) - 1, -1, -1):
+            parent_parts = parts[:i]
+            # Bypass __init__: the parts are already split and stripped
+            url_path = object.__new__(URLPath)
+            url_path.path = "/".join(parent_parts) or "."
+            url_path.parts = parent_parts
+            yield url_path
+
+
 def calculate_relative_path(target: str, base: str) -> str:
     """Return the relative path between two other paths.
 
     If the operation is not possible, raise ValueError.
+
+    The last segment of ``base`` is ignored unless ``base`` ends with a
+    slash. ValueError is raised when only one of the paths starts with
+    a slash, or when a ``..`` segment of ``base`` would have to be
+    walked. An empty result is returned as ``.``.
     """
+    target_path = URLPath(target)
+    base_path = URLPath(base, strip_tail=True)
 
-    target = target or "/"
-    base = base or "/"
-
-    target_path = PurePosixPath(target)
-    base_path = PurePosixPath(base)
-
-    if base[-1] != "/":
-        base_path = base_path.parent
+    target_path_parts: Union[set[str], None] = None
+    target_path_path = target_path.path
 
-    for step, path in enumerate(chain((base_path,), base_path.parents)):
-        if path == target_path or path in target_path.parents:
-            break
-        elif path.name == "..":
-            raise ValueError(f"'..' segment in {str(base_path)!r} cannot be walked")
-    else:
+    # Compare real booleans: ``s and s[0] == "/"`` evaluates to "" for an
+    # empty path, which never equals False and misreports the anchors.
+    if target.startswith("/") != base.startswith("/"):
         raise ValueError(
-            f"{str(target_path)!r} and {str(base_path)!r} have different anchors"
+            f"{target_path_path!r} and {base_path.path!r} have different anchors"
         )
-    offset = len(path.parts)
-    return str(PurePosixPath(*("..",) * step, *target_path.parts[offset:]))
+
+    for step, base_walk in enumerate(chain((base_path,), base_path.parents())):
+        if base_walk.path == target_path_path:
+            break
+        # If the target_path_parts is already built we can use a fast path
+        if target_path_parts is not None:
+            if base_walk.path in target_path_parts:
+                break
+            elif base_walk.parts[-1] == "..":
+                raise ValueError(f"'..' segment in {base_path.path!r} cannot be walked")
+            continue
+        target_path_parts = set()
+        # We check one at a time because enumerating parents
+        # builds the value on demand, and we want to stop
+        # as soon as we find the common parent
+        for target_parent in target_path.parents():
+            if target_parent.path == base_path.path:
+                break
+            target_path_parts.add(target_parent.path)
+        else:
+            # If we didn't break, it means we didn't find a common parent
+            if base_walk.parts[-1] == "..":
+                raise ValueError(f"'..' segment in {base_path.path!r} cannot be walked")
+            continue
+        break
+
+    offset = len(base_walk.parts)
+    return "/".join((*("..",) * step, *target_path.parts[offset:])) or "."
diff --git a/yarl/_url.py b/yarl/_url.py
@@ -502,6 +502,11 @@ def __sub__(self, other: object) -> "URL":
         if target_netloc != base_netloc:
             raise ValueError("Both URLs should have the same netloc")
 
+        if target_netloc and not target_path:
+            target_path = "/"
+        if base_netloc and not base_path:
+            base_path = "/"
+
         path = calculate_relative_path(target_path, base_path)
         return self._from_tup(("", "", path, "", ""))