Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce time complexity of URL subtraction #1388

Closed
wants to merge 37 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7449454
Try to make URL subtraction a bit lazier
bdraco Oct 24, 2024
c1855d0
lazier
bdraco Oct 24, 2024
4c0628f
reduce
bdraco Oct 24, 2024
e60bc57
reduce
bdraco Oct 24, 2024
2d5655b
working
bdraco Oct 25, 2024
295f405
abstract it
bdraco Oct 25, 2024
31630e0
abstract it
bdraco Oct 25, 2024
d24ffe3
wip
bdraco Oct 25, 2024
62e8a49
reduce some more
bdraco Oct 25, 2024
9cae1ef
reduce
bdraco Oct 25, 2024
dcac142
naming
bdraco Oct 25, 2024
6a952a0
naming
bdraco Oct 25, 2024
1fa70d5
naming
bdraco Oct 25, 2024
89dacef
fix
bdraco Oct 25, 2024
bccef2d
fix
bdraco Oct 25, 2024
28dc640
fix
bdraco Oct 25, 2024
bf405f5
fix
bdraco Oct 25, 2024
1a00a76
reduce
bdraco Oct 25, 2024
98f41e5
Update tests/test_url.py
bdraco Oct 25, 2024
75e3650
naming
bdraco Oct 25, 2024
8a4ea89
naming
bdraco Oct 25, 2024
dc5a187
Merge remote-tracking branch 'origin/url_lazy_no_pathlib' into url_la…
bdraco Oct 25, 2024
1cc8805
naming
bdraco Oct 25, 2024
3964744
Add xfail tests for URL subtraction with empty segments
bdraco Oct 25, 2024
03b23a0
Merge branch 'empty_segments_sub' into url_lazy_no_pathlib
bdraco Oct 25, 2024
060b10c
Merge branch 'master' into url_lazy_no_pathlib
bdraco Oct 25, 2024
6d61d54
Merge remote-tracking branch 'origin/url_lazy_no_pathlib' into url_la…
bdraco Oct 25, 2024
e4ff365
tweak
bdraco Oct 25, 2024
2595811
fix
bdraco Oct 25, 2024
9ea6b9c
fixes
bdraco Oct 25, 2024
2d3d173
reduce
bdraco Oct 25, 2024
53b19a6
cleanup
bdraco Oct 25, 2024
4ed7ff2
naming
bdraco Oct 25, 2024
5674c92
Update yarl/_path.py
bdraco Oct 25, 2024
317e384
remove
bdraco Oct 25, 2024
bf918db
adding / should only happen if netloc
bdraco Oct 25, 2024
3885557
adding / should only happen if netloc
bdraco Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ def test_str():
],
)
def test_sub(target: str, base: str, expected: str):
assert URL(target) - URL(base) == URL(expected)
expected_result = URL(expected)
result = URL(target) - URL(base)
assert result == expected_result
bdraco marked this conversation as resolved.
Show resolved Hide resolved


def test_sub_with_different_schemes():
Expand Down
95 changes: 82 additions & 13 deletions yarl/_path.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Utilities for working with paths."""

from collections.abc import Sequence
from collections.abc import Generator, Sequence
from contextlib import suppress
from itertools import chain
from pathlib import PurePosixPath
from typing import Union


def normalize_path_segments(segments: Sequence[str]) -> list[str]:
Expand Down Expand Up @@ -43,6 +44,50 @@ def normalize_path(path: str) -> str:
return prefix + "/".join(normalize_path_segments(segments))


class SimplePath:
    """Minimal parsed view of a ``/``-separated path.

    The path is split on ``/``; empty segments and ``.`` segments are
    dropped, while ``..`` segments are kept untouched.  The result is
    exposed through a small ``pathlib``-like surface (``name``, ``parts``,
    ``parents()``) plus a pre-rendered ``normalized`` string that is cheap
    to compare.
    """

    __slots__ = ("_tail", "_root", "_trailer", "normalized")

    def __init__(self, path: str, strip_root: bool = False) -> None:
        """Initialize a SimplePath object.

        :param path: the raw path; may be empty, in which case the
            object renders as ``"."``.
        :param strip_root: when true and *path* does not end with ``/``,
            drop the final segment so the object refers to the containing
            "directory" of *path*.
        """
        self._tail = [x for x in path.split("/") if x and x != "."]

        # startswith/endswith are used instead of path[0]/path[-1] so that
        # an empty path is handled instead of raising IndexError.
        if strip_root and self._tail and not path.endswith("/"):
            self._tail.pop()

        self._root = "/" if path.startswith("/") else ""
        # Only an empty input gets the "." placeholder.
        self._trailer = "" if path else "."
        self.normalized = self._render(self._root, self._tail, self._trailer)

    @staticmethod
    def _render(root: str, tail: list[str], trailer: str) -> str:
        """Join root and segments, falling back to *trailer* if empty."""
        return (root + "/".join(tail)) or trailer

    @property
    def name(self) -> str:
        """Return the last part of the path."""
        return (self._tail[-1] if self._tail else "") or self._trailer

    @property
    def parts_count(self) -> int:
        """Return the number of parts in the path.

        The root ``/``, when present, counts as one part.
        """
        return len(self._tail) + bool(self._root)

    @property
    def parts(self) -> tuple[str, ...]:
        """Return the path components as a tuple.

        The root ``/``, when present, is the first element.
        """
        if self._root:
            return (self._root,) + tuple(self._tail)
        return tuple(self._tail)

    def parents(self) -> Generator["SimplePath", None, None]:
        """Yield the ancestors of this path lazily, nearest first.

        Each ancestor is built only when the consumer asks for it, so a
        caller that stops early does not pay for the remaining ones.
        """
        for i in range(len(self._tail) - 1, -1, -1):
            # Bypass __init__: the segments are already parsed, so there
            # is no need to split and filter a string again.
            parent = object.__new__(SimplePath)
            parent._tail = self._tail[:i]
            parent._root = self._root
            parent._trailer = self._trailer
            parent.normalized = self._render(
                parent._root, parent._tail, parent._trailer
            )
            yield parent


def calculate_relative_path(target: str, base: str) -> str:
"""Return the relative path between two other paths.

Expand All @@ -52,20 +97,44 @@ def calculate_relative_path(target: str, base: str) -> str:
target = target or "/"
base = base or "/"

target_path = PurePosixPath(target)
base_path = PurePosixPath(base)
target_path = SimplePath(target)
base_path = SimplePath(base, strip_root=True)

if base[-1] != "/":
base_path = base_path.parent

for step, path in enumerate(chain((base_path,), base_path.parents)):
if path == target_path or path in target_path.parents:
target_path_parent_strs: Union[set[str], None] = None
for step, path in enumerate(chain((base_path,), base_path.parents())):
if path.normalized == target_path.normalized:
break
elif path.name == "..":
raise ValueError(f"'..' segment in {str(base_path)!r} cannot be walked")
# If the target_path_parent_strs is already built use the quick path
bdraco marked this conversation as resolved.
Show resolved Hide resolved
if target_path_parent_strs is not None:
if path.normalized in target_path_parent_strs:
break
elif path.name == "..":
raise ValueError(
f"'..' segment in {base_path.normalized!r} cannot be walked"
)
continue
target_path_parent_strs = set()
# We check one at a time because enumerating parents
# builds the value on demand, and we want to stop
# as soon as we find the common parent
for parent in target_path.parents():
if parent.normalized == base_path.normalized:
break
target_path_parent_strs.add(parent.normalized)
else:
# If we didn't break, it means we didn't find a common parent
if path.name == "..":
raise ValueError(
f"'..' segment in {base_path.normalized!r} cannot be walked"
)
continue
break
else:
raise ValueError(
f"{str(target_path)!r} and {str(base_path)!r} have different anchors"
msg = (
f"{target_path.normalized!r} and {base_path.normalized!r} "
"have different anchors"
)
offset = len(path.parts)
raise ValueError(msg)

offset = path.parts_count
return str(PurePosixPath(*("..",) * step, *target_path.parts[offset:]))
bdraco marked this conversation as resolved.
Show resolved Hide resolved