From 404b542db329157521c597da98a7ccf7b343b4d1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 12 Oct 2024 21:15:18 -0500 Subject: [PATCH] Avoid normalizing child paths when there are no dots in the path (#1248) --- CHANGES/1248.misc.rst | 1 + tests/test_url.py | 18 ++++++++++++++++++ yarl/_url.py | 4 +++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 CHANGES/1248.misc.rst diff --git a/CHANGES/1248.misc.rst b/CHANGES/1248.misc.rst new file mode 100644 index 000000000..135b1c940 --- /dev/null +++ b/CHANGES/1248.misc.rst @@ -0,0 +1 @@ +Improved performance of :py:meth:`~yarl.URL.joinpath` -- by :user:`bdraco`. diff --git a/tests/test_url.py b/tests/test_url.py index 3ab5dfea4..489c33105 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -955,6 +955,16 @@ def test_joinpath(base, to_join, expected): pytest.param("path", "a/", "path/a/", id="default_trailing-empty-segment"), pytest.param("path", "a//", "path/a//", id="default_trailing-empty-segments"), pytest.param("path", "a//b", "path/a//b", id="default_embedded-empty-segment"), + pytest.param( + "path/a/b/c/d/e", "a/../../../../../../c", "path/c", id="long-backtrack" + ), + pytest.param( + "path/a/b/c/d/e", + "a/../../../././../../../c", + "path/c", + id="long-backtrack-with-dots", + ), + pytest.param("path/a/../../d/e", "a/../c", "d/e/c", id="backtrack-in-both"), ], ) def test_joinpath_empty_segments(base, to_join, expected): @@ -965,6 +975,14 @@ def test_joinpath_empty_segments(base, to_join, expected): ) +def test_joinpath_backtrack_to_base(): + url = URL("http://example.com/../../c") + new_url = url.joinpath("../../..") + assert str(new_url) == "http://example.com" + assert new_url.path == "/" + assert new_url.raw_path == "/" + + def test_joinpath_single_empty_segments(): """joining standalone empty segments does not create empty segments""" a = URL("/1//2///3") diff --git a/yarl/_url.py b/yarl/_url.py index 24243f660..898118f80 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -949,6 +949,7 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": keep existing, but do not create new, empty segments """ parsed: List[str] = [] + needs_normalize: bool = False for idx, path in enumerate(reversed(paths)): # empty segment of last is not removed last = idx == 0 @@ -957,6 +958,7 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": f"Appending path {path!r} starting from slash is forbidden" ) path = path if encoded else self._PATH_QUOTER(path) + needs_normalize |= "." in path segments = list(reversed(path.split("/"))) # remove trailing empty segment for all but the last path segment_slice_start = int(not last and segments[0] == "") @@ -968,7 +970,7 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": parsed = [*old_path_segments[:old_path_cutoff], *parsed] if self.absolute: - parsed = _normalize_path_segments(parsed) + parsed = _normalize_path_segments(parsed) if needs_normalize else parsed if parsed and parsed[0] != "": # inject a leading slash when adding a path to an absolute URL # where there was none before