Skip to content

Commit

Permalink
Improve performance of URL.update_query (#1130)
Browse files Browse the repository at this point in the history
  • Loading branch information
bdraco authored Sep 8, 2024
1 parent ee90b00 commit 92dda6d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGES/1130.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance of the :meth:`URL.update_query() <yarl.URL.update_query>` method -- by :user:`bdraco`.
10 changes: 10 additions & 0 deletions tests/test_update_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,16 @@ def test_update_query_multiple_keys():
assert str(u2) == "http://example.com/path?a=3&a=4"


def test_update_query_with_non_ascii():
url = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6")
assert url.update_query({"𝕦": "𝕦"}) == url


def test_update_query_with_non_ascii_as_str():
url = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6")
assert url.update_query("𝕦=𝕦") == url


# mod operator


Expand Down
39 changes: 24 additions & 15 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
)

import idna
from multidict import MultiDict, MultiDictProxy
from multidict import MultiDict, MultiDictProxy, istr

from ._helpers import cached_property
from ._quoting import _Quoter, _Unquoter
Expand Down Expand Up @@ -88,6 +88,7 @@ class _InternalURLCache(TypedDict, total=False):
explicit_port: Union[int, None]
raw_path: str
path: str
_parsed_query: List[Tuple[str, str]]
query: "MultiDictProxy[str]"
raw_query_string: str
query_string: str
Expand Down Expand Up @@ -697,6 +698,11 @@ def path(self) -> str:
"""
return self._PATH_UNQUOTER(self.raw_path)

@cached_property
def _parsed_query(self) -> List[Tuple[str, str]]:
"""Parse query part of URL."""
return parse_qsl(self.raw_query_string, keep_blank_values=True)

@cached_property
def query(self) -> "MultiDictProxy[str]":
"""A MultiDictProxy representing parsed query parameters in decoded
Expand All @@ -705,8 +711,7 @@ def query(self) -> "MultiDictProxy[str]":
Empty value if URL has no query part.
"""
ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
return MultiDictProxy(ret)
return MultiDictProxy(MultiDict(self._parsed_query))

@cached_property
def raw_query_string(self) -> str:
Expand Down Expand Up @@ -1176,6 +1181,15 @@ def _query_var(v: QueryVariable) -> str:
"of type {}".format(v, cls)
)

def _get_str_query_from_iterable(
self, items: Iterable[Tuple[Union[str, istr], str]]
) -> str:
"""Return a query string from an iterable."""
quoter = self._QUERY_PART_QUOTER
# A listcomp is used since listcomps are inlined on CPython 3.12+ and
# they are a bit faster than a generator expression.
return "&".join([f"{quoter(k)}={quoter(self._query_var(v))}" for k, v in items])

def _get_str_query(self, *args: Any, **kwargs: Any) -> Union[str, None]:
query: Union[str, Mapping[str, QueryVariable], None]
if kwargs:
Expand All @@ -1201,14 +1215,11 @@ def _get_str_query(self, *args: Any, **kwargs: Any) -> Union[str, None]:
"Invalid query type: bytes, bytearray and memoryview are forbidden"
)
elif isinstance(query, Sequence):
quoter = self._QUERY_PART_QUOTER
# We don't expect sequence values if we're given a list of pairs
# already; only mappings like builtin `dict` which can't have the
# same key pointing to multiple values are allowed to use
# `_query_seq_pairs`.
query = "&".join(
quoter(k) + "=" + quoter(self._query_var(v)) for k, v in query
)
return self._get_str_query_from_iterable(query)
else:
raise TypeError(
"Invalid query type: only str, mapping or "
Expand Down Expand Up @@ -1250,15 +1261,13 @@ def update_query(self, **kwargs: QueryVariable) -> "URL": ...
def update_query(self, *args: Any, **kwargs: Any) -> "URL":
"""Return a new URL with query part updated."""
s = self._get_str_query(*args, **kwargs)
query = None
if s is not None:
new_query = MultiDict(parse_qsl(s, keep_blank_values=True))
query = MultiDict(self.query)
query.update(new_query)
if s is None:
return URL(self._val._replace(query=""), encoded=True)

return URL(
self._val._replace(query=self._get_str_query(query) or ""), encoded=True
)
query = MultiDict(self._parsed_query)
query.update(parse_qsl(s, keep_blank_values=True))
new_str = self._get_str_query_from_iterable(query.items())
return URL(self._val._replace(query=new_str), encoded=True)

def without_query_params(self, *query_params: str) -> "URL":
"""Remove some keys from query part and return new URL."""
Expand Down

0 comments on commit 92dda6d

Please sign in to comment.