From 97debc933a92467bdf0c902e30eacbb4e4962a50 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Mon, 23 May 2022 11:49:12 +0100 Subject: [PATCH 01/18] Drop RawURL --- httpx/_models.py | 3 +-- httpx/_types.py | 2 -- httpx/_urls.py | 34 +++------------------------------- tests/client/test_proxies.py | 4 ++-- tests/models/test_url.py | 5 ++--- 5 files changed, 8 insertions(+), 40 deletions(-) diff --git a/httpx/_models.py b/httpx/_models.py index 5a213c3564..37aab9bf36 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -35,7 +35,6 @@ CookieTypes, HeaderTypes, QueryParamTypes, - RawURL, RequestContent, RequestData, RequestFiles, @@ -304,7 +303,7 @@ class Request: def __init__( self, method: typing.Union[str, bytes], - url: typing.Union["URL", str, RawURL], + url: typing.Union["URL", str], *, params: QueryParamTypes = None, headers: HeaderTypes = None, diff --git a/httpx/_types.py b/httpx/_types.py index be2744dcf2..c5eae796a0 100644 --- a/httpx/_types.py +++ b/httpx/_types.py @@ -30,8 +30,6 @@ PrimitiveData = Optional[Union[str, int, float, bool]] -RawURL = Tuple[bytes, bytes, Optional[int], bytes] - URLTypes = Union["URL", str] QueryParamTypes = Union[ diff --git a/httpx/_urls.py b/httpx/_urls.py index f6788e5568..ea23c6f7e3 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -6,7 +6,7 @@ import rfc3986.exceptions from ._exceptions import InvalidURL -from ._types import PrimitiveData, QueryParamTypes, RawURL, URLTypes +from ._types import PrimitiveData, QueryParamTypes, URLTypes from ._utils import primitive_value_to_str @@ -71,22 +71,9 @@ class URL: """ def __init__( - self, url: typing.Union["URL", str, RawURL] = "", **kwargs: typing.Any + self, url: typing.Union["URL", str] = "", **kwargs: typing.Any ) -> None: - if isinstance(url, (str, tuple)): - if isinstance(url, tuple): - raw_scheme, raw_host, port, raw_path = url - scheme = raw_scheme.decode("ascii") - host = raw_host.decode("ascii") - if host and ":" in host and host[0] != "[": - # it's an IPv6 address, 
so it should be enclosed in "[" and "]" - # ref: https://tools.ietf.org/html/rfc2732#section-2 - # ref: https://tools.ietf.org/html/rfc3986#section-3.2.2 - host = f"[{host}]" - port_str = "" if port is None else f":{port}" - path = raw_path.decode("ascii") - url = f"{scheme}://{host}{port_str}{path}" - + if isinstance(url, str): try: self._uri_reference = rfc3986.iri_reference(url).encode() except rfc3986.exceptions.InvalidAuthority as exc: @@ -322,21 +309,6 @@ def fragment(self) -> str: """ return unquote(self._uri_reference.fragment or "") - @property - def raw(self) -> RawURL: - """ - The URL in the raw representation used by the low level - transport API. See `BaseTransport.handle_request`. - - Provides the (scheme, host, port, target) for the outgoing request. - """ - return ( - self.raw_scheme, - self.raw_host, - self.port, - self.raw_path, - ) - @property def is_absolute_url(self) -> bool: """ diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index 2e88f644bb..c44cb54aa0 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -10,8 +10,8 @@ def url_to_origin(url: str): Given a URL string, return the origin in the raw tuple format that `httpcore` uses for it's representation. 
""" - scheme, host, port = httpx.URL(url).raw[:3] - return httpcore.URL(scheme=scheme, host=host, port=port, target="/") + u = httpx.URL(url) + return httpcore.URL(scheme=u.raw_scheme, host=u.raw_host, port=u.port, target="/") @pytest.mark.parametrize( diff --git a/tests/models/test_url.py b/tests/models/test_url.py index a088fc2a10..321cffb3c9 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -417,10 +417,9 @@ def test_ipv6_url_copy_with_host(url_str, new_host): assert str(url) == "http://[::ffff:192.168.0.1]:1234" -@pytest.mark.parametrize("host", [b"[::ffff:192.168.0.1]", b"::ffff:192.168.0.1"]) +@pytest.mark.parametrize("host", ["[::ffff:192.168.0.1]", "::ffff:192.168.0.1"]) def test_ipv6_url_from_raw_url(host): - raw_url = (b"https", host, 443, b"/") - url = httpx.URL(raw_url) + url = httpx.URL(scheme="https", host=host, port=443, path="/") assert url.host == "::ffff:192.168.0.1" assert url.netloc == b"[::ffff:192.168.0.1]" From c975ab9c0cab17ab3796fd65e8407e46e9f0ae52 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 May 2022 15:14:37 +0100 Subject: [PATCH 02/18] First pass at adding urlparse --- httpx/_urlparse.py | 355 +++++++++++++++++++++++++++++++++++++++++ httpx/_urls.py | 180 +++------------------ tests/test_urlparse.py | 201 +++++++++++++++++++++++ 3 files changed, 580 insertions(+), 156 deletions(-) create mode 100644 httpx/_urlparse.py create mode 100644 tests/test_urlparse.py diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py new file mode 100644 index 0000000000..51499261c4 --- /dev/null +++ b/httpx/_urlparse.py @@ -0,0 +1,355 @@ +# TODO? +# * Make idna optional +# * hostname synonm? 
+import ipaddress +import re +import typing + +import idna + +from ._exceptions import InvalidURL + +MAX_URL_LENGTH = 65536 + +# https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 +UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +SUB_DELIMS = "!$&'()*+,;=" + +PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") + + +# {scheme}: (optional) +# //{authority} (optional) +# {path} +# ?{query} (optional) +# #{fragment} (optional) +URL_REGEX = re.compile( + ( + r"(?:(?P<scheme>{scheme}):)?" + r"(?://(?P<authority>{authority}))?" + r"(?P<path>{path})" + r"(?:\?(?P<query>{query}))?" + r"(?:#(?P<fragment>{fragment}))?" + ).format( + scheme="[a-zA-Z][a-zA-Z0-9+.-]*", + authority="[^/?#]*", + path="[^?#]*", + query="[^#]*", + fragment=".*", + ) +) + +# {userinfo}@ (optional) +# {host} +# :{port} (optional) +AUTHORITY_REGEX = re.compile( + ( + r"(?:(?P<userinfo>{userinfo})@)?" r"(?P<host>{host})" r":?(?P<port>{port})?" + ).format( + userinfo="[^@]*", # Any character sequence not including '@'. + host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', + # or an IPv6 address enclosed within square brackets. + port=".*" # Any character sequence. + ) +) + + +# If we call urlparse with an individual component, then we need to regex +# validate that component individually. +# Note that we're duplicating the same strings as above. Shock! Horror!! +COMPONENT_REGEX = { + "scheme": re.compile("([a-zA-Z][a-zA-Z0-9+.-]*)?"), + "authority": re.compile("[^/?#]*"), + "path": re.compile("[^?#]*"), + "query": re.compile("[^#]*"), + "fragment": re.compile(".*"), + "userinfo": re.compile("[^@]*"), + "host": re.compile("(\\[.*\\]|[^:]*)"), + "port": re.compile(".*") +} + + +# We use these simple regexs as a first pass before handing off to +# the stdlib 'ipaddress' module for IP address validation. 
+IPv4_STYLE_HOSTNAME = re.compile(r"^[0-9]+.[0-9]+.[0-9]+.[0-9]+$") +IPv6_STYLE_HOSTNAME = re.compile(r"^\[.*\]$") + + +class ParseResult(typing.NamedTuple): + scheme: str + userinfo: str + host: str + port: typing.Optional[int] + path: str + query: typing.Optional[str] + fragment: typing.Optional[str] + + @property + def username(self) -> str: + username, _, password = self.userinfo.partition(":") + return username + + @property + def password(self) -> str: + username, _, password = self.userinfo.partition(":") + return password + + @property + def authority(self) -> str: + return "".join([ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) + + @property + def netloc(self) -> str: + return "".join([ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) + + @property + def full_path(self) -> str: + return "".join([ + self.path, + f"?{self.query}" if self.query is not None else "", + ]) + + def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": + if not kwargs: + return self + + defaults = { + "scheme": self.scheme, + "authority": self.authority, + "path": self.path, + "query": self.query, + "fragment": self.fragment + } + defaults.update(kwargs) + return urlparse("", **defaults) + + def __str__(self) -> str: + authority = self.authority + return "".join([ + f"{self.scheme}:" if self.scheme else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + f"#{self.fragment}" if self.fragment is not None else "", + ]) + + +def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: + if len(url) > MAX_URL_LENGTH: + raise InvalidURL("URL too long") + if not url.isprintable(): + # If a URL includes any control characters including \t, \r, \n, + # then treat it as invalid. 
+ raise InvalidURL("Invalid non-printable character in URL") + + if "port" in kwargs: + port = kwargs["port"] + kwargs["port"] = str(port) if isinstance(port, int) else port + + if "netloc" in kwargs: + netloc = kwargs.pop("netloc") or "" + kwargs["host"], _, kwargs["port"] = netloc.partition(":") + + if "username" in kwargs or "password" in kwargs: + username = quote(kwargs.pop("username", "") or "") + password = quote(kwargs.pop("password", "") or "") + kwargs["userinfo"] = f"{username}:{password}" if password else username + + if "full_path" in kwargs: + full_path = kwargs.pop("full_path") or "" + kwargs["path"], seperator, kwargs["query"] = full_path.partition("?") + if not seperator: + kwargs.pop("query") + + for key, value in kwargs.items(): + if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): + raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") + + if value is not None: + if len(value) > MAX_URL_LENGTH: + raise InvalidURL(f"URL component '{key}' too long") + if not value.isprintable(): + # If a component includes any control characters including \t, \r, \n, + # then treat it as invalid. + raise InvalidURL(f"Invalid non-printable character in URL component '{key}'") + if not COMPONENT_REGEX[key].fullmatch(value): + raise InvalidURL(f"Invalid URL component '{key}'") + + # The URL_REGEX will always match, but may have empty components. + url_match = URL_REGEX.match(url) + assert url_match is not None + url_dict = url_match.groupdict() + + # * 'scheme', 'authority', and 'path' may be empty strings. + # * 'query' may be 'None', indicating no trailing "?" portion. + # Any string including the empty string, indicates a trailing "?". + # * 'fragment' may be 'None', indicating no trailing "#" portion. + # Any string including the empty string, indicates a trailing "#". 
+ scheme = kwargs.get("scheme", url_dict["scheme"]) or "" + authority = kwargs.get("authority", url_dict["authority"]) or "" + path = kwargs.get("path", url_dict["path"]) or "" + query = kwargs.get("query", url_dict["query"]) + fragment = kwargs.get("fragment", url_dict["fragment"]) + + # The AUTHORITY_REGEX will always match, but may have empty components. + authority_match = AUTHORITY_REGEX.match(authority) + assert authority_match is not None + authority_dict = authority_match.groupdict() + + # * 'userinfo' and 'host' may be empty strings. + # * 'port' may be 'None'. + userinfo = kwargs.get("userinfo", authority_dict["userinfo"]) or "" + host = kwargs.get("host", authority_dict["host"]) or "" + port = kwargs.get("port", authority_dict["port"]) + + # Normalize and validate each component. + # We end up with a parsed representation of the URL, + # with components that are plain ASCII bytestrings. + parsed_scheme: str = scheme.lower() + parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":") + parsed_host: str = encode_host(host) + parsed_port: typing.Optional[int] = normalize_port(port, scheme) + if userinfo or host or port: + validate_absolute_path(path) + path = normalize_path(path) + parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") + parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") + parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + + # The parsed ASCII bytestrings are our canonical form. + # All properties of the URL are derived from these. 
+ return ParseResult( + parsed_scheme, + parsed_userinfo, + parsed_host, + parsed_port, + parsed_path, + parsed_query, + parsed_fragment, + ) + + +def encode_host(host: str) -> str: + if not host: + return "" + + elif IPv4_STYLE_HOSTNAME.match(host): + # Validate hostnames like #.#.#.# + try: + ipaddress.IPv4Address(host) + except ipaddress.AddressValueError: + raise InvalidURL("Invalid IPv4 address") + return host + + elif IPv6_STYLE_HOSTNAME.match(host): + # Validate hostnames like [...] + # (IPv6 hostnames must always be enclosed within square brackets) + try: + ipaddress.IPv6Address(host[1:-1]) + except ipaddress.AddressValueError: + raise InvalidURL("Invalid IPv6 address") + return host[1:-1] + + elif all(ord(char) <= 127 for char in host): + # Regular ASCII hostnames + return quote(host.lower()) + + # IDNA hostnames + try: + return idna.encode(host.lower()).decode("ascii") + except idna.IDNAError: + raise InvalidURL("Invalid IDNA hostname") + + +def normalize_port( + port: typing.Optional[typing.Union[str, int]], scheme: str +) -> typing.Optional[int]: + # https://tools.ietf.org/html/rfc3986#section-3.2.3 + # + # A scheme may define a default port. For example, the "http" scheme + # defines a default port of "80", corresponding to its reserved TCP + # port number. The type of port designated by the port number (e.g., + # TCP, UDP, SCTP) is defined by the URI scheme. URI producers and + # normalizers should omit the port component and its ":" delimiter if + # port is empty or if its value would be the same as that of the + # scheme's default. + if not port: + return None + + try: + port_as_int = int(port) + except ValueError: + raise InvalidURL("Invalid port") + + default_port = {"http": 80, "https": 443}.get(scheme) + if port_as_int == default_port: + return None + return port_as_int + + +def validate_absolute_path(path: str) -> None: + # For absolute URLs the path must either be empty or start + # with a '/' character. 
+ # + # https://datatracker.ietf.org/doc/html/rfc3986/#section-3 + # https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3 + if path and not path.startswith("/"): + raise InvalidURL("For absolute URLs, path must be empty or begin with '/'") + + +def normalize_path(path: str) -> str: + """ + Drop "." and ".." segments from a URL path. + + For example: + + normalize_path("/path/./to/somewhere/..") == "/path/to" + """ + # https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 + components = path.split("/") + output: typing.List[str] = [] + for component in components: + if component == ".": + pass + elif component == "..": + if output and output != [""]: + output.pop() + else: + output.append(component) + return "/".join(output) + + +def percent_encode(char: str) -> str: + """ + Replace every character in a string with the percent-encoded representation. + + Characters outside the ASCII range are represented with a percent-encoded + representation of their UTF-8 byte sequence. + + For example: + + percent_encode(" ") == "%20" + """ + return "".join([f"%{byte:02x}" for byte in char.encode("utf-8")]).upper() + + +def quote(string: str, safe: str = "/") -> str: + ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe + if string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string)): + # If all occurrences of '%' are valid '%xx' escapes, then treat + # percent as a non-escaping character. 
+ ESCAPED_CHARS += "%" + + return "".join( + [ + char if char in ESCAPED_CHARS else percent_encode(char) + for char in string + ] + ) diff --git a/httpx/_urls.py b/httpx/_urls.py index ea23c6f7e3..98dacc63d4 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -2,11 +2,9 @@ from urllib.parse import parse_qs, quote, unquote, urlencode import idna -import rfc3986 -import rfc3986.exceptions -from ._exceptions import InvalidURL from ._types import PrimitiveData, QueryParamTypes, URLTypes +from ._urlparse import urlparse from ._utils import primitive_value_to_str @@ -74,43 +72,14 @@ def __init__( self, url: typing.Union["URL", str] = "", **kwargs: typing.Any ) -> None: if isinstance(url, str): - try: - self._uri_reference = rfc3986.iri_reference(url).encode() - except rfc3986.exceptions.InvalidAuthority as exc: - raise InvalidURL(message=str(exc)) from None - - if self.is_absolute_url: - # We don't want to normalize relative URLs, since doing so - # removes any leading `../` portion. - self._uri_reference = self._uri_reference.normalize() + self._uri_reference = urlparse(url, **kwargs) elif isinstance(url, URL): - self._uri_reference = url._uri_reference + self._uri_reference = url._uri_reference.copy_with(**kwargs) else: raise TypeError( f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}" ) - # Perform port normalization, following the WHATWG spec for default ports. 
- # - # See: - # * https://tools.ietf.org/html/rfc3986#section-3.2.3 - # * https://url.spec.whatwg.org/#url-miscellaneous - # * https://url.spec.whatwg.org/#scheme-state - default_port = { - "ftp": ":21", - "http": ":80", - "https": ":443", - "ws": ":80", - "wss": ":443", - }.get(self._uri_reference.scheme, "") - authority = self._uri_reference.authority or "" - if default_port and authority.endswith(default_port): - authority = authority[: -len(default_port)] - self._uri_reference = self._uri_reference.copy_with(authority=authority) - - if kwargs: - self._uri_reference = self.copy_with(**kwargs)._uri_reference - @property def scheme(self) -> str: """ @@ -176,10 +145,6 @@ def host(self) -> str: """ host: str = self._uri_reference.host or "" - if host and ":" in host and host[0] == "[": - # it's an IPv6 address - host = host.lstrip("[").rstrip("]") - if host.startswith("xn--"): host = idna.decode(host) @@ -206,11 +171,6 @@ def raw_host(self) -> bytes: assert url.raw_host == b"::ffff:192.168.0.1" """ host: str = self._uri_reference.host or "" - - if host and ":" in host and host[0] == "[": - # it's an IPv6 address - host = host.lstrip("[").rstrip("]") - return host.encode("ascii") @property @@ -242,8 +202,10 @@ def netloc(self) -> bytes: host = self._uri_reference.host or "" port = self._uri_reference.port netloc = host.encode("ascii") - if port: - netloc = netloc + b":" + port.encode("ascii") + if b":" in netloc: + netloc = b"[" + netloc + b"]" + if port is not None: + netloc = netloc + b":" + str(port).encode("ascii") return netloc @property @@ -355,9 +317,6 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": "params": object, } - # Step 1 - # ====== - # # Perform type checking for all supported keyword arguments. 
for key, value in kwargs.items(): if key not in allowed: @@ -368,99 +327,24 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": seen = type(value).__name__ message = f"Argument {key!r} must be {expected} but got {seen}" raise TypeError(message) + if isinstance(value, bytes): + kwargs[key] = value.decode("ascii") - # Step 2 - # ====== - # - # Consolidate "username", "password", "userinfo", "host", "port" and "netloc" - # into a single "authority" keyword, for `rfc3986`. - if "username" in kwargs or "password" in kwargs: - # Consolidate "username" and "password" into "userinfo". - username = quote(kwargs.pop("username", self.username) or "") - password = quote(kwargs.pop("password", self.password) or "") - userinfo = f"{username}:{password}" if password else username - kwargs["userinfo"] = userinfo.encode("ascii") - - if "host" in kwargs or "port" in kwargs: - # Consolidate "host" and "port" into "netloc". - host = kwargs.pop("host", self.host) or "" - port = kwargs.pop("port", self.port) - - if host and ":" in host and host[0] != "[": - # IPv6 addresses need to be escaped within square brackets. - host = f"[{host}]" - - kwargs["netloc"] = ( - f"{host}:{port}".encode("ascii") - if port is not None - else host.encode("ascii") - ) - - if "userinfo" in kwargs or "netloc" in kwargs: - # Consolidate "userinfo" and "netloc" into authority. - userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii") - netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii") - authority = f"{userinfo}@{netloc}" if userinfo else netloc - kwargs["authority"] = authority - - # Step 3 - # ====== - # - # Wrangle any "path", "query", "raw_path" and "params" keywords into - # "query" and "path" keywords for `rfc3986`. if "raw_path" in kwargs: - # If "raw_path" is included, then split it into "path" and "query" components. 
- raw_path = kwargs.pop("raw_path") or b"" - path, has_query, query = raw_path.decode("ascii").partition("?") - kwargs["path"] = path - kwargs["query"] = query if has_query else None + kwargs["full_path"] = kwargs.pop("raw_path") + + if "params" in kwargs: + # Replace any "params" keyword with the raw "query" instead. + # + # Ensure that empty params use `kwargs["query"] = None` rather + # than `kwargs["query"] = ""`, so that generated URLs do not + # include an empty trailing "?". + params = kwargs.pop("params") + kwargs["query"] = None if not params else str(QueryParams(params)) - else: - if kwargs.get("path") is not None: - # Ensure `kwargs["path"] = ` for `rfc3986`. - kwargs["path"] = quote(kwargs["path"]) - - if kwargs.get("query") is not None: - # Ensure `kwargs["query"] = ` for `rfc3986`. - # - # Note that `.copy_with(query=None)` and `.copy_with(query=b"")` - # are subtly different. The `None` style will not include an empty - # trailing "?" character. - kwargs["query"] = kwargs["query"].decode("ascii") - - if "params" in kwargs: - # Replace any "params" keyword with the raw "query" instead. - # - # Ensure that empty params use `kwargs["query"] = None` rather - # than `kwargs["query"] = ""`, so that generated URLs do not - # include an empty trailing "?". - params = kwargs.pop("params") - kwargs["query"] = None if not params else str(QueryParams(params)) - - # Step 4 - # ====== - # - # Ensure any fragment component is quoted. - if kwargs.get("fragment") is not None: - kwargs["fragment"] = quote(kwargs["fragment"]) - - # Step 5 - # ====== - # - # At this point kwargs may include keys for "scheme", "authority", "path", - # "query" and "fragment". Together these constitute the entire URL. 
- # - # See https://tools.ietf.org/html/rfc3986#section-3 - # - # foo://example.com:8042/over/there?name=ferret#nose - # \_/ \______________/\_________/ \_________/ \__/ - # | | | | | - # scheme authority path query fragment new_url = URL(self) new_url._uri_reference = self._uri_reference.copy_with(**kwargs) - if new_url.is_absolute_url: - new_url._uri_reference = new_url._uri_reference.normalize() - return URL(new_url) + return new_url def copy_set_param(self, key: str, value: typing.Any = None) -> "URL": return self.copy_with(params=self.params.set(key, value)) @@ -484,21 +368,9 @@ def join(self, url: URLTypes) -> "URL": url = url.join("/new/path") assert url == "https://www.example.com/new/path" """ - if self.is_relative_url: - # Workaround to handle relative URLs, which otherwise raise - # rfc3986.exceptions.ResolutionError when used as an argument - # in `.resolve_with`. - return ( - self.copy_with(scheme="http", host="example.com") - .join(url) - .copy_with(scheme=None, host=None) - ) + from urllib.parse import urljoin - # We drop any fragment portion, because RFC 3986 strictly - # treats URLs with a fragment portion as not being absolute URLs. - base_uri = self._uri_reference.copy_with(fragment=None) - relative_url = URL(url) - return URL(relative_url._uri_reference.resolve_with(base_uri).unsplit()) + return URL(urljoin(str(self), str(URL(url)))) def __hash__(self) -> int: return hash(str(self)) @@ -507,7 +379,7 @@ def __eq__(self, other: typing.Any) -> bool: return isinstance(other, (URL, str)) and str(self) == str(URL(other)) def __str__(self) -> str: - return self._uri_reference.unsplit() + return str(self._uri_reference) def __repr__(self) -> str: class_name = self.__class__.__name__ @@ -516,11 +388,7 @@ def __repr__(self) -> str: # Mask any password component in the URL representation, to lower the # risk of unintended leakage, such as in debug information and logging. 
username = quote(self.username) - url_str = ( - rfc3986.urlparse(url_str) - .copy_with(userinfo=f"{username}:[secure]") - .unsplit() - ) + url_str = str(self.copy_with(userinfo=f"{username}:[secure]")) return f"{class_name}({url_str!r})" diff --git a/tests/test_urlparse.py b/tests/test_urlparse.py new file mode 100644 index 0000000000..61253b69ff --- /dev/null +++ b/tests/test_urlparse.py @@ -0,0 +1,201 @@ +import pytest + +import httpx +from httpx._urlparse import urlparse + + +def test_urlparse(): + url = urlparse("https://www.example.com/") + + assert url.scheme == "https" + assert url.userinfo == "" + assert url.netloc == "www.example.com" + assert url.host == "www.example.com" + assert url.port is None + assert url.path == "/" + assert url.query is None + assert url.fragment is None + + assert str(url) == "https://www.example.com/" + + +# Tests for different host types + + +def test_urlparse_valid_host(): + url = urlparse("https://example.com/") + assert url.host == "example.com" + + +def test_urlparse_normalized_host(): + url = urlparse("https://EXAMPLE.com/") + assert url.host == "example.com" + + +def test_urlparse_valid_ipv4(): + url = urlparse("https://1.2.3.4/") + assert url.host == "1.2.3.4" + + +def test_urlparse_invalid_ipv4(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://999.999.999.999/") + assert str(exc.value) == "Invalid IPv4 address" + + +def test_urlparse_valid_ipv6(): + url = urlparse("https://[2001:db8::ff00:42:8329]/") + assert url.host == "2001:db8::ff00:42:8329" + + +def test_urlparse_invalid_ipv6(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://[2001]/") + assert str(exc.value) == "Invalid IPv6 address" + + +def test_urlparse_unescaped_idna_host(): + url = urlparse("https://中国.icom.museum/") + assert url.host == "xn--fiqs8s.icom.museum" + + +def test_urlparse_escaped_idna_host(): + url = urlparse("https://xn--fiqs8s.icom.museum/") + assert url.host == "xn--fiqs8s.icom.museum" + + +def 
test_urlparse_invalid_idna_host(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://☃.com/") + assert str(exc.value) == "Invalid IDNA hostname" + + +# Tests for different port types + + +def test_urlparse_valid_port(): + url = urlparse("https://example.com:123/") + assert url.port == 123 + + +def test_urlparse_normalized_port(): + # If the port matches the scheme default it is normalized to None. + url = urlparse("https://example.com:443/") + assert url.port is None + + +def test_urlparse_invalid_port(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://example.com:abc/") + assert str(exc.value) == "Invalid port" + + +# Tests for path handling + + +def test_urlparse_normalized_path(): + url = urlparse("https://example.com/abc/def/../ghi/./jkl") + assert url.path == "/abc/ghi/jkl" + + +def test_urlparse_escaped_path(): + url = urlparse("https://example.com/ /🌟/") + assert url.path == "/%20/%F0%9F%8C%9F/" + + +def test_urlparse_leading_dot_prefix_on_absolute_url(): + url = urlparse("https://example.com/../abc") + assert url.path == "/abc" + + +def test_urlparse_leading_dot_prefix_on_relative_url(): + url = urlparse("../abc") + assert url.path == "../abc" + + +# Tests for invalid URLs + + +def test_urlparse_excessively_long_url(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://www.example.com/" + "x" * 100_000) + assert str(exc.value) == "URL too long" + + +def test_urlparse_excessively_long_component(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://www.example.com", path="/" + "x" * 100_000) + assert str(exc.value) == "URL component 'path' too long" + + +def test_urlparse_non_printing_character_in_url(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse("https://www.example.com/\n") + assert str(exc.value) == "Invalid non-printable character in URL" + + +def test_urlparse_non_printing_character_in_component(): + with pytest.raises(httpx.InvalidURL) as exc: + 
urlparse("https://www.example.com", path="/\n") + assert str(exc.value) == "Invalid non-printable character in URL component 'path'" + + +# Test for urlparse components + + +def test_urlparse_with_components(): + url = urlparse(scheme="https", host="www.example.com", path="/") + + assert url.scheme == "https" + assert url.userinfo == "" + assert url.host == "www.example.com" + assert url.port is None + assert url.path == "/" + assert url.query is None + assert url.fragment is None + + assert str(url) == "https://www.example.com/" + + +def test_urlparse_with_invalid_component(): + with pytest.raises(TypeError) as exc: + urlparse(scheme="https", host="www.example.com", incorrect="/") + assert str(exc.value) == "'incorrect' is an invalid keyword argument for urlparse()" + + +def test_urlparse_with_invalid_scheme(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse(scheme="~", host="www.example.com", path="/") + assert str(exc.value) == "Invalid URL component 'scheme'" + + +def test_urlparse_with_invalid_path(): + with pytest.raises(httpx.InvalidURL) as exc: + urlparse(scheme="https", host="www.example.com", path="abc") + assert str(exc.value) == "For absolute URLs, path must be empty or begin with '/'" + + +def test_urlparse_with_relative_path(): + # This path would be invalid for an absolute URL, but is valid as a relative URL. + url = urlparse(path="abc") + assert url.path == "abc" + + +# Tests for accessing and modifying `urlparse` results. 
+ + +def test_copy_with(): + url = urlparse("https://www.example.com/") + assert str(url) == "https://www.example.com/" + + url = url.copy_with() + assert str(url) == "https://www.example.com/" + + url = url.copy_with(scheme="http") + assert str(url) == "http://www.example.com/" + + url = url.copy_with(netloc="example.com") + assert str(url) == "http://example.com/" + + url = url.copy_with(path="/abc") + assert str(url) == "http://example.com/abc" From 8bd5de9e52767d813ee0f75d9e8b9a63af4f928e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 24 May 2022 16:18:10 +0100 Subject: [PATCH 03/18] Update urlparse --- httpx/_urlparse.py | 31 +++++++++++++++++++++++-------- tests/test_asgi.py | 2 +- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 51499261c4..00a812a754 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -241,7 +241,11 @@ def encode_host(host: str) -> str: return "" elif IPv4_STYLE_HOSTNAME.match(host): - # Validate hostnames like #.#.#.# + # Validate IPv4 hostnames like #.#.#.# + # + # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 + # + # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet try: ipaddress.IPv4Address(host) except ipaddress.AddressValueError: @@ -249,8 +253,14 @@ def encode_host(host: str) -> str: return host elif IPv6_STYLE_HOSTNAME.match(host): - # Validate hostnames like [...] - # (IPv6 hostnames must always be enclosed within square brackets) + # Validate IPv6 hostnames like [...] + # + # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 + # + # "A host identified by an Internet Protocol literal address, version 6 + # [RFC3513] or later, is distinguished by enclosing the IP literal + # within square brackets ("[" and "]"). This is the only place where + # square bracket characters are allowed in the URI syntax." 
try: ipaddress.IPv6Address(host[1:-1]) except ipaddress.AddressValueError: @@ -259,7 +269,11 @@ def encode_host(host: str) -> str: elif all(ord(char) <= 127 for char in host): # Regular ASCII hostnames - return quote(host.lower()) + # + # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 + # + # reg-name = *( unreserved / pct-encoded / sub-delims ) + return quote(host.lower(), safe=SUB_DELIMS) # IDNA hostnames try: @@ -271,15 +285,15 @@ def encode_host(host: str) -> str: def normalize_port( port: typing.Optional[typing.Union[str, int]], scheme: str ) -> typing.Optional[int]: - # https://tools.ietf.org/html/rfc3986#section-3.2.3 + # From https://tools.ietf.org/html/rfc3986#section-3.2.3 # - # A scheme may define a default port. For example, the "http" scheme + # "A scheme may define a default port. For example, the "http" scheme # defines a default port of "80", corresponding to its reserved TCP # port number. The type of port designated by the port number (e.g., # TCP, UDP, SCTP) is defined by the URI scheme. URI producers and # normalizers should omit the port component and its ":" delimiter if # port is empty or if its value would be the same as that of the - # scheme's default. + # scheme's default." 
if not port: return None @@ -288,7 +302,8 @@ def normalize_port( except ValueError: raise InvalidURL("Invalid port") - default_port = {"http": 80, "https": 443}.get(scheme) + # See https://url.spec.whatwg.org/#url-miscellaneous + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) if port_as_int == default_port: return None return port_as_int diff --git a/tests/test_asgi.py b/tests/test_asgi.py index 60f55dfd6f..cac75cc972 100644 --- a/tests/test_asgi.py +++ b/tests/test_asgi.py @@ -116,7 +116,7 @@ async def test_asgi_raw_path(): response = await client.get(url) assert response.status_code == 200 - assert response.json() == {"raw_path": "/user%40example.org"} + assert response.json() == {"raw_path": "/user@example.org"} @pytest.mark.usefixtures("async_environment") From d38e113d3b26bfef4743654fb5c02a011ed2e259 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 27 May 2022 13:54:27 +0100 Subject: [PATCH 04/18] Add urlparse --- httpx/_urlparse.py | 134 ++++++++++++++++++++++++++++++--------------- httpx/_urls.py | 2 +- 2 files changed, 92 insertions(+), 44 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 00a812a754..ace7e64081 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -12,7 +12,9 @@ MAX_URL_LENGTH = 65536 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 -UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +UNRESERVED_CHARACTERS = ( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +) SUB_DELIMS = "!$&'()*+,;=" PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") @@ -46,10 +48,10 @@ ( r"(?:(?P{userinfo})@)?" r"(?P{host})" r":?(?P{port})?" ).format( - userinfo="[^@]*", # Any character sequence not including '@'. + userinfo="[^@]*", # Any character sequence not including '@'. host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', - # or an IPv6 address enclosed within square brackets. 
- port=".*" # Any character sequence. + # or an IPv6 address enclosed within square brackets. + port=".*", # Any character sequence. ) ) @@ -65,7 +67,7 @@ "fragment": re.compile(".*"), "userinfo": re.compile("[^@]*"), "host": re.compile("(\\[.*\\]|[^:]*)"), - "port": re.compile(".*") + "port": re.compile(".*"), } @@ -96,25 +98,31 @@ def password(self) -> str: @property def authority(self) -> str: - return "".join([ - f"{self.userinfo}@" if self.userinfo else "", - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) @property def netloc(self) -> str: - return "".join([ - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) @property def full_path(self) -> str: - return "".join([ - self.path, - f"?{self.query}" if self.query is not None else "", - ]) + return "".join( + [ + self.path, + f"?{self.query}" if self.query is not None else "", + ] + ) def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: @@ -125,60 +133,97 @@ def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": "authority": self.authority, "path": self.path, "query": self.query, - "fragment": self.fragment + "fragment": self.fragment, } defaults.update(kwargs) return urlparse("", **defaults) def __str__(self) -> str: authority = self.authority - return "".join([ - f"{self.scheme}:" if self.scheme else "", - f"//{authority}" if authority else "", - self.path, - f"?{self.query}" if self.query is not None else "", - f"#{self.fragment}" if self.fragment is not None else "", - ]) + return "".join( + [ + f"{self.scheme}:" if self.scheme 
else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + f"#{self.fragment}" if self.fragment is not None else "", + ] + ) def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: + # Initial basic checks on allowable URLs. + # --------------------------------------- + + # Hard limit the maximum allowable URL length. if len(url) > MAX_URL_LENGTH: raise InvalidURL("URL too long") + + # If a URL includes any control characters including \t, \r, \n, + # then treat it as invalid. if not url.isprintable(): - # If a URL includes any control characters including \t, \r, \n, - # then treat it as invalid. raise InvalidURL("Invalid non-printable character in URL") + # Some keyword arguments require special handling. + # ------------------------------------------------ + + # Coerce "port" to a string, if it is provided as an integer. if "port" in kwargs: port = kwargs["port"] kwargs["port"] = str(port) if isinstance(port, int) else port + # Replace "netloc" with "host and "port". if "netloc" in kwargs: netloc = kwargs.pop("netloc") or "" kwargs["host"], _, kwargs["port"] = netloc.partition(":") + # Replace "username" and/or "password" with "userinfo". if "username" in kwargs or "password" in kwargs: username = quote(kwargs.pop("username", "") or "") password = quote(kwargs.pop("password", "") or "") kwargs["userinfo"] = f"{username}:{password}" if password else username + # Replace "full_path" with "path" and "query". if "full_path" in kwargs: full_path = kwargs.pop("full_path") or "" kwargs["path"], seperator, kwargs["query"] = full_path.partition("?") if not seperator: - kwargs.pop("query") + kwargs["query"] = None + + # Ensure that IPv6 "host" addresses are always escaped with "[...]". 
+ if "host" in kwargs: + host = kwargs.get("host") or "" + if ":" in host and not (host.startswith("[") and host.endswith("]")): + kwargs["host"] = f"[{host}]" + + # If any keyword arguments are provided, ensure they are valid. + # ------------------------------------------------------------- for key, value in kwargs.items(): - if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): + if key not in ( + "scheme", + "authority", + "path", + "query", + "fragment", + "userinfo", + "host", + "port", + ): raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") if value is not None: if len(value) > MAX_URL_LENGTH: raise InvalidURL(f"URL component '{key}' too long") + + # If a component includes any control characters including \t, \r, \n, + # then treat it as invalid. if not value.isprintable(): - # If a component includes any control characters including \t, \r, \n, - # then treat it as invalid. - raise InvalidURL(f"Invalid non-printable character in URL component '{key}'") + raise InvalidURL( + f"Invalid non-printable character in URL component '{key}'" + ) + + # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value): raise InvalidURL(f"Invalid URL component '{key}'") @@ -220,8 +265,12 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: validate_absolute_path(path) path = normalize_path(path) parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") - parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") - parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + parsed_query: typing.Optional[str] = ( + None if query is None else quote(query, safe=SUB_DELIMS + "/?") + ) + parsed_fragment: typing.Optional[str] = ( + None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + ) # The parsed ASCII bytestrings are our canonical form. 
# All properties of the URL are derived from these. @@ -294,7 +343,7 @@ def normalize_port( # normalizers should omit the port component and its ":" delimiter if # port is empty or if its value would be the same as that of the # scheme's default." - if not port: + if port is None or port == "": return None try: @@ -303,7 +352,9 @@ def normalize_port( raise InvalidURL("Invalid port") # See https://url.spec.whatwg.org/#url-miscellaneous - default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( + scheme + ) if port_as_int == default_port: return None return port_as_int @@ -356,15 +407,12 @@ def percent_encode(char: str) -> str: def quote(string: str, safe: str = "/") -> str: - ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe + NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe if string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string)): # If all occurances of '%' are valid '%xx' escapes, then treat # percent as a non-escaping character. - ESCAPED_CHARS += "%" + NON_ESCAPED_CHARS += "%" return "".join( - [ - char if char in ESCAPED_CHARS else percent_encode(char) - for char in string - ] + [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] ) diff --git a/httpx/_urls.py b/httpx/_urls.py index 98dacc63d4..5c3285bb85 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -384,7 +384,7 @@ def __str__(self) -> str: def __repr__(self) -> str: class_name = self.__class__.__name__ url_str = str(self) - if self._uri_reference.userinfo: + if self._uri_reference.password: # Mask any password component in the URL representation, to lower the # risk of unintended leakage, such as in debug information and logging. 
username = quote(self.username) From 8636a785861a0f68e99f95d23a0b726376a2e7fb Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Mon, 30 May 2022 13:47:18 +0100 Subject: [PATCH 05/18] Add urlparse --- httpx/_urls.py | 119 ++++++++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/httpx/_urls.py b/httpx/_urls.py index 5c3285bb85..a823b7a6c3 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -1,5 +1,5 @@ import typing -from urllib.parse import parse_qs, quote, unquote, urlencode +from urllib.parse import parse_qs, unquote, urlencode import idna @@ -71,6 +71,47 @@ class URL: def __init__( self, url: typing.Union["URL", str] = "", **kwargs: typing.Any ) -> None: + if kwargs: + allowed = { + "scheme": str, + "username": str, + "password": str, + "userinfo": bytes, + "host": str, + "port": int, + "netloc": bytes, + "path": str, + "query": bytes, + "raw_path": bytes, + "fragment": str, + "params": object, + } + + # Perform type checking for all supported keyword arguments. + for key, value in kwargs.items(): + if key not in allowed: + message = f"{key!r} is an invalid keyword argument for URL()" + raise TypeError(message) + if value is not None and not isinstance(value, allowed[key]): + expected = allowed[key].__name__ + seen = type(value).__name__ + message = f"Argument {key!r} must be {expected} but got {seen}" + raise TypeError(message) + if isinstance(value, bytes): + kwargs[key] = value.decode("ascii") + + if "raw_path" in kwargs: + kwargs["full_path"] = kwargs.pop("raw_path") + + if "params" in kwargs: + # Replace any "params" keyword with the raw "query" instead. + # + # Ensure that empty params use `kwargs["query"] = None` rather + # than `kwargs["query"] = ""`, so that generated URLs do not + # include an empty trailing "?". 
+ params = kwargs.pop("params") + kwargs["query"] = None if not params else str(QueryParams(params)) + if isinstance(url, str): self._uri_reference = urlparse(url, **kwargs) elif isinstance(url, URL): @@ -302,49 +343,7 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": url = httpx.URL("https://www.example.com").copy_with(username="jo@gmail.com", password="a secret") assert url == "https://jo%40email.com:a%20secret@www.example.com" """ - allowed = { - "scheme": str, - "username": str, - "password": str, - "userinfo": bytes, - "host": str, - "port": int, - "netloc": bytes, - "path": str, - "query": bytes, - "raw_path": bytes, - "fragment": str, - "params": object, - } - - # Perform type checking for all supported keyword arguments. - for key, value in kwargs.items(): - if key not in allowed: - message = f"{key!r} is an invalid keyword argument for copy_with()" - raise TypeError(message) - if value is not None and not isinstance(value, allowed[key]): - expected = allowed[key].__name__ - seen = type(value).__name__ - message = f"Argument {key!r} must be {expected} but got {seen}" - raise TypeError(message) - if isinstance(value, bytes): - kwargs[key] = value.decode("ascii") - - if "raw_path" in kwargs: - kwargs["full_path"] = kwargs.pop("raw_path") - - if "params" in kwargs: - # Replace any "params" keyword with the raw "query" instead. - # - # Ensure that empty params use `kwargs["query"] = None` rather - # than `kwargs["query"] = ""`, so that generated URLs do not - # include an empty trailing "?". 
- params = kwargs.pop("params") - kwargs["query"] = None if not params else str(QueryParams(params)) - - new_url = URL(self) - new_url._uri_reference = self._uri_reference.copy_with(**kwargs) - return new_url + return URL(self, **kwargs) def copy_set_param(self, key: str, value: typing.Any = None) -> "URL": return self.copy_with(params=self.params.set(key, value)) @@ -382,14 +381,30 @@ def __str__(self) -> str: return str(self._uri_reference) def __repr__(self) -> str: - class_name = self.__class__.__name__ - url_str = str(self) - if self._uri_reference.password: - # Mask any password component in the URL representation, to lower the - # risk of unintended leakage, such as in debug information and logging. - username = quote(self.username) - url_str = str(self.copy_with(userinfo=f"{username}:[secure]")) - return f"{class_name}({url_str!r})" + scheme, userinfo, host, port, path, query, fragment = self._uri_reference + + if ":" in userinfo: + # Mask any password component. + userinfo = f'{userinfo.split(":")[0]}:[secure]' + + authority = "".join( + [ + f"{userinfo}@" if userinfo else "", + f"[{host}]" if ":" in host else host, + f":{port}" if port is not None else "", + ] + ) + url = "".join( + [ + f"{self.scheme}:" if scheme else "", + f"//{authority}" if authority else "", + path, + f"?{query}" if query is not None else "", + f"#{fragment}" if fragment is not None else "", + ] + ) + + return f"{self.__class__.__name__}({url!r})" class QueryParams(typing.Mapping[str, str]): From 02d6593a78873961f7a02e807df46d7b5a247622 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 31 May 2022 12:26:33 +0100 Subject: [PATCH 06/18] Unicode non-printables can be valid in IDNA hostnames --- httpx/_urlparse.py | 12 +++++------ tests/models/test_url.py | 44 ++++------------------------------------ tests/test_urlparse.py | 7 +++++-- 3 files changed, 15 insertions(+), 48 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index ace7e64081..aa584897ab 100644 --- 
a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -159,10 +159,10 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: if len(url) > MAX_URL_LENGTH: raise InvalidURL("URL too long") - # If a URL includes any control characters including \t, \r, \n, + # If a URL includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. - if not url.isprintable(): - raise InvalidURL("Invalid non-printable character in URL") + if any(char.isascii() and not char.isprintable() for char in url): + raise InvalidURL("Invalid non-printable ASCII character in URL") # Some keyword arguments require special handling. # ------------------------------------------------ @@ -216,11 +216,11 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: if len(value) > MAX_URL_LENGTH: raise InvalidURL(f"URL component '{key}' too long") - # If a component includes any control characters including \t, \r, \n, + # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. - if not value.isprintable(): + if any(char.isascii() and not char.isprintable() for char in value): raise InvalidURL( - f"Invalid non-printable character in URL component '{key}'" + f"Invalid non-printable ASCII character in URL component '{key}'" ) # Ensure that keyword arguments match as a valid regex. 
diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 321cffb3c9..8a5d6f496a 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -312,49 +312,13 @@ def test_url_copywith_security(): """ Prevent unexpected changes on URL after calling copy_with (CVE-2021-41945) """ - url = httpx.URL("https://u:p@[invalid!]//evilHost/path?t=w#tw") - original_scheme = url.scheme - original_userinfo = url.userinfo - original_netloc = url.netloc - original_raw_path = url.raw_path - original_query = url.query - original_fragment = url.fragment - url = url.copy_with() - assert url.scheme == original_scheme - assert url.userinfo == original_userinfo - assert url.netloc == original_netloc - assert url.raw_path == original_raw_path - assert url.query == original_query - assert url.fragment == original_fragment - - url = httpx.URL("https://u:p@[invalid!]//evilHost/path?t=w#tw") - original_scheme = url.scheme - original_netloc = url.netloc - original_raw_path = url.raw_path - original_query = url.query - original_fragment = url.fragment - url = url.copy_with(userinfo=b"") - assert url.scheme == original_scheme - assert url.userinfo == b"" - assert url.netloc == original_netloc - assert url.raw_path == original_raw_path - assert url.query == original_query - assert url.fragment == original_fragment + with pytest.raises(httpx.InvalidURL): + httpx.URL("https://u:p@[invalid!]//evilHost/path?t=w#tw") url = httpx.URL("https://example.com/path?t=w#tw") - original_userinfo = url.userinfo - original_netloc = url.netloc - original_raw_path = url.raw_path - original_query = url.query - original_fragment = url.fragment bad = "https://xxxx:xxxx@xxxxxxx/xxxxx/xxx?x=x#xxxxx" - url = url.copy_with(scheme=bad) - assert url.scheme == bad - assert url.userinfo == original_userinfo - assert url.netloc == original_netloc - assert url.raw_path == original_raw_path - assert url.query == original_query - assert url.fragment == original_fragment + with 
pytest.raises(httpx.InvalidURL): + url.copy_with(scheme=bad) def test_url_invalid(): diff --git a/tests/test_urlparse.py b/tests/test_urlparse.py index 61253b69ff..3e562b79ab 100644 --- a/tests/test_urlparse.py +++ b/tests/test_urlparse.py @@ -131,13 +131,16 @@ def test_urlparse_excessively_long_component(): def test_urlparse_non_printing_character_in_url(): with pytest.raises(httpx.InvalidURL) as exc: urlparse("https://www.example.com/\n") - assert str(exc.value) == "Invalid non-printable character in URL" + assert str(exc.value) == "Invalid non-printable ASCII character in URL" def test_urlparse_non_printing_character_in_component(): with pytest.raises(httpx.InvalidURL) as exc: urlparse("https://www.example.com", path="/\n") - assert str(exc.value) == "Invalid non-printable character in URL component 'path'" + assert ( + str(exc.value) + == "Invalid non-printable ASCII character in URL component 'path'" + ) # Test for urlparse components From a9da21fcabe0698b9361ae65a720f3a52497e07d Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 31 May 2022 13:11:30 +0100 Subject: [PATCH 07/18] Update _urlparse.py docstring --- httpx/_urlparse.py | 114 +++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index aa584897ab..e5b8dfb58b 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -1,6 +1,20 @@ -# TODO? -# * Make idna optional -# * hostname synonm? +""" +An implementation of `urlparse` that provides URL validation and normalization +as described by RFC3986. + +We rely on this implementation rather than the one in Python's stdlib, because: + +* It provides more complete URL validation. +* It properly differentiates between an empty querystring and an absent querystring, + to distinguish URLs with a trailing '?'. +* It handles scheme, hostname, port, and path normalization. +* It supports IDNA hostnames, normalizing them to their encoded form. 
+* The API supports passing individual components, as well as the complete URL string. + +Previously we relied on the excellent `rfc3986` package to handle URL parsing and +validation, but this module provides a simpler alternative, with less indirection +required. +""" import ipaddress import re import typing @@ -12,9 +26,7 @@ MAX_URL_LENGTH = 65536 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 -UNRESERVED_CHARACTERS = ( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" -) +UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" SUB_DELIMS = "!$&'()*+,;=" PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") @@ -48,10 +60,10 @@ ( r"(?:(?P{userinfo})@)?" r"(?P{host})" r":?(?P{port})?" ).format( - userinfo="[^@]*", # Any character sequence not including '@'. + userinfo="[^@]*", # Any character sequence not including '@'. host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', - # or an IPv6 address enclosed within square brackets. - port=".*", # Any character sequence. + # or an IPv6 address enclosed within square brackets. + port=".*" # Any character sequence. 
) ) @@ -67,7 +79,7 @@ "fragment": re.compile(".*"), "userinfo": re.compile("[^@]*"), "host": re.compile("(\\[.*\\]|[^:]*)"), - "port": re.compile(".*"), + "port": re.compile(".*") } @@ -98,31 +110,25 @@ def password(self) -> str: @property def authority(self) -> str: - return "".join( - [ - f"{self.userinfo}@" if self.userinfo else "", - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "", - ] - ) + return "".join([ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) @property def netloc(self) -> str: - return "".join( - [ - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "", - ] - ) + return "".join([ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) @property def full_path(self) -> str: - return "".join( - [ - self.path, - f"?{self.query}" if self.query is not None else "", - ] - ) + return "".join([ + self.path, + f"?{self.query}" if self.query is not None else "", + ]) def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: @@ -133,22 +139,20 @@ def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": "authority": self.authority, "path": self.path, "query": self.query, - "fragment": self.fragment, + "fragment": self.fragment } defaults.update(kwargs) return urlparse("", **defaults) def __str__(self) -> str: authority = self.authority - return "".join( - [ - f"{self.scheme}:" if self.scheme else "", - f"//{authority}" if authority else "", - self.path, - f"?{self.query}" if self.query is not None else "", - f"#{self.fragment}" if self.fragment is not None else "", - ] - ) + return "".join([ + f"{self.scheme}:" if self.scheme else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + 
f"#{self.fragment}" if self.fragment is not None else "", + ]) def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: @@ -200,16 +204,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # ------------------------------------------------------------- for key, value in kwargs.items(): - if key not in ( - "scheme", - "authority", - "path", - "query", - "fragment", - "userinfo", - "host", - "port", - ): + if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") if value is not None: @@ -219,9 +214,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in value): - raise InvalidURL( - f"Invalid non-printable ASCII character in URL component '{key}'" - ) + raise InvalidURL(f"Invalid non-printable ASCII character in URL component '{key}'") # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value): @@ -265,12 +258,8 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: validate_absolute_path(path) path = normalize_path(path) parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") - parsed_query: typing.Optional[str] = ( - None if query is None else quote(query, safe=SUB_DELIMS + "/?") - ) - parsed_fragment: typing.Optional[str] = ( - None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") - ) + parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") + parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") # The parsed ASCII bytestrings are our canonical form. # All properties of the URL are derived from these. 
@@ -352,9 +341,7 @@ def normalize_port( raise InvalidURL("Invalid port") # See https://url.spec.whatwg.org/#url-miscellaneous - default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( - scheme - ) + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) if port_as_int == default_port: return None return port_as_int @@ -414,5 +401,8 @@ def quote(string: str, safe: str = "/") -> str: NON_ESCAPED_CHARS += "%" return "".join( - [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] + [ + char if char in NON_ESCAPED_CHARS else percent_encode(char) + for char in string + ] ) From 36a8d8c25aa36385385472dc83ccb65f5afd52fa Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 31 May 2022 13:20:43 +0100 Subject: [PATCH 08/18] Linting --- httpx/_urlparse.py | 94 +++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index e5b8dfb58b..94aa250933 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -26,7 +26,9 @@ MAX_URL_LENGTH = 65536 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 -UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +UNRESERVED_CHARACTERS = ( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +) SUB_DELIMS = "!$&'()*+,;=" PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") @@ -60,10 +62,10 @@ ( r"(?:(?P{userinfo})@)?" r"(?P{host})" r":?(?P{port})?" ).format( - userinfo="[^@]*", # Any character sequence not including '@'. + userinfo="[^@]*", # Any character sequence not including '@'. host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', - # or an IPv6 address enclosed within square brackets. - port=".*" # Any character sequence. + # or an IPv6 address enclosed within square brackets. + port=".*", # Any character sequence. 
) ) @@ -79,7 +81,7 @@ "fragment": re.compile(".*"), "userinfo": re.compile("[^@]*"), "host": re.compile("(\\[.*\\]|[^:]*)"), - "port": re.compile(".*") + "port": re.compile(".*"), } @@ -110,25 +112,31 @@ def password(self) -> str: @property def authority(self) -> str: - return "".join([ - f"{self.userinfo}@" if self.userinfo else "", - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) @property def netloc(self) -> str: - return "".join([ - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) @property def full_path(self) -> str: - return "".join([ - self.path, - f"?{self.query}" if self.query is not None else "", - ]) + return "".join( + [ + self.path, + f"?{self.query}" if self.query is not None else "", + ] + ) def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: @@ -139,20 +147,22 @@ def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": "authority": self.authority, "path": self.path, "query": self.query, - "fragment": self.fragment + "fragment": self.fragment, } defaults.update(kwargs) return urlparse("", **defaults) def __str__(self) -> str: authority = self.authority - return "".join([ - f"{self.scheme}:" if self.scheme else "", - f"//{authority}" if authority else "", - self.path, - f"?{self.query}" if self.query is not None else "", - f"#{self.fragment}" if self.fragment is not None else "", - ]) + return "".join( + [ + f"{self.scheme}:" if self.scheme else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + 
f"#{self.fragment}" if self.fragment is not None else "", + ] + ) def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: @@ -204,7 +214,16 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # ------------------------------------------------------------- for key, value in kwargs.items(): - if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): + if key not in ( + "scheme", + "authority", + "path", + "query", + "fragment", + "userinfo", + "host", + "port", + ): raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") if value is not None: @@ -214,7 +233,9 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in value): - raise InvalidURL(f"Invalid non-printable ASCII character in URL component '{key}'") + raise InvalidURL( + f"Invalid non-printable ASCII character in URL component '{key}'" + ) # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value): @@ -258,8 +279,12 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: validate_absolute_path(path) path = normalize_path(path) parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") - parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") - parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + parsed_query: typing.Optional[str] = ( + None if query is None else quote(query, safe=SUB_DELIMS + "/?") + ) + parsed_fragment: typing.Optional[str] = ( + None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + ) # The parsed ASCII bytestrings are our canonical form. # All properties of the URL are derived from these. 
@@ -341,7 +366,9 @@ def normalize_port( raise InvalidURL("Invalid port") # See https://url.spec.whatwg.org/#url-miscellaneous - default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( + scheme + ) if port_as_int == default_port: return None return port_as_int @@ -401,8 +428,5 @@ def quote(string: str, safe: str = "/") -> str: NON_ESCAPED_CHARS += "%" return "".join( - [ - char if char in NON_ESCAPED_CHARS else percent_encode(char) - for char in string - ] + [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] ) From f0b79b3e231e52386598a723237efe9b9c4f9b4b Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 31 May 2022 14:05:55 +0100 Subject: [PATCH 09/18] Trim away unused codepaths --- httpx/_urlparse.py | 19 ------------------- httpx/_urls.py | 22 ++++++---------------- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 94aa250933..947c01e01a 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -100,16 +100,6 @@ class ParseResult(typing.NamedTuple): query: typing.Optional[str] fragment: typing.Optional[str] - @property - def username(self) -> str: - username, _, password = self.userinfo.partition(":") - return username - - @property - def password(self) -> str: - username, _, password = self.userinfo.partition(":") - return password - @property def authority(self) -> str: return "".join( @@ -129,15 +119,6 @@ def netloc(self) -> str: ] ) - @property - def full_path(self) -> str: - return "".join( - [ - self.path, - f"?{self.query}" if self.query is not None else "", - ] - ) - def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: return self diff --git a/httpx/_urls.py b/httpx/_urls.py index 1578830164..681462529b 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -127,7 +127,7 @@ def scheme(self) -> str: The URL scheme, such 
as "http", "https". Always normalised to lowercase. """ - return self._uri_reference.scheme or "" + return self._uri_reference.scheme @property def raw_scheme(self) -> bytes: @@ -135,7 +135,7 @@ def raw_scheme(self) -> bytes: The raw bytes representation of the URL scheme, such as b"http", b"https". Always normalised to lowercase. """ - return self.scheme.encode("ascii") + return self._uri_reference.scheme.encode("ascii") @property def userinfo(self) -> bytes: @@ -143,8 +143,7 @@ def userinfo(self) -> bytes: The URL userinfo as a raw bytestring. For example: b"jo%40email.com:a%20secret". """ - userinfo = self._uri_reference.userinfo or "" - return userinfo.encode("ascii") + return self._uri_reference.userinfo.encode("ascii") @property def username(self) -> str: @@ -211,8 +210,7 @@ def raw_host(self) -> bytes: url = httpx.URL("https://[::ffff:192.168.0.1]") assert url.raw_host == b"::ffff:192.168.0.1" """ - host: str = self._uri_reference.host or "" - return host.encode("ascii") + return self._uri_reference.host.encode("ascii") @property def port(self) -> typing.Optional[int]: @@ -228,8 +226,7 @@ def port(self) -> typing.Optional[int]: assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80") assert httpx.URL("http://www.example.com:80").port is None """ - port = self._uri_reference.port - return int(port) if port else None + return self._uri_reference.port @property def netloc(self) -> bytes: @@ -240,14 +237,7 @@ def netloc(self) -> bytes: This property may be used for generating the value of a request "Host" header. 
""" - host = self._uri_reference.host or "" - port = self._uri_reference.port - netloc = host.encode("ascii") - if b":" in netloc: - netloc = b"[" + netloc + b"]" - if port is not None: - netloc = netloc + b":" + str(port).encode("ascii") - return netloc + return self._uri_reference.netloc.encode("ascii") @property def path(self) -> str: From 31231a168b75cec2e62cc524c5f0320fc98390aa Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 31 May 2022 16:30:07 +0100 Subject: [PATCH 10/18] Tweaks for path validation depending on scheme and authority presence --- httpx/_urlparse.py | 44 +++++++++++++++++++++++++++++++----------- httpx/_urls.py | 6 +++--- tests/test_urlparse.py | 28 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 14 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 947c01e01a..8d27e5df25 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -47,7 +47,7 @@ r"(?:\?(?P{query}))?" r"(?:#(?P{fragment}))?" ).format( - scheme="[a-zA-Z][a-zA-Z0-9+.-]*", + scheme="([a-zA-Z][a-zA-Z0-9+.-]*)?", authority="[^/?#]*", path="[^?#]*", query="[^#]*", @@ -256,9 +256,15 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":") parsed_host: str = encode_host(host) parsed_port: typing.Optional[int] = normalize_port(port, scheme) - if userinfo or host or port: - validate_absolute_path(path) + + has_scheme = parsed_scheme != "" + has_authority = ( + parsed_userinfo != "" or parsed_host != "" or parsed_port is not None + ) + validate_path(path, has_scheme=has_scheme, has_authority=has_authority) + if has_authority: path = normalize_path(path) + parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") parsed_query: typing.Optional[str] = ( None if query is None else quote(query, safe=SUB_DELIMS + "/?") @@ -355,14 +361,30 @@ def normalize_port( return port_as_int -def validate_absolute_path(path: str) -> None: - # For absolute URLs the path must either be 
empty or start - # with a '/' character. - # - # https://datatracker.ietf.org/doc/html/rfc3986/#section-3 - # https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3 - if path and not path.startswith("/"): - raise InvalidURL("For absolute URLs, path must be empty or begin with '/'") +def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: + """ + Path validation rules that depend on if the URL contains a scheme or authority component. + + See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3 + """ + if has_authority: + # > If a URI contains an authority component, then the path component + # > must either be empty or begin with a slash ("/") character." + if path and not path.startswith("/"): + raise InvalidURL("For absolute URLs, path must be empty or begin with '/'") + else: + # > If a URI does not contain an authority component, then the path cannot begin + # > with two slash characters ("//"). + if path.startswith("//"): + raise InvalidURL( + "URLs with no authority component cannot have a path starting with '//'" + ) + # > In addition, a URI reference (Section 4.1) may be a relative-path reference, in which + # > case the first path segment cannot contain a colon (":") character. + if path.startswith(":") and not has_scheme: + raise InvalidURL( + "URLs with no scheme component cannot have a path starting with ':'" + ) def normalize_path(path: str) -> str: diff --git a/httpx/_urls.py b/httpx/_urls.py index 681462529b..b49e10763d 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -151,7 +151,7 @@ def username(self) -> str: The URL username as a string, with URL decoding applied. For example: "jo@email.com" """ - userinfo = self._uri_reference.userinfo or "" + userinfo = self._uri_reference.userinfo return unquote(userinfo.partition(":")[0]) @property @@ -160,7 +160,7 @@ def password(self) -> str: The URL password as a string, with URL decoding applied. 
For example: "a secret" """ - userinfo = self._uri_reference.userinfo or "" + userinfo = self._uri_reference.userinfo return unquote(userinfo.partition(":")[2]) @property @@ -183,7 +183,7 @@ def host(self) -> str: url = httpx.URL("https://[::ffff:192.168.0.1]") assert url.host == "::ffff:192.168.0.1" """ - host: str = self._uri_reference.host or "" + host: str = self._uri_reference.host if host.startswith("xn--"): host = idna.decode(host) diff --git a/tests/test_urlparse.py b/tests/test_urlparse.py index 3e562b79ab..e48ffa64e1 100644 --- a/tests/test_urlparse.py +++ b/tests/test_urlparse.py @@ -19,6 +19,20 @@ def test_urlparse(): assert str(url) == "https://www.example.com/" +def test_urlparse_no_scheme(): + url = urlparse("://example.com") + assert url.scheme == "" + assert url.host == "example.com" + assert url.path == "" + + +def test_urlparse_no_authority(): + url = urlparse("http://") + assert url.scheme == "http" + assert url.host == "" + assert url.path == "" + + # Tests for different host types @@ -177,6 +191,20 @@ def test_urlparse_with_invalid_path(): urlparse(scheme="https", host="www.example.com", path="abc") assert str(exc.value) == "For absolute URLs, path must be empty or begin with '/'" + with pytest.raises(httpx.InvalidURL) as exc: + urlparse(path="//abc") + assert ( + str(exc.value) + == "URLs with no authority component cannot have a path starting with '//'" + ) + + with pytest.raises(httpx.InvalidURL) as exc: + urlparse(path=":abc") + assert ( + str(exc.value) + == "URLs with no scheme component cannot have a path starting with ':'" + ) + def test_urlparse_with_relative_path(): # This path would be invalid for an absolute URL, but is valid as a relative URL. 
From f9d3ce6fdf1c0db490bcace1f303460cbeb542c7 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 13:38:27 +0100 Subject: [PATCH 11/18] Minor cleanups --- httpx/_urlparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 8d27e5df25..9c183962bc 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -317,7 +317,7 @@ def encode_host(host: str) -> str: raise InvalidURL("Invalid IPv6 address") return host[1:-1] - elif all(ord(char) <= 127 for char in host): + elif host.isascii(): # Regular ASCII hostnames # # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2 @@ -327,7 +327,7 @@ def encode_host(host: str) -> str: # IDNA hostnames try: - return idna.encode(host.lower()).decode("ascii") + return idna.encode(host).decode("ascii").lower() except idna.IDNAError: raise InvalidURL("Invalid IDNA hostname") From 2351dd88c27a473c4ff7c5e06a3c279c00323ada Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 13:59:05 +0100 Subject: [PATCH 12/18] Minor cleanups --- httpx/_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 9c183962bc..75859f342d 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -327,7 +327,7 @@ def encode_host(host: str) -> str: # IDNA hostnames try: - return idna.encode(host).decode("ascii").lower() + return idna.encode(host.lower()).decode("ascii") except idna.IDNAError: raise InvalidURL("Invalid IDNA hostname") From cedfd9c2a7f4c85a21c5d93cf68eb82877767d31 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 14:21:01 +0100 Subject: [PATCH 13/18] full_path -> raw_path, for internal consistency --- httpx/_urlparse.py | 104 +++++++++++++++++---------------------------- httpx/_urls.py | 3 -- 2 files changed, 38 insertions(+), 69 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 75859f342d..7664087885 100644 --- a/httpx/_urlparse.py +++ 
b/httpx/_urlparse.py @@ -26,9 +26,7 @@ MAX_URL_LENGTH = 65536 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 -UNRESERVED_CHARACTERS = ( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" -) +UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" SUB_DELIMS = "!$&'()*+,;=" PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") @@ -62,10 +60,10 @@ ( r"(?:(?P{userinfo})@)?" r"(?P{host})" r":?(?P{port})?" ).format( - userinfo="[^@]*", # Any character sequence not including '@'. + userinfo="[^@]*", # Any character sequence not including '@'. host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', - # or an IPv6 address enclosed within square brackets. - port=".*", # Any character sequence. + # or an IPv6 address enclosed within square brackets. + port=".*" # Any character sequence. ) ) @@ -81,7 +79,7 @@ "fragment": re.compile(".*"), "userinfo": re.compile("[^@]*"), "host": re.compile("(\\[.*\\]|[^:]*)"), - "port": re.compile(".*"), + "port": re.compile(".*") } @@ -102,22 +100,18 @@ class ParseResult(typing.NamedTuple): @property def authority(self) -> str: - return "".join( - [ - f"{self.userinfo}@" if self.userinfo else "", - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "", - ] - ) + return "".join([ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) @property def netloc(self) -> str: - return "".join( - [ - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "", - ] - ) + return "".join([ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "" + ]) def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: @@ -128,22 +122,20 @@ def copy_with(self, **kwargs: typing.Optional[str]) -> 
"ParseResult": "authority": self.authority, "path": self.path, "query": self.query, - "fragment": self.fragment, + "fragment": self.fragment } defaults.update(kwargs) return urlparse("", **defaults) def __str__(self) -> str: authority = self.authority - return "".join( - [ - f"{self.scheme}:" if self.scheme else "", - f"//{authority}" if authority else "", - self.path, - f"?{self.query}" if self.query is not None else "", - f"#{self.fragment}" if self.fragment is not None else "", - ] - ) + return "".join([ + f"{self.scheme}:" if self.scheme else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + f"#{self.fragment}" if self.fragment is not None else "", + ]) def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: @@ -178,10 +170,10 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: password = quote(kwargs.pop("password", "") or "") kwargs["userinfo"] = f"{username}:{password}" if password else username - # Replace "full_path" with "path" and "query". - if "full_path" in kwargs: - full_path = kwargs.pop("full_path") or "" - kwargs["path"], seperator, kwargs["query"] = full_path.partition("?") + # Replace "raw_path" with "path" and "query". 
+ if "raw_path" in kwargs: + raw_path = kwargs.pop("raw_path") or "" + kwargs["path"], seperator, kwargs["query"] = raw_path.partition("?") if not seperator: kwargs["query"] = None @@ -195,16 +187,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # ------------------------------------------------------------- for key, value in kwargs.items(): - if key not in ( - "scheme", - "authority", - "path", - "query", - "fragment", - "userinfo", - "host", - "port", - ): + if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") if value is not None: @@ -214,9 +197,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in value): - raise InvalidURL( - f"Invalid non-printable ASCII character in URL component '{key}'" - ) + raise InvalidURL(f"Invalid non-printable ASCII character in URL component '{key}'") # Ensure that keyword arguments match as a valid regex. 
if not COMPONENT_REGEX[key].fullmatch(value): @@ -258,20 +239,14 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: parsed_port: typing.Optional[int] = normalize_port(port, scheme) has_scheme = parsed_scheme != "" - has_authority = ( - parsed_userinfo != "" or parsed_host != "" or parsed_port is not None - ) + has_authority = parsed_userinfo != "" or parsed_host != "" or parsed_port is not None validate_path(path, has_scheme=has_scheme, has_authority=has_authority) if has_authority: path = normalize_path(path) parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") - parsed_query: typing.Optional[str] = ( - None if query is None else quote(query, safe=SUB_DELIMS + "/?") - ) - parsed_fragment: typing.Optional[str] = ( - None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") - ) + parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") + parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") # The parsed ASCII bytestrings are our canonical form. # All properties of the URL are derived from these. @@ -353,9 +328,7 @@ def normalize_port( raise InvalidURL("Invalid port") # See https://url.spec.whatwg.org/#url-miscellaneous - default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( - scheme - ) + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) if port_as_int == default_port: return None return port_as_int @@ -376,15 +349,11 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: # > If a URI does not contain an authority component, then the path cannot begin # > with two slash characters ("//"). 
if path.startswith("//"): - raise InvalidURL( - "URLs with no authority component cannot have a path starting with '//'" - ) + raise InvalidURL("URLs with no authority component cannot have a path starting with '//'") # > In addition, a URI reference (Section 4.1) may be a relative-path reference, in which # > case the first path segment cannot contain a colon (":") character. if path.startswith(":") and not has_scheme: - raise InvalidURL( - "URLs with no scheme component cannot have a path starting with ':'" - ) + raise InvalidURL("URLs with no scheme component cannot have a path starting with ':'") def normalize_path(path: str) -> str: @@ -431,5 +400,8 @@ def quote(string: str, safe: str = "/") -> str: NON_ESCAPED_CHARS += "%" return "".join( - [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] + [ + char if char in NON_ESCAPED_CHARS else percent_encode(char) + for char in string + ] ) diff --git a/httpx/_urls.py b/httpx/_urls.py index b49e10763d..b111855d36 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -100,9 +100,6 @@ def __init__( if isinstance(value, bytes): kwargs[key] = value.decode("ascii") - if "raw_path" in kwargs: - kwargs["full_path"] = kwargs.pop("raw_path") - if "params" in kwargs: # Replace any "params" keyword with the raw "query" instead. 
# From 1b4801d7784b79bcb71932427bd49225d3a627ec Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 14:46:17 +0100 Subject: [PATCH 14/18] Linting fixes --- httpx/_urlparse.py | 96 ++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 34 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 7664087885..e16e812391 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -26,7 +26,9 @@ MAX_URL_LENGTH = 65536 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3 -UNRESERVED_CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +UNRESERVED_CHARACTERS = ( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" +) SUB_DELIMS = "!$&'()*+,;=" PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}") @@ -60,10 +62,10 @@ ( r"(?:(?P{userinfo})@)?" r"(?P{host})" r":?(?P{port})?" ).format( - userinfo="[^@]*", # Any character sequence not including '@'. + userinfo="[^@]*", # Any character sequence not including '@'. host="(\\[.*\\]|[^:]*)", # Either any character sequence not including ':', - # or an IPv6 address enclosed within square brackets. - port=".*" # Any character sequence. + # or an IPv6 address enclosed within square brackets. + port=".*", # Any character sequence. 
) ) @@ -79,7 +81,7 @@ "fragment": re.compile(".*"), "userinfo": re.compile("[^@]*"), "host": re.compile("(\\[.*\\]|[^:]*)"), - "port": re.compile(".*") + "port": re.compile(".*"), } @@ -100,18 +102,22 @@ class ParseResult(typing.NamedTuple): @property def authority(self) -> str: - return "".join([ - f"{self.userinfo}@" if self.userinfo else "", - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"{self.userinfo}@" if self.userinfo else "", + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) @property def netloc(self) -> str: - return "".join([ - f"[{self.host}]" if ":" in self.host else self.host, - f":{self.port}" if self.port is not None else "" - ]) + return "".join( + [ + f"[{self.host}]" if ":" in self.host else self.host, + f":{self.port}" if self.port is not None else "", + ] + ) def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": if not kwargs: @@ -122,20 +128,22 @@ def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult": "authority": self.authority, "path": self.path, "query": self.query, - "fragment": self.fragment + "fragment": self.fragment, } defaults.update(kwargs) return urlparse("", **defaults) def __str__(self) -> str: authority = self.authority - return "".join([ - f"{self.scheme}:" if self.scheme else "", - f"//{authority}" if authority else "", - self.path, - f"?{self.query}" if self.query is not None else "", - f"#{self.fragment}" if self.fragment is not None else "", - ]) + return "".join( + [ + f"{self.scheme}:" if self.scheme else "", + f"//{authority}" if authority else "", + self.path, + f"?{self.query}" if self.query is not None else "", + f"#{self.fragment}" if self.fragment is not None else "", + ] + ) def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: @@ -187,7 +195,16 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> 
ParseResult: # ------------------------------------------------------------- for key, value in kwargs.items(): - if key not in ("scheme", "authority", "path", "query", "fragment", "userinfo", "host", "port"): + if key not in ( + "scheme", + "authority", + "path", + "query", + "fragment", + "userinfo", + "host", + "port", + ): raise TypeError(f"'{key}' is an invalid keyword argument for urlparse()") if value is not None: @@ -197,7 +214,9 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # If a component includes any ASCII control characters including \t, \r, \n, # then treat it as invalid. if any(char.isascii() and not char.isprintable() for char in value): - raise InvalidURL(f"Invalid non-printable ASCII character in URL component '{key}'") + raise InvalidURL( + f"Invalid non-printable ASCII character in URL component '{key}'" + ) # Ensure that keyword arguments match as a valid regex. if not COMPONENT_REGEX[key].fullmatch(value): @@ -239,14 +258,20 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: parsed_port: typing.Optional[int] = normalize_port(port, scheme) has_scheme = parsed_scheme != "" - has_authority = parsed_userinfo != "" or parsed_host != "" or parsed_port is not None + has_authority = ( + parsed_userinfo != "" or parsed_host != "" or parsed_port is not None + ) validate_path(path, has_scheme=has_scheme, has_authority=has_authority) if has_authority: path = normalize_path(path) parsed_path: str = quote(path, safe=SUB_DELIMS + ":@/") - parsed_query: typing.Optional[str] = None if query is None else quote(query, safe=SUB_DELIMS + "/?") - parsed_fragment: typing.Optional[str] = None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + parsed_query: typing.Optional[str] = ( + None if query is None else quote(query, safe=SUB_DELIMS + "/?") + ) + parsed_fragment: typing.Optional[str] = ( + None if fragment is None else quote(fragment, safe=SUB_DELIMS + "/?") + ) # The parsed ASCII 
bytestrings are our canonical form. # All properties of the URL are derived from these. @@ -328,7 +353,9 @@ def normalize_port( raise InvalidURL("Invalid port") # See https://url.spec.whatwg.org/#url-miscellaneous - default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get(scheme) + default_port = {"ftp": 21, "http": 80, "https": 443, "ws": 80, "wss": 443}.get( + scheme + ) if port_as_int == default_port: return None return port_as_int @@ -349,11 +376,15 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None: # > If a URI does not contain an authority component, then the path cannot begin # > with two slash characters ("//"). if path.startswith("//"): - raise InvalidURL("URLs with no authority component cannot have a path starting with '//'") + raise InvalidURL( + "URLs with no authority component cannot have a path starting with '//'" + ) # > In addition, a URI reference (Section 4.1) may be a relative-path reference, in which # > case the first path segment cannot contain a colon (":") character. 
if path.startswith(":") and not has_scheme: - raise InvalidURL("URLs with no scheme component cannot have a path starting with ':'") + raise InvalidURL( + "URLs with no scheme component cannot have a path starting with ':'" + ) def normalize_path(path: str) -> str: @@ -400,8 +431,5 @@ def quote(string: str, safe: str = "/") -> str: NON_ESCAPED_CHARS += "%" return "".join( - [ - char if char in NON_ESCAPED_CHARS else percent_encode(char) - for char in string - ] + [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] ) From 2e0ec53349e7650fe66334d82fa176f19db22cb5 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 14:48:55 +0100 Subject: [PATCH 15/18] Drop rfc3986 dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 52bedbab97..ed34093463 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def get_packages(package): install_requires=[ "certifi", "sniffio", - "rfc3986[idna2008]>=1.3,<2", + "idna", "httpcore>=0.15.0,<0.16.0", ], extras_require={ From f3d596b574b0ff87532d23ff71e3f9568031eccd Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 1 Jun 2022 14:55:25 +0100 Subject: [PATCH 16/18] Add test for #1833 --- tests/models/test_url.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 8a5d6f496a..25f8fd9481 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -388,3 +388,11 @@ def test_ipv6_url_from_raw_url(host): assert url.host == "::ffff:192.168.0.1" assert url.netloc == b"[::ffff:192.168.0.1]" assert str(url) == "https://[::ffff:192.168.0.1]/" + + +def test_resolution_error_1833(): + """ + See https://github.com/encode/httpx/issues/1833 + """ + url = httpx.URL("https://example.com/?[]") + assert url.join("/") == "https://example.com/" From 6dd270fd685dcc80340eeb41a114aab30fb3277b Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 8 Dec 2022 14:11:57 +0000 Subject: 
[PATCH 17/18] Linting --- httpx/_urls.py | 1 + 1 file changed, 1 insertion(+) diff --git a/httpx/_urls.py b/httpx/_urls.py index adb028cce5..1bcbc8b29a 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -67,6 +67,7 @@ class URL: * `url.query` is raw bytes, without URL escaping. A URL query string portion can only be properly URL escaped when decoding the parameter names and values themselves. """ + def __init__( self, url: typing.Union["URL", str] = "", **kwargs: typing.Any ) -> None: From ed1c5e0ebe00546b9bc6f0d44c90a63daa96e804 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 30 Dec 2022 10:09:26 +0000 Subject: [PATCH 18/18] Drop 'rfc3986' dependency from README and docs homepage --- README.md | 3 +-- docs/index.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 520e85c360..4d25491a6a 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,7 @@ The HTTPX project relies on these excellent libraries: * `httpcore` - The underlying transport implementation for `httpx`. * `h11` - HTTP/1.1 support. * `certifi` - SSL certificates. -* `rfc3986` - URL parsing & normalization. - * `idna` - Internationalized domain name support. +* `idna` - Internationalized domain name support. * `sniffio` - Async library autodetection. As well as these optional installs: diff --git a/docs/index.md b/docs/index.md index ec16ce7d1a..cd25ee6ca5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -109,8 +109,7 @@ The HTTPX project relies on these excellent libraries: * `httpcore` - The underlying transport implementation for `httpx`. * `h11` - HTTP/1.1 support. * `certifi` - SSL certificates. -* `rfc3986` - URL parsing & normalization. - * `idna` - Internationalized domain name support. +* `idna` - Internationalized domain name support. * `sniffio` - Async library autodetection. As well as these optional installs: