From 0a8b44e67d470239f9659b6c3127af990303491f Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Tue, 27 Apr 2021 14:06:23 +0100 Subject: [PATCH] Perform port normalization for http, https, ws, wss, and ftp schemes (#1603) --- httpx/_models.py | 47 +++++++++++++++++++++++++++--------- tests/client/test_proxies.py | 3 +-- tests/models/test_url.py | 8 +++--- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/httpx/_models.py b/httpx/_models.py index fa8c266027..a6157a8728 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -100,6 +100,13 @@ class URL: url = httpx.URL("http://xn--fiqs8s.icom.museum") assert url.raw_host == b"xn--fiqs8s.icom.museum" + * `url.port` is either None or an integer. URLs that include the default port for + "http", "https", "ws", "wss", and "ftp" schemes have their port normalized to `None`. + + assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80") + assert httpx.URL("http://example.com").port is None + assert httpx.URL("http://example.com:80").port is None + * `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with `url.username` and `url.password` instead, which handle the URL escaping. @@ -144,6 +151,24 @@ def __init__( f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}" ) + # Perform port normalization, following the WHATWG spec for default ports. 
+ # + # See: + # * https://tools.ietf.org/html/rfc3986#section-3.2.3 + # * https://url.spec.whatwg.org/#url-miscellaneous + # * https://url.spec.whatwg.org/#scheme-state + default_port = { + "ftp": ":21", + "http": ":80", + "https": ":443", + "ws": ":80", + "wss": ":443", + }.get(self._uri_reference.scheme, "") + authority = self._uri_reference.authority or "" + if default_port and authority.endswith(default_port): + authority = authority[: -len(default_port)] + self._uri_reference = self._uri_reference.copy_with(authority=authority) + if kwargs: self._uri_reference = self.copy_with(**kwargs)._uri_reference @@ -253,6 +278,15 @@ def raw_host(self) -> bytes: def port(self) -> typing.Optional[int]: """ The URL port as an integer. + + Note that the URL class performs port normalization as per the WHATWG spec. + Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always + treated as `None`. + + For example: + + assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80") + assert httpx.URL("http://www.example.com:80").port is None """ port = self._uri_reference.port return int(port) if port else None @@ -263,13 +297,8 @@ def netloc(self) -> bytes: Either `<host>` or `<host>:<port>` as bytes. Always normalized to lowercase, and IDNA encoded. - The port component is not included if it is the default for an - "http://" or "https://" URL. - This property may be used for generating the value of a request "Host" header. 
- - See: https://tools.ietf.org/html/rfc3986#section-3.2.3 """ host = self._uri_reference.host or "" port = self._uri_reference.port @@ -547,7 +576,7 @@ def __hash__(self) -> int: return hash(str(self)) def __eq__(self, other: typing.Any) -> bool: - return isinstance(other, (URL, str)) and str(self) == str(other) + return isinstance(other, (URL, str)) and str(self) == str(URL(other)) def __str__(self) -> str: return self._uri_reference.unsplit() @@ -1099,11 +1128,7 @@ def _prepare(self, default_headers: typing.Dict[str, str]) -> None: ) if not has_host and self.url.host: - default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"") - host_header = self.url.netloc - if host_header.endswith(default_port): - host_header = host_header[: -len(default_port)] - auto_headers.append((b"Host", host_header)) + auto_headers.append((b"Host", self.url.netloc)) if not has_content_length and self.method in ("POST", "PUT", "PATCH"): auto_headers.append((b"Content-Length", b"0")) diff --git a/tests/client/test_proxies.py b/tests/client/test_proxies.py index d4919031a4..6ea4cbe407 100644 --- a/tests/client/test_proxies.py +++ b/tests/client/test_proxies.py @@ -79,9 +79,8 @@ def test_proxies_parameter(proxies, expected_proxies): ("http://example.com", {"all://": PROXY_URL, "http://example.com": None}, None), ("http://example.com", {"http://": PROXY_URL}, PROXY_URL), ("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL), - ("http://example.com", {"all://example.com:80": PROXY_URL}, None), ("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL), - ("http://example.com", {"http://example.com:80": PROXY_URL}, None), + ("http://example.com", {"http://example.com:80": PROXY_URL}, PROXY_URL), ("http://example.com:8080", {"http://example.com:8080": PROXY_URL}, PROXY_URL), ("http://example.com:8080", {"http://example.com": PROXY_URL}, PROXY_URL), ( diff --git a/tests/models/test_url.py b/tests/models/test_url.py index c28d070f88..cd099bd931 100644 --- 
a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -12,7 +12,7 @@ "中国.icom.museum", b"xn--fiqs8s.icom.museum", "http", - 80, + None, ), ( "http://Königsgäßchen.de", @@ -36,7 +36,7 @@ "βόλος.com", b"xn--nxasmm1c.com", "https", - 443, + None, ), ( "http://ශ්‍රී.com:444", @@ -374,5 +374,5 @@ def test_ipv6_url_from_raw_url(host): url = httpx.URL(raw_url) assert url.host == "::ffff:192.168.0.1" - assert url.netloc == b"[::ffff:192.168.0.1]:443" - assert str(url) == "https://[::ffff:192.168.0.1]:443/" + assert url.netloc == b"[::ffff:192.168.0.1]" + assert str(url) == "https://[::ffff:192.168.0.1]/"