Skip to content

Commit

Permalink
Perform port normalization for http, https, ws, wss, and ftp schemes (#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
tomchristie committed Apr 27, 2021
1 parent c927f3e commit 0a8b44e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 17 deletions.
47 changes: 36 additions & 11 deletions httpx/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ class URL:
url = httpx.URL("http://xn--fiqs8s.icom.museum")
assert url.raw_host == b"xn--fiqs8s.icom.museum"
* `url.port` is either None or an integer. URLs that include the default port for
"http", "https", "ws", "wss", and "ftp" schemes have their port normalized to `None`.
assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
assert httpx.URL("http://example.com").port is None
assert httpx.URL("http://example.com:80").port is None
* `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with
`url.username` and `url.password` instead, which handle the URL escaping.
Expand Down Expand Up @@ -144,6 +151,24 @@ def __init__(
f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}"
)

# Perform port normalization, following the WHATWG spec for default ports.
#
# See:
# * https://tools.ietf.org/html/rfc3986#section-3.2.3
# * https://url.spec.whatwg.org/#url-miscellaneous
# * https://url.spec.whatwg.org/#scheme-state
default_port = {
"ftp": ":21",
"http": ":80",
"https": ":443",
"ws": ":80",
"wss": ":443",
}.get(self._uri_reference.scheme, "")
authority = self._uri_reference.authority or ""
if default_port and authority.endswith(default_port):
authority = authority[: -len(default_port)]
self._uri_reference = self._uri_reference.copy_with(authority=authority)

if kwargs:
self._uri_reference = self.copy_with(**kwargs)._uri_reference

Expand Down Expand Up @@ -253,6 +278,15 @@ def raw_host(self) -> bytes:
def port(self) -> typing.Optional[int]:
"""
The URL port as an integer.
Note that the URL class performs port normalization as per the WHATWG spec.
Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
treated as `None`.
For example:
assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
assert httpx.URL("http://www.example.com:80").port is None
"""
port = self._uri_reference.port
return int(port) if port else None
Expand All @@ -263,13 +297,8 @@ def netloc(self) -> bytes:
Either `<host>` or `<host>:<port>` as bytes.
Always normalized to lowercase, and IDNA encoded.
The port component is not included if it is the default for an
"http://" or "https://" URL.
This property may be used for generating the value of a request
"Host" header.
See: https://tools.ietf.org/html/rfc3986#section-3.2.3
"""
host = self._uri_reference.host or ""
port = self._uri_reference.port
Expand Down Expand Up @@ -547,7 +576,7 @@ def __hash__(self) -> int:
return hash(str(self))

def __eq__(self, other: typing.Any) -> bool:
return isinstance(other, (URL, str)) and str(self) == str(other)
return isinstance(other, (URL, str)) and str(self) == str(URL(other))

def __str__(self) -> str:
return self._uri_reference.unsplit()
Expand Down Expand Up @@ -1099,11 +1128,7 @@ def _prepare(self, default_headers: typing.Dict[str, str]) -> None:
)

if not has_host and self.url.host:
default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"")
host_header = self.url.netloc
if host_header.endswith(default_port):
host_header = host_header[: -len(default_port)]
auto_headers.append((b"Host", host_header))
auto_headers.append((b"Host", self.url.netloc))
if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
auto_headers.append((b"Content-Length", b"0"))

Expand Down
3 changes: 1 addition & 2 deletions tests/client/test_proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ def test_proxies_parameter(proxies, expected_proxies):
("http://example.com", {"all://": PROXY_URL, "http://example.com": None}, None),
("http://example.com", {"http://": PROXY_URL}, PROXY_URL),
("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL),
("http://example.com", {"all://example.com:80": PROXY_URL}, None),
("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL),
("http://example.com", {"http://example.com:80": PROXY_URL}, None),
("http://example.com", {"http://example.com:80": PROXY_URL}, PROXY_URL),
("http://example.com:8080", {"http://example.com:8080": PROXY_URL}, PROXY_URL),
("http://example.com:8080", {"http://example.com": PROXY_URL}, PROXY_URL),
(
Expand Down
8 changes: 4 additions & 4 deletions tests/models/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"中国.icom.museum",
b"xn--fiqs8s.icom.museum",
"http",
80,
None,
),
(
"http://Königsgäßchen.de",
Expand All @@ -36,7 +36,7 @@
"βόλος.com",
b"xn--nxasmm1c.com",
"https",
443,
None,
),
(
"http://ශ්‍රී.com:444",
Expand Down Expand Up @@ -374,5 +374,5 @@ def test_ipv6_url_from_raw_url(host):
url = httpx.URL(raw_url)

assert url.host == "::ffff:192.168.0.1"
assert url.netloc == b"[::ffff:192.168.0.1]:443"
assert str(url) == "https://[::ffff:192.168.0.1]:443/"
assert url.netloc == b"[::ffff:192.168.0.1]"
assert str(url) == "https://[::ffff:192.168.0.1]/"

0 comments on commit 0a8b44e

Please sign in to comment.