From d5c12ba890557a575c313bb3017910d7616fce3d Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:11:40 +0100 Subject: [PATCH] [PR #7661/85713a48 backport][3.8] Update Python parser for RFCs 9110/9112 (#7662) **This is a backport of PR #7661 as merged into 3.9 (85713a4894610e848490915e5871ad71199348e2).** None Co-authored-by: Sam Bull --- aiohttp/http_parser.py | 84 +++++++++++++++++++++---------------- tests/test_http_parser.py | 87 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 131 insertions(+), 40 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 982570396c6..4356670c3ed 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -60,16 +60,16 @@ ASCIISET: Final[Set[str]] = set(string.printable) -# See https://tools.ietf.org/html/rfc7230#section-3.1.1 -# and https://tools.ietf.org/html/rfc7230#appendix-B +# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview +# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens # # method = token # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA # token = 1*tchar METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") -VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)") -HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]") +VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") +HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]") class RawRequestMessage(NamedTuple): @@ -148,8 +148,11 @@ def parse_headers( except ValueError: raise InvalidHeader(line) from None - bname = bname.strip(b" \t") - bvalue = bvalue.lstrip() + # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"} + raise InvalidHeader(line) + + bvalue = bvalue.lstrip(b" \t") if HDRRE.search(bname): raise InvalidHeader(bname) if len(bname) > self.max_field_size: @@ -170,6 +173,7 @@ def parse_headers( # consume continuation lines continuation = line and line[0] in (32, 9) # (' ', '\t') + # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding if continuation: bvalue_lst = [bvalue] while continuation: @@ -204,10 +208,14 @@ def parse_headers( str(header_length), ) - bvalue = bvalue.strip() + bvalue = bvalue.strip(b" \t") name = bname.decode("utf-8", "surrogateescape") value = bvalue.decode("utf-8", "surrogateescape") + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + if "\n" in value or "\r" in value or "\x00" in value: + raise InvalidHeader(bvalue) + headers.add(name, value) raw_headers.append((bname, bvalue)) @@ -322,15 +330,12 @@ def get_content_length() -> Optional[int]: if length_hdr is None: return None - try: - length = int(length_hdr) - except ValueError: + # Shouldn't allow +/- or other number formats. + # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2 + if not length_hdr.strip(" \t").isdigit(): raise InvalidHeader(CONTENT_LENGTH) - if length < 0: - raise InvalidHeader(CONTENT_LENGTH) - - return length + return int(length_hdr) length = get_content_length() # do not support old websocket spec @@ -470,6 +475,24 @@ def parse_headers( upgrade = False chunked = False + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 + # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf + singletons = ( + hdrs.CONTENT_LENGTH, + hdrs.CONTENT_LOCATION, + hdrs.CONTENT_RANGE, + hdrs.CONTENT_TYPE, + hdrs.ETAG, + hdrs.HOST, + hdrs.MAX_FORWARDS, + hdrs.SERVER, + hdrs.TRANSFER_ENCODING, + hdrs.USER_AGENT, + ) + bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) + if bad_hdr is not None: + raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") + # keep-alive conn = headers.get(hdrs.CONNECTION) if conn: @@ -523,7 +546,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: # request line line = lines[0].decode("utf-8", "surrogateescape") try: - method, path, version = line.split(None, 2) + method, path, version = line.split(maxsplit=2) except ValueError: raise BadStatusLine(line) from None @@ -537,14 +560,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: raise BadStatusLine(method) # version - try: - if version.startswith("HTTP/"): - n1, n2 = version[5:].split(".", 1) - version_o = HttpVersion(int(n1), int(n2)) - else: - raise BadStatusLine(version) - except Exception: - raise BadStatusLine(version) + match = VERSRE.match(version) + if match is None: + raise BadStatusLine(line) + version_o = HttpVersion(int(match.group(1)), int(match.group(2))) if method == "CONNECT": # authority-form, @@ -611,12 +630,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]): def parse_message(self, lines: List[bytes]) -> RawResponseMessage: line = lines[0].decode("utf-8", "surrogateescape") try: - version, status = line.split(None, 1) + version, status = line.split(maxsplit=1) except ValueError: raise BadStatusLine(line) from None try: - status, reason = status.split(None, 1) + status, reason = status.split(maxsplit=1) except ValueError: reason = "" @@ -632,13 +651,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage: version_o = HttpVersion(int(match.group(1)), int(match.group(2))) # The status code is a three-digit number - try: - status_i = int(status) - except ValueError: - raise BadStatusLine(line) from None - - if status_i > 999: + if len(status) != 3 or not status.isdigit(): raise BadStatusLine(line) + status_i = int(status) # read headers ( @@ -773,14 +788,13 @@ def feed_data( else: size_b = chunk[:pos] - try: - size = int(bytes(size_b), 16) - except ValueError: + if not size_b.isdigit(): exc = TransferEncodingError( chunk[:pos].decode("ascii", "surrogateescape") ) self.payload.set_exception(exc) - raise exc from None + raise exc + size = int(bytes(size_b), 16) chunk = chunk[pos + 2 :] if size == 0: # eof marker diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 6c2067c6ec2..b002fed43f2 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -465,6 +465,74 @@ def test_invalid_name(parser) -> None: parser.feed_data(text) +def test_cve_2023_37276(parser: Any) -> None: + text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n""" + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + +@pytest.mark.parametrize( + "hdr", + ( + "Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length + "Content-Length: +256", + "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + "Bar: abc\ndef", + "Baz: abc\x00def", + "Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + "Foo\t: bar", + ), +) +def test_bad_headers(parser: Any, hdr: str) -> None: + text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode() + with pytest.raises(http_exceptions.InvalidHeader): + parser.feed_data(text) + + +def test_bad_chunked_py(loop: Any, protocol: Any) -> None: + """Test that invalid chunked encoding doesn't allow content-length to be used.""" + parser = HttpRequestParserPy( + protocol, + loop, + 2**16, + max_line_size=8190, + max_field_size=8190, + ) + text = ( + b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n" + + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n" + ) + messages, upgrade, tail = parser.feed_data(text) + assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError) + + +@pytest.mark.skipif( + "HttpRequestParserC" not in dir(aiohttp.http_parser), + reason="C based HTTP parser not available", +) +def test_bad_chunked_c(loop: Any, protocol: Any) -> None: + """C parser behaves differently. Maybe we should align them later.""" + parser = HttpRequestParserC( + protocol, + loop, + 2**16, + max_line_size=8190, + max_field_size=8190, + ) + text = ( + b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n" + + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n" + ) + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + +def test_whitespace_before_header(parser: Any) -> None: + text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX" + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + @pytest.mark.parametrize("size", [40960, 8191]) def test_max_header_field_size(parser, size) -> None: name = b"t" * size @@ -646,6 +714,11 @@ def test_http_request_parser_bad_version(parser) -> None: parser.feed_data(b"GET //get HT/11\r\n\r\n") +def test_http_request_parser_bad_version_number(parser: Any) -> None: + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n") + + @pytest.mark.parametrize("size", [40965, 8191]) def test_http_request_max_status_line(parser, size) -> None: path = b"t" * (size - 5) @@ -713,6 +786,11 @@ def test_http_response_parser_bad_version(response) -> None: response.feed_data(b"HT/11 200 Ok\r\n\r\n") +def test_http_response_parser_bad_version_number(response) -> None: + with pytest.raises(http_exceptions.BadHttpMessage): + response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n") + + def test_http_response_parser_no_reason(response) -> None: msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0] @@ -743,19 +821,18 @@ def test_http_response_parser_bad(response) -> None: response.feed_data(b"HTT/1\r\n\r\n") -@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser") def test_http_response_parser_code_under_100(response) -> None: - msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0] - assert msg.code == 99 + with pytest.raises(http_exceptions.BadStatusLine): + response.feed_data(b"HTTP/1.1 99 test\r\n\r\n") def test_http_response_parser_code_above_999(response) -> None: - with pytest.raises(http_exceptions.BadHttpMessage): + with pytest.raises(http_exceptions.BadStatusLine): response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n") def test_http_response_parser_code_not_int(response) -> None: - with pytest.raises(http_exceptions.BadHttpMessage): + with pytest.raises(http_exceptions.BadStatusLine): response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")