From 5e6781527eb05c27a7217a6f92aad0d630061ef5 Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Tue, 7 Mar 2023 21:02:32 +0200
Subject: [PATCH 01/10] gh-102153: Start stripping C0 control and space chars
 in `urlsplit`

---
 Doc/library/urllib.parse.rst                  |  9 ++++-
 Lib/test/test_urlparse.py                     | 40 ++++++++++++++++++-
 Lib/urllib/parse.py                           |  5 +++
 ...-03-07-20-59-17.gh-issue-102153.14CLSZ.rst |  3 ++
 4 files changed, 54 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 96b396510794b4..35b329a58329de 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -324,8 +324,9 @@ or on combining URL components into a URL string.
    ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
    decomposed before parsing, no error will be raised.
 
-   Following the `WHATWG spec`_ that updates RFC 3986, ASCII newline
-   ``\n``, ``\r`` and tab ``\t`` characters are stripped from the URL.
+   Following the `WHATWG spec`_ that updates RFC 3986, leading and trailing C0
+   control and space characters are stripped from the URL. ``\n``, ``\r`` and
+   tab ``\t`` characters are removed from the URL at any position.
 
    .. versionchanged:: 3.6
       Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -338,6 +339,10 @@ or on combining URL components into a URL string.
    .. versionchanged:: 3.10
       ASCII newline and tab characters are stripped from the URL.
 
+   .. versionchanged:: 3.12
+      Leading and trailing C0 control and space characters are stripped from
+      the URL
+
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
 .. function:: urlunsplit(parts)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 80fb9e5cd2a445..c522f75fb581b9 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -649,6 +649,44 @@ def test_urlsplit_remove_unsafe_bytes(self):
             self.assertEqual(p.scheme, "http")
             self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
 
+    def test_urlsplit_strip_url(self):
+        noise = bytes([*range(0, 0x1f), 0x20])
+        base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
+
+        url = noise.decode() + base_url + noise.decode()
+        p = urllib.parse.urlsplit(url)
+        self.assertEqual(p.scheme, "http")
+        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
+        self.assertEqual(p.path, "/doc/")
+        self.assertEqual(p.query, "query=yes")
+        self.assertEqual(p.fragment, "frag")
+        self.assertEqual(p.username, "User")
+        self.assertEqual(p.password, "Pass")
+        self.assertEqual(p.hostname, "www.python.org")
+        self.assertEqual(p.port, 80)
+        self.assertEqual(p.geturl(), base_url)
+
+        url = noise + base_url.encode() + noise
+        p = urllib.parse.urlsplit(url)
+        self.assertEqual(p.scheme, b"http")
+        self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
+        self.assertEqual(p.path, b"/doc/")
+        self.assertEqual(p.query, b"query=yes")
+        self.assertEqual(p.fragment, b"frag")
+        self.assertEqual(p.username, b"User")
+        self.assertEqual(p.password, b"Pass")
+        self.assertEqual(p.hostname, b"www.python.org")
+        self.assertEqual(p.port, 80)
+        self.assertEqual(p.geturl(), base_url.encode())
+
+        # with scheme as cache-key
+        url = "//www.python.org/"
+        scheme = noise.decode() + "https" + noise.decode()
+        for _ in range(2):
+            p = urllib.parse.urlsplit(url, scheme=scheme)
+            self.assertEqual(p.scheme, "https")
+            self.assertEqual(p.geturl(), "https://www.python.org/")
+
     def test_attributes_bad_port(self):
         """Check handling of invalid ports."""
         for bytes in (False, True):
@@ -656,7 +694,7 @@ def test_attributes_bad_port(self):
                 for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
                     with self.subTest(bytes=bytes, parse=parse, port=port):
                         netloc = "www.example.net:" + port
-                        url = "http://" + netloc
+                        url = "http://" + netloc + "/"
                         if bytes:
                             if netloc.isascii() and port.isascii():
                                 netloc = netloc.encode("ascii")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 5f95c5ff7f9c1c..fb4b57bdba4a99 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -79,6 +79,9 @@
                 '0123456789'
                 '+-.')
 
+# Leading and trailing C0 control and space to be stripped per WHATWG spec
+_URL_CHARS_TO_STRIP = "".join([*(chr(i) for i in range(0, 0x1f + 1)), " "])
+
 # Unsafe bytes to be removed per WHATWG spec
 _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
 
@@ -452,6 +455,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
     """
 
     url, scheme, _coerce_result = _coerce_args(url, scheme)
+    url = url.strip(_URL_CHARS_TO_STRIP)
+    scheme = scheme.strip(_URL_CHARS_TO_STRIP)
 
     for b in _UNSAFE_URL_BYTES_TO_REMOVE:
         url = url.replace(b, "")
diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
new file mode 100644
index 00000000000000..97652398a0fd70
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -0,0 +1,3 @@
+:func:`urllib.parse.urlsplit` now strips leading and trailing C0 control and
+space characters following the controlling specification for URLs defined by
+WHATWG in response to CVE-2023-24329. Patch by Illia Volochii.

From 84231baf9edef385b5f9121f61dcc916d0c0d3ef Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Tue, 7 Mar 2023 21:25:33 +0200
Subject: [PATCH 02/10] Add a period

---
 Doc/library/urllib.parse.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 35b329a58329de..4d009e73461d60 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -341,7 +341,7 @@ or on combining URL components into a URL string.
 
    .. versionchanged:: 3.12
       Leading and trailing C0 control and space characters are stripped from
-      the URL
+      the URL.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 

From 716e1c278bd3f2bd9931d7607ee6dc5da13fded6 Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Wed, 8 Mar 2023 14:15:39 +0200
Subject: [PATCH 03/10] Simplify code

---
 Lib/test/test_urlparse.py | 2 +-
 Lib/urllib/parse.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index c522f75fb581b9..60462059dad5ad 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -650,7 +650,7 @@ def test_urlsplit_remove_unsafe_bytes(self):
             self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
 
     def test_urlsplit_strip_url(self):
-        noise = bytes([*range(0, 0x1f), 0x20])
+        noise = bytes(range(0, 0x20 + 1))
         base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
 
         url = noise.decode() + base_url + noise.decode()
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index fb4b57bdba4a99..8ed485760d0ba3 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -80,7 +80,7 @@
                 '+-.')
 
 # Leading and trailing C0 control and space to be stripped per WHATWG spec
-_URL_CHARS_TO_STRIP = "".join([*(chr(i) for i in range(0, 0x1f + 1)), " "])
+_URL_CHARS_TO_STRIP = "".join([chr(i) for i in range(0, 0x20 + 1)])
 
 # Unsafe bytes to be removed per WHATWG spec
 _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

From a13bf41219b239511267a307703221f7c05514a4 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Thu, 27 Apr 2023 15:27:39 -0700
Subject: [PATCH 04/10] Expand the constant instead of computing it at import
 time.

---
 Lib/urllib/parse.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 8ed485760d0ba3..b846dc64a11874 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -80,7 +80,8 @@
                 '+-.')
 
 # Leading and trailing C0 control and space to be stripped per WHATWG spec
-_URL_CHARS_TO_STRIP = "".join([chr(i) for i in range(0, 0x20 + 1)])
+# == "".join([chr(i) for i in range(0, 0x20 + 1)])
+_URL_CHARS_TO_STRIP = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
 
 # Unsafe bytes to be removed per WHATWG spec
 _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

From c863a8107ba2d03c4039fe04b451607f3bc1103f Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith [Google LLC]" <greg@krypto.org>
Date: Mon, 1 May 2023 17:04:54 -0700
Subject: [PATCH 05/10] Only lstrip the URL to avoid breaking applications.

Many existing applications rely (for better or worse) on trailing spaces
being preserved by this API.  So this change is more conservative and keeps
them.  The issue this change is addressing is triggered by leading spaces.

One example library relying on this behavior: Django's URL validator library
(at least in Django 3.2 and earlier; I have not checked later versions).  If
trailing spaces are stripped, one code path within its checks that involves
urllib.parse can fail to reject some URLs as invalid.
---
 Doc/library/urllib.parse.rst |  9 ++++-----
 Lib/test/test_urlparse.py    | 21 +++++++++++++++++++++
 Lib/urllib/parse.py          | 14 ++++++++++----
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 4d009e73461d60..1f482655579a88 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -324,9 +324,9 @@ or on combining URL components into a URL string.
    ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
    decomposed before parsing, no error will be raised.
 
-   Following the `WHATWG spec`_ that updates RFC 3986, leading and trailing C0
-   control and space characters are stripped from the URL. ``\n``, ``\r`` and
-   tab ``\t`` characters are removed from the URL at any position.
+   Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
+   control control and space characters are stripped from the URL. ``\n``,
+   ``\r`` and tab ``\t`` characters are removed from the URL at any position.
 
    .. versionchanged:: 3.6
       Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -340,8 +340,7 @@ or on combining URL components into a URL string.
       ASCII newline and tab characters are stripped from the URL.
 
    .. versionchanged:: 3.12
-      Leading and trailing C0 control and space characters are stripped from
-      the URL.
+      Leading WHATWG C0 control and space characters are stripped from the URL.
 
 .. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
 
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 60462059dad5ad..38d8e624b88eca 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -679,6 +679,27 @@ def test_urlsplit_strip_url(self):
         self.assertEqual(p.port, 80)
         self.assertEqual(p.geturl(), base_url.encode())
 
+        # Test that trailing space is preserved as some applications rely on
+        # this within query strings.
+        query_spaces_url = "https://www.python.org:88/doc/?query=    "
+        p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
+        self.assertEqual(p.scheme, "https")
+        self.assertEqual(p.netloc, "www.python.org:88")
+        self.assertEqual(p.path, "/doc/")
+        self.assertEqual(p.query, "query=    ")
+        self.assertEqual(p.port, 88)
+        self.assertEqual(p.geturl(), query_spaces_url)
+
+        p = urllib.parse.urlsplit("www.pypi.org ")
+        # That "hostname" gets considered a "path" due to the
+        # trailing space and our existing logic...  YUCK...
+        # and re-assembles via geturl aka urlunsplit into the original.
+        # django.core.validators.URLValidator (at least through v3.2) relies on
+        # this, for better or worse, to catch it in a ValidationError via its
+        # regular expressions.
+        # Here we test the basic round trip concept of such a trailing space.
+        self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
+
         # with scheme as cache-key
         url = "//www.python.org/"
         scheme = noise.decode() + "https" + noise.decode()
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index b846dc64a11874..01953614a314be 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -25,6 +25,10 @@
 scenarios for parsing, and for backward compatibility purposes, some
 parsing quirks from older RFCs are retained. The testcases in
 test_urlparse.py provides a good indicator of parsing behavior.
+
+The WHATWG URL Parser spec should also be considered.  We are not compliant with
+it either due to existing user code API behavior expectations (Hyrum's Law).
+It serves as a useful guide when making changes.
 """
 
 from collections import namedtuple
@@ -79,9 +83,9 @@
                 '0123456789'
                 '+-.')
 
-# Leading and trailing C0 control and space to be stripped per WHATWG spec
+# Leading and trailing C0 control and space to be stripped per WHATWG spec.
 # == "".join([chr(i) for i in range(0, 0x20 + 1)])
-_URL_CHARS_TO_STRIP = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
+_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
 
 # Unsafe bytes to be removed per WHATWG spec
 _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
@@ -456,8 +460,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
     """
 
     url, scheme, _coerce_result = _coerce_args(url, scheme)
-    url = url.strip(_URL_CHARS_TO_STRIP)
-    scheme = scheme.strip(_URL_CHARS_TO_STRIP)
+    # Only lstrip url as some applications rely on preserving trailing space.
+    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
+    url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
+    scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
 
     for b in _UNSAFE_URL_BYTES_TO_REMOVE:
         url = url.replace(b, "")

From fd3e429a1c69b9a3141d5668ff9f110cc6f97035 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith [Google LLC]" <greg@krypto.org>
Date: Mon, 1 May 2023 17:21:35 -0700
Subject: [PATCH 06/10] Fix the unittests to allow trailing C0

Also be explicit about specifying utf-8 on encode and decode.
---
 Lib/test/test_urlparse.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 38d8e624b88eca..61e67b17294432 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -653,7 +653,7 @@ def test_urlsplit_strip_url(self):
         noise = bytes(range(0, 0x20 + 1))
         base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
 
-        url = noise.decode() + base_url + noise.decode()
+        url = noise.decode("utf-8") + base_url
         p = urllib.parse.urlsplit(url)
         self.assertEqual(p.scheme, "http")
         self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
@@ -666,7 +666,7 @@ def test_urlsplit_strip_url(self):
         self.assertEqual(p.port, 80)
         self.assertEqual(p.geturl(), base_url)
 
-        url = noise + base_url.encode() + noise
+        url = noise + base_url.encode("utf-8")
         p = urllib.parse.urlsplit(url)
         self.assertEqual(p.scheme, b"http")
         self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
@@ -677,7 +677,7 @@ def test_urlsplit_strip_url(self):
         self.assertEqual(p.password, b"Pass")
         self.assertEqual(p.hostname, b"www.python.org")
         self.assertEqual(p.port, 80)
-        self.assertEqual(p.geturl(), base_url.encode())
+        self.assertEqual(p.geturl(), base_url.encode("utf-8"))
 
         # Test that trailing space is preserved as some applications rely on
         # this within query strings.
@@ -702,7 +702,7 @@ def test_urlsplit_strip_url(self):
 
         # with scheme as cache-key
         url = "//www.python.org/"
-        scheme = noise.decode() + "https" + noise.decode()
+        scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
         for _ in range(2):
             p = urllib.parse.urlsplit(url, scheme=scheme)
             self.assertEqual(p.scheme, "https")

From 456d238c5a6c8565e4ba1d3edc6d93bfc049b785 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Wed, 3 May 2023 14:20:46 -0700
Subject: [PATCH 07/10] remove a duplicate word
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Lumír 'Frenzy' Balhar <frenzy.madness@gmail.com>
---
 Doc/library/urllib.parse.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index 1f482655579a88..db5fa4b3bf19be 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -325,7 +325,7 @@ or on combining URL components into a URL string.
    decomposed before parsing, no error will be raised.
 
    Following some of the `WHATWG spec`_ that updates RFC 3986, leading C0
-   control control and space characters are stripped from the URL. ``\n``,
+   control and space characters are stripped from the URL. ``\n``,
    ``\r`` and tab ``\t`` characters are removed from the URL at any position.
 
    .. versionchanged:: 3.6

From 0f7f9ea830064d3d2e96680ce63eeb503acfd1ad Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Wed, 3 May 2023 14:21:53 -0700
Subject: [PATCH 08/10] Update 2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst

remove "trailing"
---
 .../Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
index 97652398a0fd70..e57ac4ed3ac5d7 100644
--- a/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
+++ b/Misc/NEWS.d/next/Security/2023-03-07-20-59-17.gh-issue-102153.14CLSZ.rst
@@ -1,3 +1,3 @@
-:func:`urllib.parse.urlsplit` now strips leading and trailing C0 control and
-space characters following the controlling specification for URLs defined by
-WHATWG in response to CVE-2023-24329. Patch by Illia Volochii.
+:func:`urllib.parse.urlsplit` now strips leading C0 control and space
+characters following the specification for URLs defined by WHATWG in
+response to CVE-2023-24329. Patch by Illia Volochii.

From a510652af8eb02fd2377accbd66101c81bb326e8 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Wed, 17 May 2023 00:09:50 -0700
Subject: [PATCH 09/10] Add urlparse and urlsplit security warnings.

The added section describing the situation is longer than I might want,
but being more brief just leaves open questions.

This is a lighter worded version of my original text proposed in
https://discuss.python.org/t/how-to-word-a-warning-about-security-uses-in-urllib-parse-docs/26399
---
 Doc/library/urllib.parse.rst | 38 ++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index db5fa4b3bf19be..edcf815544735b 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -159,6 +159,10 @@ or on combining URL components into a URL string.
       ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
                   params='', query='', fragment='')
 
+   .. warning::
+
+      The :func:`urlparse` API does not perform validation.  See :ref:`URL
+      parsing security <url-parsing-security>` for details.
 
    .. versionchanged:: 3.2
       Added IPv6 URL parsing capabilities.
@@ -328,6 +332,11 @@ or on combining URL components into a URL string.
    control and space characters are stripped from the URL. ``\n``,
    ``\r`` and tab ``\t`` characters are removed from the URL at any position.
 
+   .. warning::
+
+      The :func:`urlsplit` API does not perform validation.  See :ref:`URL
+      parsing security <url-parsing-security>` for details.
+
    .. versionchanged:: 3.6
       Out-of-range port numbers now raise :exc:`ValueError`, instead of
       returning :const:`None`.
@@ -418,6 +427,35 @@ or on combining URL components into a URL string.
    or ``scheme://host/path``). If *url* is not a wrapped URL, it is returned
    without changes.
 
+.. _url-parsing-security:
+
+URL parsing security
+--------------------
+
+   The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation**
+   of inputs.  They may not raise errors on inputs that other applications
+   consider invalid.  They may accept and pass through some inputs that might
+   not be considered URLs elsewhere as unusually split component parts.  Their
+   purpose is for practical functionality rather than purity.
+
+   Instead of raising an exception on unusual input, they may instead return
+   some components as empty ``""`` strings. Or components may contain more than
+   perhaps they should.
+
+   We recommend that users of these APIs where the values may be used anywhere
+   with security implications code defensively. Do some verification within
+   your code before trusting a returned component part.  Does that ``scheme``
+   make sense?  Is that a sensible ``path``?  Is there anything strange about
+   that ``hostname``?  etc.
+
+   What constitutes a URL is not universally well defined.  Different
+   applications have different needs and desired constraints.  For instance the
+   living `WHATWG spec`_ describes what user facing web clients such as a web
+   browser require.  While :rfc:`3986` is more general.  These functions
+   incorporate some aspects of both, but cannot be claimed compliant with
+   either.  Our APIs and code with expectations on their behaviors predate both
+   standards.  We attempt to maintain backwards compatibility.
+
 .. _parsing-ascii-encoded-bytes:
 
 Parsing ASCII Encoded Bytes

From 766a7d3385418a85e39ccb992a459b8d553d087f Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Wed, 17 May 2023 00:26:35 -0700
Subject: [PATCH 10/10] doc formatting and wording tweaks.

---
 Doc/library/urllib.parse.rst | 54 ++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index edcf815544735b..5a9a53f83dace0 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -161,8 +161,8 @@ or on combining URL components into a URL string.
 
    .. warning::
 
-      The :func:`urlparse` API does not perform validation.  See :ref:`URL
-      parsing security <url-parsing-security>` for details.
+      :func:`urlparse` does not perform validation.  See :ref:`URL parsing
+      security <url-parsing-security>` for details.
 
    .. versionchanged:: 3.2
       Added IPv6 URL parsing capabilities.
@@ -334,8 +334,8 @@ or on combining URL components into a URL string.
 
    .. warning::
 
-      The :func:`urlsplit` API does not perform validation.  See :ref:`URL
-      parsing security <url-parsing-security>` for details.
+      :func:`urlsplit` does not perform validation.  See :ref:`URL parsing
+      security <url-parsing-security>` for details.
 
    .. versionchanged:: 3.6
       Out-of-range port numbers now raise :exc:`ValueError`, instead of
@@ -432,29 +432,29 @@ or on combining URL components into a URL string.
 URL parsing security
 --------------------
 
-   The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation**
-   of inputs.  They may not raise errors on inputs that other applications
-   consider invalid.  They may accept and pass through some inputs that might
-   not be considered URLs elsewhere as unusually split component parts.  Their
-   purpose is for practical functionality rather than purity.
-
-   Instead of raising an exception on unusual input, they may instead return
-   some components as empty ``""`` strings. Or components may contain more than
-   perhaps they should.
-
-   We recommend that users of these APIs where the values may be used anywhere
-   with security implications code defensively. Do some verification within
-   your code before trusting a returned component part.  Does that ``scheme``
-   make sense?  Is that a sensible ``path``?  Is there anything strange about
-   that ``hostname``?  etc.
-
-   What constitutes a URL is not universally well defined.  Different
-   applications have different needs and desired constraints.  For instance the
-   living `WHATWG spec`_ describes what user facing web clients such as a web
-   browser require.  While :rfc:`3986` is more general.  These functions
-   incorporate some aspects of both, but cannot be claimed compliant with
-   either.  Our APIs and code with expectations on their behaviors predate both
-   standards.  We attempt to maintain backwards compatibility.
+The :func:`urlsplit` and :func:`urlparse` APIs do not perform **validation** of
+inputs.  They may not raise errors on inputs that other applications consider
+invalid.  They may also succeed on some inputs that might not be considered
+URLs elsewhere.  Their purpose is for practical functionality rather than
+purity.
+
+Instead of raising an exception on unusual input, they may return some
+component parts as empty strings, or components may contain more than perhaps
+they should.
+
+We recommend that users of these APIs code defensively wherever the values may
+be used with security implications.  Do some verification within your code
+before trusting a returned component part.  Does that ``scheme`` make sense?
+Is that a sensible ``path``?  Is there anything strange about that
+``hostname``?  etc.
+
+What constitutes a URL is not universally well defined.  Different applications
+have different needs and desired constraints.  For instance, the living `WHATWG
+spec`_ describes what user-facing web clients such as a web browser require,
+while :rfc:`3986` is more general.  These functions incorporate some aspects of
+both, but cannot be claimed compliant with either.  The APIs and existing user
+code with expectations on specific behaviors predate both standards, leading us
+to be very cautious about making API behavior changes.
 
 .. _parsing-ascii-encoded-bytes: