Skip to content

Commit

Permalink
[3.9] bpo-43882 Remove the newline, and tab early. From query and fra…
Browse files Browse the repository at this point in the history
…gments. (#25853)

* Remove the newline, and tab early. From query and fragments.
  • Loading branch information
orsenthil authored May 3, 2021
1 parent bf12424 commit 8a59574
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
24 changes: 16 additions & 8 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,32 +614,40 @@ def test_urlsplit_attributes(self):

def test_urlsplit_remove_unsafe_bytes(self):
# Remove ASCII tabs and newlines from input
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "www.python.org")
self.assertEqual(p.path, "/javascript:alert('msg')/")
self.assertEqual(p.query, "")
self.assertEqual(p.fragment, "frag")
self.assertEqual(p.query, "query=something")
self.assertEqual(p.fragment, "fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, "www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")

# Remove ASCII tabs and newlines from input as bytes.
url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, b"http")
self.assertEqual(p.netloc, b"www.python.org")
self.assertEqual(p.path, b"/javascript:alert('msg')/")
self.assertEqual(p.query, b"")
self.assertEqual(p.fragment, b"frag")
self.assertEqual(p.query, b"query=something")
self.assertEqual(p.fragment, b"fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, b"www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")

# with scheme as cache-key
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
scheme = "ht\ntp"
for _ in range(2):
p = urllib.parse.urlsplit(url, scheme=scheme)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")

def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
Expand Down
8 changes: 5 additions & 3 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""

url, scheme, _coerce_result = _coerce_args(url, scheme)

for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
scheme = scheme.replace(b, "")

allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
Expand All @@ -472,9 +477,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
else:
scheme, url = url[:i].lower(), url[i+1:]

for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")

if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
Expand Down

0 comments on commit 8a59574

Please sign in to comment.