Skip to content

Commit

Permalink
Merge pull request #484 from KeepSafe/url_quoting
Browse files Browse the repository at this point in the history
Fix #480: Properly requote URL's path
  • Loading branch information
asvetlov committed Sep 2, 2015
2 parents a2f0563 + 0b1b73e commit 3970c74
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ CHANGES
* `ClientResponse.close()` forces connection closing by default from now on #479
N.B. Backward incompatible change: was `.close(force=False)`
Using `force` parameter for the method is deprecated: use `.release()` instead.

* Properly requote URL's path #480
4 changes: 2 additions & 2 deletions aiohttp/client_reqrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def update_path(self, params):
else:
query = params

self.path = urllib.parse.urlunsplit(
('', '', urllib.parse.quote(path, safe='/%:='), query, fragment))
self.path = urllib.parse.urlunsplit(('', '', helpers.requote_uri(path),
query, fragment))
self.url = urllib.parse.urlunsplit(
(scheme, netloc, self.path, '', ''))

Expand Down
4 changes: 4 additions & 0 deletions aiohttp/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,7 @@ def __repr__(self):
return '<{} expected={} got={} host={} port={}>'.format(
self.__class__.__name__, self.expected, self.got,
self.host, self.port)


class InvalidURL(Exception):
    """Raised when a URL cannot be processed because it is malformed.

    For example, the URL-requoting helpers raise it for a percent-escape
    sequence whose two characters do not form a hexadecimal number.
    """
53 changes: 51 additions & 2 deletions aiohttp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import base64
import io
import os
import urllib.parse
from urllib.parse import quote, urlencode
from collections import namedtuple
from wsgiref.handlers import format_date_time

from . import hdrs, multidict
from .errors import InvalidURL

__all__ = ('BasicAuth', 'FormData', 'parse_mimetype')

Expand Down Expand Up @@ -126,7 +127,7 @@ def _gen_form_urlencoded(self, encoding):
for type_options, _, value in self._fields:
data.append((type_options['name'], value))

data = urllib.parse.urlencode(data, doseq=True)
data = urlencode(data, doseq=True)
return data.encode(encoding)

def _gen_form_data(self, *args, **kwargs):
Expand Down Expand Up @@ -322,3 +323,51 @@ def __get__(self, inst, owner, _marker=_marker):

def __set__(self, inst, value):
raise AttributeError("reified property is read-only")


# The "unreserved" URI characters of RFC 3986: ASCII letters, decimal
# digits, hyphen, period, underscore and tilde.  Kept as a frozenset so that
# per-character membership tests are cheap.
UNRESERVED_SET = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "-._~"
)


def unquote_unreserved(uri):
    """Un-escape percent-escapes in *uri* that encode unreserved characters.

    Only escapes whose decoded character is in ``UNRESERVED_SET`` are
    replaced by that character.  Escapes for reserved, illegal and
    non-ASCII bytes are left encoded, and a '%' that is not followed by two
    alphanumeric characters is kept verbatim.

    Raises:
        InvalidURL: if a '%' is followed by two alphanumeric characters
            that are not both ASCII hexadecimal digits.
    """
    # A percent-escape is '%' plus exactly two ASCII hex digits.  Check this
    # explicitly: both str.isalnum() and int(h, 16) also accept non-ASCII
    # decimal digits (e.g. fullwidth '４１'), which would otherwise be
    # silently decoded as if they were a real escape.
    hex_digits = "0123456789ABCDEFabcdef"

    head, *escaped = uri.split('%')
    pieces = [head]
    for chunk in escaped:
        h = chunk[:2]
        if len(h) == 2 and h.isalnum():
            if h[0] not in hex_digits or h[1] not in hex_digits:
                raise InvalidURL(
                    "Invalid percent-escape sequence: '%s'" % h)

            c = chr(int(h, 16))
            if c in UNRESERVED_SET:
                # Safe to decode: drop the escape, keep the rest of the chunk.
                pieces.append(c + chunk[2:])
                continue
        # Not a decodable unreserved escape: restore the '%' that split()
        # removed and keep the text unchanged.
        pieces.append('%' + chunk)
    return ''.join(pieces)


def requote_uri(uri):
    """Return *uri* consistently percent-quoted.

    The URI goes through an unquote/quote cycle: escapes of unreserved
    characters are decoded first, then every illegal character is quoted.
    """
    keep_percent = "!#$%&'()*+,/:;=?@[]~"
    escape_percent = "!#$&'()*+,/:;=?@[]~"

    try:
        # Decode only the escapes that stand for unreserved characters.
        normalized = unquote_unreserved(uri)
    except InvalidURL:
        # The URI could not be unquoted, so it may contain bare '%' signs.
        # Quote the original text with '%' treated as unsafe so that they
        # come out properly quoted and do not cause issues elsewhere.
        return quote(uri, safe=escape_percent)

    # Quote only illegal characters; reserved characters, unreserved
    # characters and '%' are passed through untouched.
    return quote(normalized, safe=keep_percent)
14 changes: 14 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,17 @@ def test_get_lower(self):
self.assertEqual(atoms['{TEST}o'], '123')
self.assertEqual(atoms['{UNKNOWN}o'], '-')
self.assertEqual(atoms['{UNKNOWN}'], '-')


class TestRequoting(unittest.TestCase):
    # Exercises helpers.requote_uri.

    def test_requote_uri_with_unquoted_percents(self):
        # A bare percent sign (as may show up in redirects) must come back
        # quoted as %25.
        expected = 'http://example.com/fiz?buz=%25ppicture'
        actual = helpers.requote_uri('http://example.com/fiz?buz=%ppicture')
        self.assertEqual(expected, actual)

    def test_requote_uri_properly_requotes(self):
        # Requoting an already well-quoted URI must leave it unchanged.
        url = 'http://example.com/fiz?buz=%25ppicture'
        requoted = helpers.requote_uri(url)
        self.assertEqual(url, requoted)

0 comments on commit 3970c74

Please sign in to comment.