diff --git a/CHANGES.rst b/CHANGES.rst index c40160bc..f3f5d146 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,23 @@ Unreleased ========== +- The URL validator regex has been updated to no longer be vulnerable to a + catastrophic backtracking that would have led to an infinite loop. See + https://github.com/Pylons/colander/pull/323 and + https://github.com/Pylons/colander/issues/290. With thanks to Przemek + (https://github.com/p-m-k). + + This does change the behaviour of the URL validator and it no longer supports + ``file://`` URI scheme (https://tools.ietf.org/html/rfc8089). Users that + wish to validate ``file://`` URI's should change their validator to use + ``colander.file_uri`` instead. + + It has also dropped support for alternate schemes outside of http/ftp (and + their secure equivelants). Please let us know if we need to relax this + requirement. + + CVE-ID: CVE-2017-18361 + - The Email validator has been updated to use the same regular expression that is used by the WhatWG HTML specification, thereby increasing the email addresses that will validate correctly from web forms submitted. See diff --git a/colander/__init__.py b/colander/__init__.py index 6f0dff57..6eac447e 100644 --- a/colander/__init__.py +++ b/colander/__init__.py @@ -607,14 +607,40 @@ def _luhnok(value): return checksum +# Gingerly lifted from Django 1.3.x: +# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45 +# <3 y'all! URL_REGEX = ( - r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|' - r'[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|' - r'(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|' - r'[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))' # "emacs! + # {http,ftp}s:// (not required) + r'^((?:http|ftp)s?://)?' + # Domain + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+' + r'(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' + # Localhost + r'localhost|' + # IPv6 address + r'\[[a-f0-9:]+\]|' + # IPv4 address + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + # Optional port + r'(?::\d+)?' + # Path + r'(?:/?|[/?]\S+)$' ) -url = Regex(URL_REGEX, _('Must be a URL')) +url = Regex(URL_REGEX, msg=_('Must be a URL'), flags=re.IGNORECASE) + + +URI_REGEX = ( + # file:// (required) + r'^file://' + # Path + r'(?:/|[/?]\S+)$' +) + +file_uri = Regex( + URI_REGEX, msg=_('Must be a file:// URI scheme'), flags=re.IGNORECASE +) UUID_REGEX = ( r'^(?:urn:uuid:)?\{?[a-f0-9]{8}(?:-?[a-f0-9]{4}){3}-?[a-f0-9]{12}\}?$' diff --git a/colander/tests/test_colander.py b/colander/tests/test_colander.py index 917a7190..5411c2bb 100644 --- a/colander/tests/test_colander.py +++ b/colander/tests/test_colander.py @@ -646,6 +646,76 @@ def test_it_failure(self): self.assertRaises(Invalid, self._callFUT, val) + def test_add_sample_dos(self): + # In the old regex (colander <=1.6) this would cause a catastrophic + # backtracking that would cause the regex engine to go into an infinite + # loop. + val = "http://www.mysite.com/(tttttttttttttttttttttt.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_website_no_scheme(self): + val = "www.mysite.com" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_ipv6(self): + val = "http://[2001:db8::0]/" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_ipv4(self): + val = "http://192.0.2.1/" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_file_raises(self): + from colander import Invalid + + val = "file:///this/is/a/file.jpg" + + self.assertRaises(Invalid, self._callFUT, val) + + +class Test_file_uri_validator(unittest.TestCase): + def _callFUT(self, val): + from colander import file_uri + + return file_uri(None, val) + + def test_it_success(self): + val = 'file:///' + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_it_failure(self): + val = 'not-a-uri' + from colander import Invalid + + self.assertRaises(Invalid, self._callFUT, val) + + def test_no_path_fails(self): + val = 'file://' + from colander import Invalid + + self.assertRaises(Invalid, self._callFUT, val) + + def test_file_with_path(self): + val = "file:///this/is/a/file.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_file_with_path_windows(self): + val = "file:///c:/is/a/file.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + class TestUUID(unittest.TestCase): def _callFUT(self, val):