From d4f4f77a2cfa518426178bd69d2b29dee57f770d Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 01:16:06 -0700 Subject: [PATCH 1/6] Update the URL validator regex --- colander/__init__.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/colander/__init__.py b/colander/__init__.py index 6f0dff57..b07215f0 100644 --- a/colander/__init__.py +++ b/colander/__init__.py @@ -607,14 +607,28 @@ def _luhnok(value): return checksum +# Gingerly lifted from Django 1.3.x: +# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45 +# <3 y'all! URL_REGEX = ( - r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|' - r'[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|' - r'(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|' - r'[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))' # "emacs! + # {http,ftp}s:// (not required) + r'^((?:http|ftp)s?://)?' + # Domain + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+' + r'(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' + # Localhost + r'localhost|' + # IPv6 address + r'\[[a-f0-9:]+\]|' + # IPv4 address + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + # Optional port + r'(?::\d+)?' + # Path + r'(?:/?|[/?]\S+)$' ) -url = Regex(URL_REGEX, _('Must be a URL')) +url = Regex(URL_REGEX, msg=_('Must be a URL'), flags=re.IGNORECASE) UUID_REGEX = ( r'^(?:urn:uuid:)?\{?[a-f0-9]{8}(?:-?[a-f0-9]{4}){3}-?[a-f0-9]{12}\}?$' From ddd98c0ff4ab0c06ba66557e7ab5b3f2ea8c0a5e Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 01:18:54 -0700 Subject: [PATCH 2/6] Add test with catastrophic backtracking --- colander/tests/test_colander.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/colander/tests/test_colander.py b/colander/tests/test_colander.py index 917a7190..cb89d8c5 100644 --- a/colander/tests/test_colander.py +++ b/colander/tests/test_colander.py @@ -646,6 +646,21 @@ def test_it_failure(self): self.assertRaises(Invalid, self._callFUT, val) + def test_add_sample_dos(self): + # In the old regex (colander <=1.6) this would cause a catastrophic + # backtracking that would cause the regex engine to go into an infinite + # loop. + val = "http://www.mysite.com/(tttttttttttttttttttttt.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_website_no_scheme(self): + val = "www.mysite.com" + + result = self._callFUT(val) + self.assertEqual(result, None) + class TestUUID(unittest.TestCase): def _callFUT(self, val): From 7ccafc2909bfbc5a592dc5abd944ef9960c35b35 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 10:37:33 -0700 Subject: [PATCH 3/6] Update tests for url validator --- colander/tests/test_colander.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/colander/tests/test_colander.py b/colander/tests/test_colander.py index cb89d8c5..b7576b9f 100644 --- a/colander/tests/test_colander.py +++ b/colander/tests/test_colander.py @@ -661,6 +661,24 @@ def test_website_no_scheme(self): result = self._callFUT(val) self.assertEqual(result, None) + def test_ipv6(self): + val = "http://[2001:db8::0]/" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_ipv4(self): + val = "http://192.0.2.1/" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_file_raises(self): + from colander import Invalid + + val = "file:///this/is/a/file.jpg" + + self.assertRaises(Invalid, self._callFUT, val) class TestUUID(unittest.TestCase): def _callFUT(self, val): From 1a17b237fed2fdd3ac0e04ec8bfb4b206c7fe046 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 10:37:03 -0700 Subject: [PATCH 4/6] Add new file_uri validator --- colander/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/colander/__init__.py b/colander/__init__.py index b07215f0..6eac447e 100644 --- a/colander/__init__.py +++ b/colander/__init__.py @@ -630,6 +630,18 @@ def _luhnok(value): url = Regex(URL_REGEX, msg=_('Must be a URL'), flags=re.IGNORECASE) + +URI_REGEX = ( + # file:// (required) + r'^file://' + # Path + r'(?:/|[/?]\S+)$' +) + +file_uri = Regex( + URI_REGEX, msg=_('Must be a file:// URI scheme'), flags=re.IGNORECASE +) + UUID_REGEX = ( r'^(?:urn:uuid:)?\{?[a-f0-9]{8}(?:-?[a-f0-9]{4}){3}-?[a-f0-9]{12}\}?$' ) From 0d22a9a15f4d3c615528088e033226ee284e01e0 Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 10:39:13 -0700 Subject: [PATCH 5/6] Add tests for file_uri validator --- colander/tests/test_colander.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/colander/tests/test_colander.py b/colander/tests/test_colander.py index b7576b9f..5411c2bb 100644 --- a/colander/tests/test_colander.py +++ b/colander/tests/test_colander.py @@ -680,6 +680,43 @@ def test_file_raises(self): self.assertRaises(Invalid, self._callFUT, val) + +class Test_file_uri_validator(unittest.TestCase): + def _callFUT(self, val): + from colander import file_uri + + return file_uri(None, val) + + def test_it_success(self): + val = 'file:///' + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_it_failure(self): + val = 'not-a-uri' + from colander import Invalid + + self.assertRaises(Invalid, self._callFUT, val) + + def test_no_path_fails(self): + val = 'file://' + from colander import Invalid + + self.assertRaises(Invalid, self._callFUT, val) + + def test_file_with_path(self): + val = "file:///this/is/a/file.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + + def test_file_with_path_windows(self): + val = "file:///c:/is/a/file.jpg" + + result = self._callFUT(val) + self.assertEqual(result, None) + + class TestUUID(unittest.TestCase): def _callFUT(self, val): from colander import uuid From 2ba8973a5fa669609fa59a3e98f64fd832036a1f Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Fri, 1 Feb 2019 01:36:41 -0700 Subject: [PATCH 6/6] Add CHANGES.rst for #323 --- CHANGES.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index c40160bc..f3f5d146 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,23 @@ Unreleased ========== +- The URL validator regex has been updated to no longer be vulnerable to a + catastrophic backtracking that would have led to an infinite loop. See + https://github.com/Pylons/colander/pull/323 and + https://github.com/Pylons/colander/issues/290. With thanks to Przemek + (https://github.com/p-m-k). + + This does change the behaviour of the URL validator and it no longer supports + ``file://`` URI scheme (https://tools.ietf.org/html/rfc8089). Users that + wish to validate ``file://`` URI's should change their validator to use + ``colander.file_uri`` instead. + + It has also dropped support for alternate schemes outside of http/ftp (and + their secure equivelants). Please let us know if we need to relax this + requirement. + + CVE-ID: CVE-2017-18361 + - The Email validator has been updated to use the same regular expression that is used by the WhatWG HTML specification, thereby increasing the email addresses that will validate correctly from web forms submitted. See