From 7cc0b4718b967c8deee2bd9d85f3af97a5263b72 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 10 Sep 2019 13:32:28 -0500 Subject: [PATCH 1/4] Protection against ReDoS The regex module is compatible with the re module (VERSION0 flag). It is also faster. ```python >>> import re >>> import regex >>> import timeit >>> pattert = "(a+)+b" >>> input = "a" * 25 >>> timeit.timeit(lambda: re.search(pattern, input), number=10) 32.332445038000515 >>> timeit.timeit(lambda: regex.search(pattern, input, flags=regex.VERSION0), number=10) 0.003861578001306043 >>> input = "a" * 10000 >>> regex.search(pattern, input, flags=regex.VERSION0, timeout=5) Traceback (most recent call last): File "", line 1, in File "/home/stsewd/.pyenv/versions/readthedocs.org/lib/python3.6/site-packages/regex/regex.py", line 266, in search concurrent, partial, timeout) TimeoutError: regex timed out ``` --- readthedocs/builds/models.py | 26 +++++++++++++++++++++++--- requirements/pip.txt | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 82b96c70ce8..1905fd9a66b 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -6,6 +6,7 @@ import re from shutil import rmtree +import regex from django.conf import settings from django.db import models from django.db.models import F @@ -1137,6 +1138,8 @@ def __str__(self): class RegexAutomationRule(VersionAutomationRule): + TIMEOUT = 15 # timout in seconds + allowed_actions = { VersionAutomationRule.ACTIVATE_VERSION_ACTION: actions.activate_version, VersionAutomationRule.SET_DEFAULT_VERSION_ACTION: actions.set_default_version, @@ -1146,11 +1149,28 @@ class Meta: proxy = True def match(self, version, match_arg): + """ + Find a match using regex.search. + + .. note:: + + We use the regex module with the timeout + arg to avoid ReDoS. + """ try: - match = re.search( - match_arg, version.verbose_name + match = regex.search( + match_arg, + version.verbose_name, + # Compatible with the re module + flags=regex.VERSION0, + timeout=self.TIMEOUT, ) return bool(match), match + except TimeoutError: + log.warning( + 'Timeout while parsing regex. pattern=%s, input=%s', + match_arg, version.verbose_name, + ) except Exception as e: log.info('Error parsing regex: %s', e) - return False, None + return False, None diff --git a/requirements/pip.txt b/requirements/pip.txt index 518921be45e..7af4c612e35 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -75,6 +75,7 @@ Unipath==1.1 django-kombu==0.9.4 mock==3.0.5 stripe==2.37.2 +regex==2019.11.1 # unicode-slugify==0.1.5 is not released on PyPI yet git+https://github.com/mozilla/unicode-slugify@b696c37#egg=unicode-slugify==0.1.5 From 6a5dad7143c8896e1230aefb96ed9659c43c489b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 Nov 2019 13:59:01 -0500 Subject: [PATCH 2/4] Update timeout --- readthedocs/builds/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 1905fd9a66b..d73cd8096b4 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -1138,7 +1138,7 @@ def __str__(self): class RegexAutomationRule(VersionAutomationRule): - TIMEOUT = 15 # timout in seconds + TIMEOUT = 2 # timeout in seconds allowed_actions = { VersionAutomationRule.ACTIVATE_VERSION_ACTION: actions.activate_version, From 87b9f368205fffac5f64acc156acd4b16f147a71 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 Nov 2019 14:01:47 -0500 Subject: [PATCH 3/4] Comment about another alternative --- readthedocs/builds/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index d73cd8096b4..5a541076324 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -1156,6 +1156,9 @@ def match(self, version, match_arg): We use the regex module with the timeout arg to avoid ReDoS. + + We could use a finite state machine type of regex too, + but there isn't a stable library at the time of writting this code. """ try: match = regex.search( From 720954e3ce7dd7ed58e30c8ff23b43850161ccaa Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 Nov 2019 14:03:00 -0500 Subject: [PATCH 4/4] Low timeout --- readthedocs/builds/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 5a541076324..4a5a1896d0b 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -1138,7 +1138,7 @@ def __str__(self): class RegexAutomationRule(VersionAutomationRule): - TIMEOUT = 2 # timeout in seconds + TIMEOUT = 1 # timeout in seconds allowed_actions = { VersionAutomationRule.ACTIVATE_VERSION_ACTION: actions.activate_version,