gh-89973: Fix re.error in the fnmatch module.

Character ranges with an upper bound less than the lower bound are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously, such ranges raised re.error.
serhiy-storchaka · May 22, 2022 · ab4b759 · ab4b759
1 parent e5d8dbd
commit ab4b759
Show file tree

Hide file tree

Showing 3 changed files with 91 additions and 6 deletions.
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
@@ -102,7 +102,7 @@ def translate(pat):
                 add('\\[')
             else:
                 stuff = pat[i:j]
-                if '--' not in stuff:
+                if '-' not in stuff:
                     stuff = stuff.replace('\\', r'\\')
                 else:
                     chunks = []
@@ -115,18 +115,29 @@ def translate(pat):
                         i = k+1
                         k = k+3
                     chunks.append(pat[i:j])
+                    if not chunks[-1]:
+                        del chunks[-1]
+                        chunks[-1] += '-'
+                    for k in range(len(chunks)-1, 0, -1):
+                        if chunks[k-1][-1] > chunks[k][0]:
+                            chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]]
                     # Escape backslashes and hyphens for set difference (--).
                     # Hyphens that create ranges shouldn't be escaped.
                     stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                      for s in chunks)
                 # Escape set operations (&&, ~~ and ||).
                 stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                 i = j+1
-                if stuff[0] == '!':
-                    stuff = '^' + stuff[1:]
-                elif stuff[0] in ('^', '['):
-                    stuff = '\\' + stuff
-                add(f'[{stuff}]')
+                if not stuff:
+                    add(f'(?!)')  # never match
+                elif stuff == '!':
+                    add(f'.')  # match any character
+                else:
+                    if stuff[0] == '!':
+                        stuff = '^' + stuff[1:]
+                    elif stuff[0] in ('^', '['):
+                        stuff = '\\' + stuff
+                    add(f'[{stuff}]')
         else:
             add(re.escape(c))
     assert i == n

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
@@ -2,6 +2,7 @@
 
 import unittest
 import os
+import string
 import warnings
 
 from fnmatch import fnmatch, fnmatchcase, translate, filter
@@ -91,6 +92,76 @@ def test_sep(self):
         check('usr/bin', 'usr\\bin', normsep)
         check('usr\\bin', 'usr\\bin')
 
+    def test_char_set(self):
+        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
+        check = self.check_match
+        tescases = string.ascii_lowercase + string.digits + string.punctuation
+        for c in tescases:
+            check(c, '[az]', c in 'az')
+            check(c, '[!az]', c not in 'az')
+        # Case insensitive.
+        for c in tescases:
+            check(c, '[AZ]', (c in 'az') and ignorecase)
+            check(c, '[!AZ]', (c not in 'az') or not ignorecase)
+        for c in string.ascii_uppercase:
+            check(c, '[az]', (c in 'AZ') and ignorecase)
+            check(c, '[!az]', (c not in 'AZ') or not ignorecase)
+        # Repeated same character.
+        for c in tescases:
+            check(c, '[aa]', c == 'a')
+        # Special cases.
+        for c in tescases:
+            check(c, '[^az]', c in '^az')
+            check(c, '[[az]', c in '[az')
+            check(c, r'[\]', c == '\\')
+            check(c, r'[\az]', c in r'\az')
+            check(c, r'[!]]', c != ']')
+        check('[', '[')
+        check('[]', '[]')
+        check('[!', '[!')
+        check('[!]', '[!]')
+
+    def test_range(self):
+        ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
+        check = self.check_match
+        tescases = string.ascii_lowercase + string.digits + string.punctuation
+        for c in tescases:
+            check(c, '[b-d]', c in 'bcd')
+            check(c, '[!b-d]', c not in 'bcd')
+            check(c, '[b-dx-z]', c in 'bcdxyz')
+            check(c, '[!b-dx-z]', c not in 'bcdxyz')
+        # Case insensitive.
+        for c in tescases:
+            check(c, '[B-D]', (c in 'bcd') and ignorecase)
+            check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
+        for c in string.ascii_uppercase:
+            check(c, '[b-d]', (c in 'BCD') and ignorecase)
+            check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
+        # Upper bound == lower bound.
+        for c in tescases:
+            check(c, '[b-b]', c == 'b')
+        # Special cases.
+        for c in tescases:
+            check(c, '[!-#]', c not in '-#')
+            check(c, '[!--/]', c not in '-./')
+            check(c, '[^-`]', c in '^_`')
+            check(c, '[[-^]', c in r'[\]^')
+            check(c, r'[\-^]', c in r'\]^')
+            check(c, '[b-]', c in '-b')
+            check(c, '[!b-]', c not in '-b')
+            check(c, '[-b]', c in '-b')
+            check(c, '[!-b]', c not in '-b')
+            check(c, '[-]', c in '-')
+            check(c, '[!-]', c not in '-')
+        # Upper bound is less than lower bound: error in RE.
+        for c in tescases:
+            check(c, '[d-b]', False)
+            check(c, '[!d-b]', True)
+            check(c, '[d-bx-z]', c in 'xyz')
+            check(c, '[!d-bx-z]', c not in 'xyz')
+            check(c, '[d-b^-`]', c in '^_`')
+            check(c, '[d-b[-^]', c in '[\\]^')
+
     def test_warnings(self):
         with warnings.catch_warnings():
             warnings.simplefilter('error', Warning)

diff --git a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst
@@ -0,0 +1,3 @@
+Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
+character range with upper bound lower than lower bound (e.g. ``[c-a]``).
+Now such ranges are interpreted as empty ranges.