From 2f8aae38b9ce19dfd00356927a68cd00366331bc Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sun, 5 Jun 2022 02:39:03 -0700 Subject: [PATCH] gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less that lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. (cherry picked from commit 0902c3d8edf7ef67972dd95f6a21670f5d1a4251) Co-authored-by: Serhiy Storchaka --- Lib/fnmatch.py | 30 +++-- Lib/test/test_fnmatch.py | 114 ++++++++++++++++++ ...2-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst | 3 + 3 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 7c52c23067d40f..fee59bf73ff264 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -108,7 +108,7 @@ def translate(pat): add('\\[') else: stuff = pat[i:j] - if '--' not in stuff: + if '-' not in stuff: stuff = stuff.replace('\\', r'\\') else: chunks = [] @@ -120,7 +120,16 @@ def translate(pat): chunks.append(pat[i:k]) i = k+1 k = k+3 - chunks.append(pat[i:j]) + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: + chunks[-1] += '-' + # Remove empty ranges -- invalid in RE. + for k in range(len(chunks)-1, 0, -1): + if chunks[k-1][-1] > chunks[k][0]: + chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] + del chunks[k] # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') @@ -128,11 +137,18 @@ def translate(pat): # Escape set operations (&&, ~~ and ||). stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] in ('^', '['): - stuff = '\\' + stuff - add(f'[{stuff}]') + if not stuff: + # Empty range: never match. + add('(?!)') + elif stuff == '!': + # Negated empty range: match any character. + add('.') + else: + if stuff[0] == '!': + stuff = '^' + stuff[1:] + elif stuff[0] in ('^', '['): + stuff = '\\' + stuff + add(f'[{stuff}]') else: add(re.escape(c)) assert i == n diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 10668e4f6103aa..8e2d274bba2bb3 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -2,6 +2,7 @@ import unittest import os +import string import warnings from fnmatch import fnmatch, fnmatchcase, translate, filter @@ -91,6 +92,119 @@ def test_sep(self): check('usr/bin', 'usr\\bin', normsep) check('usr\\bin', 'usr\\bin') + def test_char_set(self): + ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') + check = self.check_match + tescases = string.ascii_lowercase + string.digits + string.punctuation + for c in tescases: + check(c, '[az]', c in 'az') + check(c, '[!az]', c not in 'az') + # Case insensitive. + for c in tescases: + check(c, '[AZ]', (c in 'az') and ignorecase) + check(c, '[!AZ]', (c not in 'az') or not ignorecase) + for c in string.ascii_uppercase: + check(c, '[az]', (c in 'AZ') and ignorecase) + check(c, '[!az]', (c not in 'AZ') or not ignorecase) + # Repeated same character. + for c in tescases: + check(c, '[aa]', c == 'a') + # Special cases. + for c in tescases: + check(c, '[^az]', c in '^az') + check(c, '[[az]', c in '[az') + check(c, r'[!]]', c != ']') + check('[', '[') + check('[]', '[]') + check('[!', '[!') + check('[!]', '[!]') + + def test_range(self): + ignorecase = os.path.normcase('ABC') == os.path.normcase('abc') + normsep = os.path.normcase('\\') == os.path.normcase('/') + check = self.check_match + tescases = string.ascii_lowercase + string.digits + string.punctuation + for c in tescases: + check(c, '[b-d]', c in 'bcd') + check(c, '[!b-d]', c not in 'bcd') + check(c, '[b-dx-z]', c in 'bcdxyz') + check(c, '[!b-dx-z]', c not in 'bcdxyz') + # Case insensitive. + for c in tescases: + check(c, '[B-D]', (c in 'bcd') and ignorecase) + check(c, '[!B-D]', (c not in 'bcd') or not ignorecase) + for c in string.ascii_uppercase: + check(c, '[b-d]', (c in 'BCD') and ignorecase) + check(c, '[!b-d]', (c not in 'BCD') or not ignorecase) + # Upper bound == lower bound. + for c in tescases: + check(c, '[b-b]', c == 'b') + # Special cases. + for c in tescases: + check(c, '[!-#]', c not in '-#') + check(c, '[!--.]', c not in '-.') + check(c, '[^-`]', c in '^_`') + if not (normsep and c == '/'): + check(c, '[[-^]', c in r'[\]^') + check(c, r'[\-^]', c in r'\]^') + check(c, '[b-]', c in '-b') + check(c, '[!b-]', c not in '-b') + check(c, '[-b]', c in '-b') + check(c, '[!-b]', c not in '-b') + check(c, '[-]', c in '-') + check(c, '[!-]', c not in '-') + # Upper bound is less that lower bound: error in RE. + for c in tescases: + check(c, '[d-b]', False) + check(c, '[!d-b]', True) + check(c, '[d-bx-z]', c in 'xyz') + check(c, '[!d-bx-z]', c not in 'xyz') + check(c, '[d-b^-`]', c in '^_`') + if not (normsep and c == '/'): + check(c, '[d-b[-^]', c in r'[\]^') + + def test_sep_in_char_set(self): + normsep = os.path.normcase('\\') == os.path.normcase('/') + check = self.check_match + check('/', r'[/]') + check('\\', r'[\]') + check('/', r'[\]', normsep) + check('\\', r'[/]', normsep) + check('[/]', r'[/]', False) + check(r'[\\]', r'[/]', False) + check('\\', r'[\t]') + check('/', r'[\t]', normsep) + check('t', r'[\t]') + check('\t', r'[\t]', False) + + def test_sep_in_range(self): + normsep = os.path.normcase('\\') == os.path.normcase('/') + check = self.check_match + check('a/b', 'a[.-0]b', not normsep) + check('a\\b', 'a[.-0]b', False) + check('a\\b', 'a[Z-^]b', not normsep) + check('a/b', 'a[Z-^]b', False) + + check('a/b', 'a[/-0]b', not normsep) + check(r'a\b', 'a[/-0]b', False) + check('a[/-0]b', 'a[/-0]b', False) + check(r'a[\-0]b', 'a[/-0]b', False) + + check('a/b', 'a[.-/]b') + check(r'a\b', 'a[.-/]b', normsep) + check('a[.-/]b', 'a[.-/]b', False) + check(r'a[.-\]b', 'a[.-/]b', False) + + check(r'a\b', r'a[\-^]b') + check('a/b', r'a[\-^]b', normsep) + check(r'a[\-^]b', r'a[\-^]b', False) + check('a[/-^]b', r'a[\-^]b', False) + + check(r'a\b', r'a[Z-\]b', not normsep) + check('a/b', r'a[Z-\]b', False) + check(r'a[Z-\]b', r'a[Z-\]b', False) + check('a[Z-/]b', r'a[Z-\]b', False) + def test_warnings(self): with warnings.catch_warnings(): warnings.simplefilter('error', Warning) diff --git a/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst new file mode 100644 index 00000000000000..7e61fd7d46a0bb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-22-16-08-01.gh-issue-89973.jc-Q4g.rst @@ -0,0 +1,3 @@ +Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a +character range with upper bound lower than lower bound (e.g. ``[c-a]``). +Now such ranges are interpreted as empty ranges.