Skip to content

Commit

Permalink
pythongh-89973: Fix re.error in the fnmatch module.
Browse files Browse the repository at this point in the history
Character ranges with upper bound less than lower bound are now
interpreted as empty ranges, for compatibility with other glob
pattern implementations. Previously it was re.error.
  • Loading branch information
serhiy-storchaka committed May 22, 2022
1 parent e5d8dbd commit ab4b759
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 6 deletions.
23 changes: 17 additions & 6 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
Expand All @@ -115,18 +115,29 @@ def translate(pat):
i = k+1
k = k+3
chunks.append(pat[i:j])
if not chunks[-1]:
del chunks[-1]
chunks[-1] += '-'
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1:k+1] = [chunks[k-1][:-1] + chunks[k][1:]]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
if not stuff:
add(f'(?!)') # never match
elif stuff == '!':
add(f'.') # match any character
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
Expand Down
71 changes: 71 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import unittest
import os
import string
import warnings

from fnmatch import fnmatch, fnmatchcase, translate, filter
Expand Down Expand Up @@ -91,6 +92,76 @@ def test_sep(self):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')

def test_char_set(self):
    """Check bracket expressions that list individual characters.

    Covers plain and negated sets, case folding as reported by
    os.path.normcase(), repeated members, sets containing characters
    that are special inside brackets, and unterminated brackets.
    """
    match = self.check_match
    # True when os.path.normcase() maps 'ABC' and 'abc' to the same string.
    folds_case = os.path.normcase('ABC') == os.path.normcase('abc')
    chars = ''.join((string.ascii_lowercase,
                     string.digits,
                     string.punctuation))

    # Plain and negated two-character sets.
    for c in chars:
        in_az = c in 'az'
        match(c, '[az]', in_az)
        match(c, '[!az]', not in_az)

    # Upper-case pattern against lower-case names: matches only when
    # the platform folds case.
    for c in chars:
        in_az = c in 'az'
        match(c, '[AZ]', in_az and folds_case)
        match(c, '[!AZ]', (not in_az) or not folds_case)

    # Lower-case pattern against upper-case names.
    for c in string.ascii_uppercase:
        in_upper_az = c in 'AZ'
        match(c, '[az]', in_upper_az and folds_case)
        match(c, '[!az]', (not in_upper_az) or not folds_case)

    # The same character listed twice.
    for c in chars:
        match(c, '[aa]', c == 'a')

    # Members that are special inside brackets: '^', '[', '\' and ']'.
    for c in chars:
        match(c, '[^az]', c in '^az')
        match(c, '[[az]', c in '[az')
        match(c, r'[\]', c == '\\')
        match(c, r'[\az]', c in r'\az')
        match(c, r'[!]]', c != ']')

    # Patterns with no usable closing bracket match themselves.
    match('[', '[')
    match('[]', '[]')
    match('[!', '[!')
    match('[!]', '[!]')

def test_range(self):
    """Check bracket expressions that contain character ranges.

    Covers plain and negated ranges, case folding as reported by
    os.path.normcase(), single-character ranges, ranges whose bounds
    are special characters, literal hyphens, and reversed ranges.
    """
    match = self.check_match
    # True when os.path.normcase() maps 'ABC' and 'abc' to the same string.
    folds_case = os.path.normcase('ABC') == os.path.normcase('abc')
    chars = ''.join((string.ascii_lowercase,
                     string.digits,
                     string.punctuation))

    # One and two ranges, plain and negated.
    for c in chars:
        in_bcd = c in 'bcd'
        in_bcdxyz = c in 'bcdxyz'
        match(c, '[b-d]', in_bcd)
        match(c, '[!b-d]', not in_bcd)
        match(c, '[b-dx-z]', in_bcdxyz)
        match(c, '[!b-dx-z]', not in_bcdxyz)

    # Upper-case range against lower-case names: matches only when
    # the platform folds case.
    for c in chars:
        in_bcd = c in 'bcd'
        match(c, '[B-D]', in_bcd and folds_case)
        match(c, '[!B-D]', (not in_bcd) or not folds_case)

    # Lower-case range against upper-case names.
    for c in string.ascii_uppercase:
        in_upper_bcd = c in 'BCD'
        match(c, '[b-d]', in_upper_bcd and folds_case)
        match(c, '[!b-d]', (not in_upper_bcd) or not folds_case)

    # Upper bound equal to lower bound.
    for c in chars:
        match(c, '[b-b]', c == 'b')

    # Ranges bounded by special characters, and hyphens that are
    # literal because they sit at the start or end of the set.
    for c in chars:
        match(c, '[!-#]', c not in '-#')
        match(c, '[!--/]', c not in '-./')
        match(c, '[^-`]', c in '^_`')
        match(c, '[[-^]', c in r'[\]^')
        match(c, r'[\-^]', c in r'\]^')
        match(c, '[b-]', c in '-b')
        match(c, '[!b-]', c not in '-b')
        match(c, '[-b]', c in '-b')
        match(c, '[!-b]', c not in '-b')
        match(c, '[-]', c in '-')
        match(c, '[!-]', c not in '-')

    # Upper bound is less than lower bound: such a range is expected
    # to behave as an empty range, so only the other members count.
    for c in chars:
        match(c, '[d-b]', False)
        match(c, '[!d-b]', True)
        match(c, '[d-bx-z]', c in 'xyz')
        match(c, '[!d-bx-z]', c not in 'xyz')
        match(c, '[d-b^-`]', c in '^_`')
        match(c, '[d-b[-^]', c in '[\\]^')

def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
Now such ranges are interpreted as empty ranges.

0 comments on commit ab4b759

Please sign in to comment.