Skip to content

Commit

Permalink
gh-89973: Fix re.error in the fnmatch module. (GH-93072)
Browse files Browse the repository at this point in the history
Character ranges with upper bound less that lower bound (e.g. [c-a])
are now interpreted as empty ranges, for compatibility with other glob
pattern implementations. Previously it was re.error.
(cherry picked from commit 0902c3d)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
  • Loading branch information
miss-islington and serhiy-storchaka authored Jun 5, 2022
1 parent a8bd502 commit 2220dc5
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 7 deletions.
30 changes: 23 additions & 7 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
Expand All @@ -114,19 +114,35 @@ def translate(pat):
chunks.append(pat[i:k])
i = k+1
k = k+3
chunks.append(pat[i:j])
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
Expand Down
114 changes: 114 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import unittest
import os
import string
import warnings

from fnmatch import fnmatch, fnmatchcase, translate, filter
Expand Down Expand Up @@ -91,6 +92,119 @@ def test_sep(self):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')

def test_char_set(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[az]', c in 'az')
check(c, '[!az]', c not in 'az')
# Case insensitive.
for c in tescases:
check(c, '[AZ]', (c in 'az') and ignorecase)
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[az]', (c in 'AZ') and ignorecase)
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
# Repeated same character.
for c in tescases:
check(c, '[aa]', c == 'a')
# Special cases.
for c in tescases:
check(c, '[^az]', c in '^az')
check(c, '[[az]', c in '[az')
check(c, r'[!]]', c != ']')
check('[', '[')
check('[]', '[]')
check('[!', '[!')
check('[!]', '[!]')

def test_range(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[b-d]', c in 'bcd')
check(c, '[!b-d]', c not in 'bcd')
check(c, '[b-dx-z]', c in 'bcdxyz')
check(c, '[!b-dx-z]', c not in 'bcdxyz')
# Case insensitive.
for c in tescases:
check(c, '[B-D]', (c in 'bcd') and ignorecase)
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[b-d]', (c in 'BCD') and ignorecase)
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
# Upper bound == lower bound.
for c in tescases:
check(c, '[b-b]', c == 'b')
# Special cases.
for c in tescases:
check(c, '[!-#]', c not in '-#')
check(c, '[!--.]', c not in '-.')
check(c, '[^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[[-^]', c in r'[\]^')
check(c, r'[\-^]', c in r'\]^')
check(c, '[b-]', c in '-b')
check(c, '[!b-]', c not in '-b')
check(c, '[-b]', c in '-b')
check(c, '[!-b]', c not in '-b')
check(c, '[-]', c in '-')
check(c, '[!-]', c not in '-')
# Upper bound is less that lower bound: error in RE.
for c in tescases:
check(c, '[d-b]', False)
check(c, '[!d-b]', True)
check(c, '[d-bx-z]', c in 'xyz')
check(c, '[!d-bx-z]', c not in 'xyz')
check(c, '[d-b^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[d-b[-^]', c in r'[\]^')

def test_sep_in_char_set(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('/', r'[/]')
check('\\', r'[\]')
check('/', r'[\]', normsep)
check('\\', r'[/]', normsep)
check('[/]', r'[/]', False)
check(r'[\\]', r'[/]', False)
check('\\', r'[\t]')
check('/', r'[\t]', normsep)
check('t', r'[\t]')
check('\t', r'[\t]', False)

def test_sep_in_range(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('a/b', 'a[.-0]b', not normsep)
check('a\\b', 'a[.-0]b', False)
check('a\\b', 'a[Z-^]b', not normsep)
check('a/b', 'a[Z-^]b', False)

check('a/b', 'a[/-0]b', not normsep)
check(r'a\b', 'a[/-0]b', False)
check('a[/-0]b', 'a[/-0]b', False)
check(r'a[\-0]b', 'a[/-0]b', False)

check('a/b', 'a[.-/]b')
check(r'a\b', 'a[.-/]b', normsep)
check('a[.-/]b', 'a[.-/]b', False)
check(r'a[.-\]b', 'a[.-/]b', False)

check(r'a\b', r'a[\-^]b')
check('a/b', r'a[\-^]b', normsep)
check(r'a[\-^]b', r'a[\-^]b', False)
check('a[/-^]b', r'a[\-^]b', False)

check(r'a\b', r'a[Z-\]b', not normsep)
check('a/b', r'a[Z-\]b', False)
check(r'a[Z-\]b', r'a[Z-\]b', False)
check('a[Z-/]b', r'a[Z-\]b', False)

def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
Now such ranges are interpreted as empty ranges.

0 comments on commit 2220dc5

Please sign in to comment.