Skip to content

Commit

Permalink
Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (pythonGH-32283)"
Browse files Browse the repository at this point in the history

This reverts commit 6e3eee5.

Manual fixups to increase the MAGIC number and to handle conflicts with
a couple of changes that landed after that.
  • Loading branch information
gpshead committed Jun 15, 2022
1 parent 8ba1c7f commit 193d838
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 144 deletions.
97 changes: 60 additions & 37 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,54 @@
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
}

class _CompileData:
__slots__ = ('code', 'repeat_count')
def __init__(self):
self.code = []
self.repeat_count = 0
# Sets of lowercase characters which have the same uppercase.
# Each entry is a tuple of code points; the comment above an entry gives the
# Unicode character names and the trailing comment shows the characters.
_equivalences = (
# LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
(0x69, 0x131), # iı
# LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S
(0x73, 0x17f), # sſ
# MICRO SIGN, GREEK SMALL LETTER MU
(0xb5, 0x3bc), # µμ
# COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
(0x345, 0x3b9, 0x1fbe), # \u0345ιι
# GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
(0x390, 0x1fd3), # ΐΐ
# GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
(0x3b0, 0x1fe3), # ΰΰ
# GREEK SMALL LETTER BETA, GREEK BETA SYMBOL
(0x3b2, 0x3d0), # βϐ
# GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL
(0x3b5, 0x3f5), # εϵ
# GREEK SMALL LETTER THETA, GREEK THETA SYMBOL
(0x3b8, 0x3d1), # θϑ
# GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL
(0x3ba, 0x3f0), # κϰ
# GREEK SMALL LETTER PI, GREEK PI SYMBOL
(0x3c0, 0x3d6), # πϖ
# GREEK SMALL LETTER RHO, GREEK RHO SYMBOL
(0x3c1, 0x3f1), # ρϱ
# GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA
(0x3c2, 0x3c3), # ςσ
# GREEK SMALL LETTER PHI, GREEK PHI SYMBOL
(0x3c6, 0x3d5), # φϕ
# LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE
(0x1e61, 0x1e9b), # ṡẛ
# LATIN SMALL LIGATURE LONG S T, LATIN SMALL LIGATURE ST
(0xfb05, 0xfb06), # ſtst
)

# For every lowercase code point in an equivalence group, the tuple of the
# *other* lowercase code points in that group (those sharing its uppercase).
_ignorecase_fixes = {
    char: tuple(other for other in group if other != char)
    for group in _equivalences
    for char in group
}

def _combine_flags(flags, add_flags, del_flags,
                   TYPE_FLAGS=_parser.TYPE_FLAGS):
    """Return *flags* with *add_flags* switched on and *del_flags* switched off.

    When *add_flags* contains any bit from TYPE_FLAGS, every TYPE_FLAGS bit
    already present in *flags* is cleared first, so the newly added type
    flag replaces the previous one instead of being combined with it.
    """
    adds_type_flag = add_flags & TYPE_FLAGS
    if adds_type_flag:
        flags = flags & ~TYPE_FLAGS
    merged = flags | add_flags
    return merged & ~del_flags

def _compile(data, pattern, flags):
def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
code = data.code
emit = code.append
_len = len
LITERAL_CODES = _LITERAL_CODES
Expand Down Expand Up @@ -115,19 +148,15 @@ def _compile(data, pattern, flags):
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(data, av[2], flags)
_compile(code, av[2], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
else:
emit(REPEATING_CODES[op][0])
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
if op != POSSESSIVE_REPEAT:
emit(data.repeat_count)
data.repeat_count += 1
_compile(data, av[2], flags)
_compile(code, av[2], flags)
code[skip] = _len(code) - skip
emit(REPEATING_CODES[op][1])
elif op is SUBPATTERN:
Expand All @@ -136,7 +165,7 @@ def _compile(data, pattern, flags):
emit(MARK)
emit((group-1)*2)
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
if group:
emit(MARK)
emit((group-1)*2+1)
Expand All @@ -148,7 +177,7 @@ def _compile(data, pattern, flags):
# pop their stack if they reach it
emit(ATOMIC_GROUP)
skip = _len(code); emit(0)
_compile(data, av, flags)
_compile(code, av, flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op in SUCCESS_CODES:
Expand All @@ -163,7 +192,7 @@ def _compile(data, pattern, flags):
if lo != hi:
raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind
_compile(data, av[1], flags)
_compile(code, av[1], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op is AT:
Expand All @@ -182,7 +211,7 @@ def _compile(data, pattern, flags):
for av in av[1]:
skip = _len(code); emit(0)
# _compile_info(code, av, flags)
_compile(data, av, flags)
_compile(code, av, flags)
emit(JUMP)
tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip
Expand Down Expand Up @@ -210,12 +239,12 @@ def _compile(data, pattern, flags):
emit(op)
emit(av[0]-1)
skipyes = _len(code); emit(0)
_compile(data, av[1], flags)
_compile(code, av[1], flags)
if av[2]:
emit(JUMP)
skipno = _len(code); emit(0)
code[skipyes] = _len(code) - skipyes + 1
_compile(data, av[2], flags)
_compile(code, av[2], flags)
code[skipno] = _len(code) - skipno
else:
code[skipyes] = _len(code) - skipyes + 1
Expand Down Expand Up @@ -582,17 +611,17 @@ def isstring(obj):
def _code(p, flags):

flags = p.state.flags | flags
data = _CompileData()
code = []

# compile info block
_compile_info(data.code, p, flags)
_compile_info(code, p, flags)

# compile the pattern
_compile(data, p.data, flags)
_compile(code, p.data, flags)

data.code.append(SUCCESS)
code.append(SUCCESS)

return data
return code

def _hex_code(code):
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
Expand Down Expand Up @@ -693,21 +722,14 @@ def print_2(*args):
else:
print_(FAILURE)
i += 1
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
skip, min, max = code[i: i+3]
if max == MAXREPEAT:
max = 'MAXREPEAT'
print_(op, skip, min, max, to=i+skip)
dis_(i+3, i+skip)
i += skip
elif op is REPEAT:
skip, min, max, repeat_index = code[i: i+4]
if max == MAXREPEAT:
max = 'MAXREPEAT'
print_(op, skip, min, max, repeat_index, to=i+skip)
dis_(i+4, i+skip)
i += skip
elif op is GROUPREF_EXISTS:
arg, skip = code[i: i+2]
print_(op, arg, skip, to=i+skip)
Expand Down Expand Up @@ -762,11 +784,11 @@ def compile(p, flags=0):
else:
pattern = None

data = _code(p, flags)
code = _code(p, flags)

if flags & SRE_FLAG_DEBUG:
print()
dis(data.code)
dis(code)

# map in either direction
groupindex = p.state.groupdict
Expand All @@ -775,6 +797,7 @@ def compile(p, flags=0):
indexgroup[i] = k

return _sre.compile(
pattern, flags | p.state.flags, data.code,
p.state.groups-1, groupindex, tuple(indexgroup),
data.repeat_count)
pattern, flags | p.state.flags, code,
p.state.groups-1,
groupindex, tuple(indexgroup)
)
2 changes: 1 addition & 1 deletion Lib/re/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

# update when constants are added or removed

MAGIC = 20220423
MAGIC = 20220615

from _sre import MAXREPEAT, MAXGROUPS

Expand Down
28 changes: 2 additions & 26 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -1765,12 +1765,9 @@ def test_dealloc(self):
long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "a", {})
with self.assertRaises(OverflowError):
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
with self.assertRaises(TypeError):
_sre.compile({}, 0, [], 0, [], [], 0)
with self.assertRaises(RuntimeError):
# invalid repeat_count -1
_sre.compile("abc", 0, [1], 0, {}, (), -1)
_sre.compile({}, 0, [], 0, [], [])

def test_search_dot_unicode(self):
self.assertTrue(re.search("123.*-", '123abc-'))
Expand Down Expand Up @@ -2509,27 +2506,6 @@ def test_possesive_repeat(self):
14. SUCCESS
''')

def test_repeat_index(self):
# Checks the debug dump for a lazy repeat followed by a greedy repeat:
# each REPEAT opcode line in the expected output carries one extra
# trailing operand (0 for the first repeat, 1 for the second), which the
# disassembler in this commit names repeat_index.
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
MIN_REPEAT 0 MAXREPEAT
LITERAL 97
LITERAL 98
MAX_REPEAT 0 MAXREPEAT
LITERAL 99
LITERAL 100
0. INFO 4 0b0 0 MAXREPEAT (to 5)
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
10. LITERAL 0x61 ('a')
12. LITERAL 0x62 ('b')
14: MIN_UNTIL
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
20. LITERAL 0x63 ('c')
22. LITERAL 0x64 ('d')
24: MAX_UNTIL
25. SUCCESS
''')


class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
Expand Down
25 changes: 6 additions & 19 deletions Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 193d838

Please sign in to comment.