Skip to content

Commit

Permalink
pythongh-91404: Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or allocation failure (pythonGH-32283)" (pythonGH-93882)
Browse files Browse the repository at this point in the history

Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (pythonGH-32283)"

This reverts commit 6e3eee5.

Manual fixups were needed to increase the MAGIC number and to resolve
conflicts with a couple of changes that landed after the reverted commit.

Thanks for reviews by Ma Lin and Serhiy Storchaka.
(cherry picked from commit 4beee0c)

Co-authored-by: Gregory P. Smith <greg@krypto.org>
  • Loading branch information
gpshead authored and miss-islington committed Jun 17, 2022
1 parent 5ee86d4 commit 3c948e8
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 146 deletions.
59 changes: 21 additions & 38 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,14 @@
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
}

class _CompileData:
__slots__ = ('code', 'repeat_count')
def __init__(self):
self.code = []
self.repeat_count = 0

def _combine_flags(flags, add_flags, del_flags,
TYPE_FLAGS=_parser.TYPE_FLAGS):
if add_flags & TYPE_FLAGS:
flags &= ~TYPE_FLAGS
return (flags | add_flags) & ~del_flags

def _compile(data, pattern, flags):
def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
code = data.code
emit = code.append
_len = len
LITERAL_CODES = _LITERAL_CODES
Expand Down Expand Up @@ -115,19 +108,15 @@ def _compile(data, pattern, flags):
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(data, av[2], flags)
_compile(code, av[2], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
else:
emit(REPEATING_CODES[op][0])
skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
if op != POSSESSIVE_REPEAT:
emit(data.repeat_count)
data.repeat_count += 1
_compile(data, av[2], flags)
_compile(code, av[2], flags)
code[skip] = _len(code) - skip
emit(REPEATING_CODES[op][1])
elif op is SUBPATTERN:
Expand All @@ -136,7 +125,7 @@ def _compile(data, pattern, flags):
emit(MARK)
emit((group-1)*2)
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
if group:
emit(MARK)
emit((group-1)*2+1)
Expand All @@ -148,7 +137,7 @@ def _compile(data, pattern, flags):
# pop their stack if they reach it
emit(ATOMIC_GROUP)
skip = _len(code); emit(0)
_compile(data, av, flags)
_compile(code, av, flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op in SUCCESS_CODES:
Expand All @@ -163,7 +152,7 @@ def _compile(data, pattern, flags):
if lo != hi:
raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind
_compile(data, av[1], flags)
_compile(code, av[1], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
elif op is AT:
Expand All @@ -182,7 +171,7 @@ def _compile(data, pattern, flags):
for av in av[1]:
skip = _len(code); emit(0)
# _compile_info(code, av, flags)
_compile(data, av, flags)
_compile(code, av, flags)
emit(JUMP)
tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip
Expand Down Expand Up @@ -210,12 +199,12 @@ def _compile(data, pattern, flags):
emit(op)
emit(av[0]-1)
skipyes = _len(code); emit(0)
_compile(data, av[1], flags)
_compile(code, av[1], flags)
if av[2]:
emit(JUMP)
skipno = _len(code); emit(0)
code[skipyes] = _len(code) - skipyes + 1
_compile(data, av[2], flags)
_compile(code, av[2], flags)
code[skipno] = _len(code) - skipno
else:
code[skipyes] = _len(code) - skipyes + 1
Expand Down Expand Up @@ -582,17 +571,17 @@ def isstring(obj):
def _code(p, flags):

flags = p.state.flags | flags
data = _CompileData()
code = []

# compile info block
_compile_info(data.code, p, flags)
_compile_info(code, p, flags)

# compile the pattern
_compile(data, p.data, flags)
_compile(code, p.data, flags)

data.code.append(SUCCESS)
code.append(SUCCESS)

return data
return code

def _hex_code(code):
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
Expand Down Expand Up @@ -693,21 +682,14 @@ def print_2(*args):
else:
print_(FAILURE)
i += 1
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
skip, min, max = code[i: i+3]
if max == MAXREPEAT:
max = 'MAXREPEAT'
print_(op, skip, min, max, to=i+skip)
dis_(i+3, i+skip)
i += skip
elif op is REPEAT:
skip, min, max, repeat_index = code[i: i+4]
if max == MAXREPEAT:
max = 'MAXREPEAT'
print_(op, skip, min, max, repeat_index, to=i+skip)
dis_(i+4, i+skip)
i += skip
elif op is GROUPREF_EXISTS:
arg, skip = code[i: i+2]
print_(op, arg, skip, to=i+skip)
Expand Down Expand Up @@ -762,11 +744,11 @@ def compile(p, flags=0):
else:
pattern = None

data = _code(p, flags)
code = _code(p, flags)

if flags & SRE_FLAG_DEBUG:
print()
dis(data.code)
dis(code)

# map in either direction
groupindex = p.state.groupdict
Expand All @@ -775,6 +757,7 @@ def compile(p, flags=0):
indexgroup[i] = k

return _sre.compile(
pattern, flags | p.state.flags, data.code,
p.state.groups-1, groupindex, tuple(indexgroup),
data.repeat_count)
pattern, flags | p.state.flags, code,
p.state.groups-1,
groupindex, tuple(indexgroup)
)
2 changes: 1 addition & 1 deletion Lib/re/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

# update when constants are added or removed

MAGIC = 20220423
MAGIC = 20220615

from _sre import MAXREPEAT, MAXGROUPS

Expand Down
28 changes: 2 additions & 26 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -1796,12 +1796,9 @@ def test_dealloc(self):
long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "a", {})
with self.assertRaises(OverflowError):
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
with self.assertRaises(TypeError):
_sre.compile({}, 0, [], 0, [], [], 0)
with self.assertRaises(RuntimeError):
# invalid repeat_count -1
_sre.compile("abc", 0, [1], 0, {}, (), -1)
_sre.compile({}, 0, [], 0, [], [])

def test_search_dot_unicode(self):
self.assertTrue(re.search("123.*-", '123abc-'))
Expand Down Expand Up @@ -2540,27 +2537,6 @@ def test_possesive_repeat(self):
14. SUCCESS
''')

def test_repeat_index(self):
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
MIN_REPEAT 0 MAXREPEAT
LITERAL 97
LITERAL 98
MAX_REPEAT 0 MAXREPEAT
LITERAL 99
LITERAL 100
0. INFO 4 0b0 0 MAXREPEAT (to 5)
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
10. LITERAL 0x61 ('a')
12. LITERAL 0x62 ('b')
14: MIN_UNTIL
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
20. LITERAL 0x63 ('c')
22. LITERAL 0x64 ('d')
24: MAX_UNTIL
25. SUCCESS
''')


class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Revert the fix for the :mod:`re` memory leak that occurs when a match is
terminated by a signal or memory allocation failure, as the implemented fix
caused a major performance regression.
27 changes: 7 additions & 20 deletions Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3c948e8

Please sign in to comment.