Skip to content

Commit

Permalink
Fix issue lark-parser#1414 by correctly putting patterns into non-capturing groups
Browse files Browse the repository at this point in the history
  • Loading branch information
MegaIng committed May 16, 2024
1 parent 53c3964 commit 501eb77
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
6 changes: 3 additions & 3 deletions lark/load_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def expansion(self, items: List[Pattern]) -> Pattern:
if len(items) == 1:
return items[0]

pattern = ''.join(i.to_regexp() for i in items)
pattern = ''.join(f'(?:{i.to_regexp()})' for i in items)
return _make_joined_pattern(pattern, {i.flags for i in items})

def expansions(self, exps: List[Pattern]) -> Pattern:
Expand All @@ -636,7 +636,7 @@ def expansions(self, exps: List[Pattern]) -> Pattern:
# (Python's re module otherwise prefers just 'l' when given (l|ll) and both could match)
exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))

pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
pattern = '(?:%s)' % ('|'.join(f'(?:{i.to_regexp()})' for i in exps))
return _make_joined_pattern(pattern, {i.flags for i in exps})

def expr(self, args) -> Pattern:
Expand All @@ -652,7 +652,7 @@ def expr(self, args) -> Pattern:
op = "{%d,%d}" % (mn, mx)
else:
assert len(args) == 2
return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)
return PatternRE(f'(?:{inner.to_regexp()}){op}', inner.flags)

def maybe(self, expr):
# Optional form: append the '?' operator to the argument list and delegate
# to expr(), which applies the operator to the inner pattern.
return self.expr(expr + ['?'])
Expand Down
13 changes: 13 additions & 0 deletions tests/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,19 @@ def test_ranged_repeat_large(self):
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190)
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192)

def test_term_combine(self):
# Issue #1414
# Regression test: START is the concatenation of terminals A, B and C,
# where C is a regexp with a top-level alternation (/c|d/). Both inputs
# below must parse to a 'start' tree whose only child is the full text.
g = """
start: START
START: A B C
A: "a"
B: "b"
C: /c|d/
"""
l = Lark(g, parser='lalr')
# Input using the 'c' branch of C.
self.assertEqual(l.parse('abc'), Tree('start', ['abc']))
# Input using the 'd' branch of C; presumably the case that broke when the
# '|' was not confined to its own group -- TODO confirm against the issue.
self.assertEqual(l.parse('abd'), Tree('start', ['abd']))

def test_large_terminal(self):
g = "start: NUMBERS\n"
g += "NUMBERS: " + '|'.join('"%s"' % i for i in range(0, 1000))
Expand Down

0 comments on commit 501eb77

Please sign in to comment.