-
Notifications
You must be signed in to change notification settings - Fork 1
/
reference.py
executable file
·134 lines (105 loc) · 3.52 KB
/
reference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
from pprint import pprint
import sys
def parse_grammar(grammar_file):
grammar = {}
# Parse grammar
for line in grammar_file:
line = line.strip() # Strip trailing whitespace
line = line.split('#')[0] # Stop reading after comment starts
if len(line) == 0: continue
# Split into component parts
line = line.split()
lhs = line[0]
rhs = line[1:]
# Add to grammar
if lhs not in grammar:
grammar[lhs] = []
grammar[lhs].append(rhs)
return grammar
class StateSet(object):
def __init__(self, states=[]):
self._states = set(states)
self._work = list(self._states)
def __str__(self):
return '{%s}' % ', '.join([str(s) for s in self._states])
__repr__ = __str__
def add(self, s):
if s not in self._states:
self._states.add(s)
self._work.append(s)
def has_next(self):
return self._work != []
def next(self):
return self._work.pop(0)
class State(object):
def __init__(self, lhs, rhs, pos=0, origin=0):
self.lhs = lhs
self.rhs = tuple(rhs)
self.pos = pos
self.origin = origin
def __str__(self):
rhs_dot = list(self.rhs)
rhs_dot.insert(self.pos, '•')
rhs_str = ' '.join(rhs_dot)
return '%s -> %s' % (self.lhs, rhs_str)
__repr__ = __str__
def __eq__(self, other):
return (self.lhs, self.rhs, self.pos, self.origin) == \
(other.lhs, other.rhs, other.pos, other.origin)
def __hash__(self):
return hash((self.lhs, self.rhs, self.pos, self.origin))
def finished(self):
return self.pos == len(self.rhs)
def next_elem(self):
return self.rhs[self.pos]
def incr_pos(self):
return State(self.lhs, self.rhs, self.pos + 1, self.origin)
def parse(grammar, words):
# Create chart.
chart = [StateSet() for _ in range(len(words) + 1)]
def predictor(state, k):
lhs = state.next_elem()
for rhs in grammar[lhs]:
chart[k].add(State(lhs, rhs, origin=k))
def scanner(state, k):
if k >= len(words):
return
if words[k] == state.next_elem():
chart[k+1].add(state.incr_pos())
def completer(state, k):
assert state.origin != k
for s in chart[state.origin]._states:
if not (s.finished()) and s.rhs[s.pos] == state.lhs:
chart[k].add(s.incr_pos())
# Initialize.
for rhs in grammar['START']:
chart[0].add(State('START', rhs))
for k in range(len(words) + 1):
while chart[k].has_next():
state = chart[k].next()
if not state.finished():
if state.next_elem() in grammar:
predictor(state, k)
else:
scanner(state, k)
else:
completer(state, k)
return chart
def main(grammar_file, in_file):
grammar = parse_grammar(grammar_file)
words = in_file.read().split()
chart = parse(grammar, words)
for (k, states) in enumerate(chart):
for state in states._states:
print("(0, ({}, {}, {}))".format(k, state, state.origin))
if __name__ == '__main__':
if len(sys.argv) > 3 or len(sys.argv) < 2:
print('Usage: earley.py GRAMMAR [FILE]')
exit(1)
grammar_file = open(sys.argv[1], 'r')
if len(sys.argv) == 3:
in_file = open(sys.argv[2], 'r')
else:
in_file = sys.stdin
main(grammar_file, in_file)