forked from vsbenas/parser-gen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser-gen.lua
563 lines (470 loc) · 11.8 KB
/
parser-gen.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
local m = require "lpeglabel"
local peg = require "peg-parser"
local eg = require "errorgen"
local s = require "stack"
-- create stack for tokens inside captures. nil - not inside capture, 0 - inside capture, 1 - token found inside capture
local tokenstack = Stack:Create()
-- Lua 5.1 compatibility:
local unpack = unpack or table.unpack
local Predef = { nl = m.P"\n", cr = m.P"\r", tab = m.P"\t" }
local mem = {} -- for compiled grammars
local function updatelocale()
m.locale(Predef)
local any = m.P(1)
Predef.a = Predef.alpha
Predef.c = Predef.cntrl
Predef.d = Predef.digit
Predef.g = Predef.graph
Predef.l = Predef.lower
Predef.p = Predef.punct
Predef.s = Predef.space
Predef.u = Predef.upper
Predef.w = Predef.alnum
Predef.x = Predef.xdigit
Predef.A = any - Predef.a
Predef.C = any - Predef.c
Predef.D = any - Predef.d
Predef.G = any - Predef.g
Predef.L = any - Predef.l
Predef.P = any - Predef.p
Predef.S = any - Predef.s
Predef.U = any - Predef.u
Predef.W = any - Predef.w
Predef.X = any - Predef.x
mem = {}
end
updatelocale()
local definitions = {}
local tlabels = {}
local totallabels = 0
local tlabelnames = {} -- reverse table
local tdescs = {}
local trecs = {} -- recovery for each error
local currentrule
local followset = {}
local function defaultsync(patt)
return (m.P(1)^-1) * (-patt * m.P(1))^0
end
-- TODO: store these variables for each grammar
local SKIP = (Predef.space + Predef.nl)
local SYNC = defaultsync(SKIP)
local recovery = true
local skipspaces = true
local buildast = true
local function sync (patt)
return patt --(-patt * m.P(1))^0 * patt^0 -- skip until we find the pattern and consume it(if we do)
end
local function pattspaces (patt)
if skipspaces then
return patt * SKIP^0
else
return patt
end
end
local function token (patt)
local incapture = tokenstack:pop() -- returns nil if not in capture
if not incapture then
return pattspaces(patt)
end
tokenstack:push(1)
return patt
end
-- functions used by the tool
local function iscompiled (gr)
return m.type(gr) == "pattern"
end
local function istoken (t)
return t["token"] == "1"
end
local function isfinal(t)
return t["t"] or t["nt"] or t["func"] or t["s"] or t["num"]
end
local function isaction(t)
return t["action"]
end
local function isrule(t)
return t and t["rulename"]
end
local function isgrammar(t)
if type(t) == "table" and not(t["action"]) then
return isrule(t[1])
end
return false
end
local function iscapture (action)
return action == "=>" or action == "gcap" or action == "scap" or action == "subcap" or action == "poscap"
end
local function finalNode (t)
if t["t"] then
return"t",t["t"] -- terminal
elseif t["nt"] then
return "nt", t["nt"], istoken(t) -- nonterminal
elseif t["func"] then
return "func", t["func"] -- function
elseif t["s"] then
return "s", t["s"]
elseif t["num"] then
return "num", t["num"]
end
return nil
end
local function specialrules(t, builder)
-- initialize values
SKIP = (Predef.space + Predef.nl)
skipspaces = true
SYNC = nil
recovery = true
-- find SPACE and SYNC rules
for i, v in ipairs(ast) do
local name = v["rulename"]
local rule
if name == "SKIP" then
rule = traverse(v["rule"], true)
if v["rule"]["t"] == '' then
skipspaces = false
else
skipspaces = true
SKIP = rule
end
builder[name] = rule
elseif name == "SYNC" then
rule = traverse(v["rule"], true)
if v["rule"]["t"] == '' then-- SYNC <- ''
recovery=false
else
recovery= true
SYNC = rule
end
builder[name] = rule
end
end
if not SYNC and recovery then
SYNC = defaultsync(SKIP)
end
end
local function buildrecovery(grammar)
local synctoken = sync(SYNC)
local grec = grammar
for k,v in pairs(tlabels) do
if trecs[v] then -- custom sync token
grec = m.Rec(grec,record(v) * trecs[v], v)
else -- use global sync token
grec = m.Rec(grec,record(v) * synctoken, v)
end
end
return grec
end
local function buildgrammar (ast)
local builder = {}
specialrules(ast, builder)
local initialrule
for i, v in ipairs(ast) do
local istokenrule = v["token"] == "1"
local isfragment = v["fragment"] == "1"
local name = v["rulename"]
currentrule = name
local isspecial = name == "SKIP" or name == "SYNC"
local rule = v["rule"]
if i == 1 then
initialrule = name
table.insert(builder, name) -- lpeg syntax
builder[name] = traverse(rule, istokenrule)
else
if not builder[name] then -- dont traverse rules for SKIP and SYNC twice
builder[name] = traverse(rule, istokenrule)
end
end
if buildast and not isfragment and not isspecial then
if istokenrule then
builder[name] = m.C(builder[name])
end
builder[name] = m.Ct(m.Cg(m.Cc(name),"rule") * builder[name])
end
end
if skipspaces then
builder[initialrule] = SKIP^0 * builder[initialrule] -- skip spaces at the beginning of the input
end
if recovery then
builder[initialrule] = buildrecovery(builder[initialrule]) -- build recovery on top of initial rule
end
return builder
end
local function addspaces (caps)
local hastoken = tokenstack:pop()
if hastoken == 1 then
return pattspaces(caps)
end
return caps
end
local function applyaction(action, op1, op2, labels,tokenrule)
if action == "or" then
if labels then -- labels = {{s="errName"},{s="errName2"}}
for i, v in ipairs(labels) do
local labname = v["s"]
local lab = tlabels[labname]
if not lab then
error("Label '"..labname.."' undefined")
end
labels[i] = lab
end
return m.Rec(op1,op2,unpack(labels))
end
return op1 + op2
elseif action == "and" then
return op1 * op2
elseif action == "&" then
return #op1
elseif action == "!" then
return -op1
elseif action == "+" then
return op1^1
elseif action == "*" then
return op1^0
elseif action == "?" then
return op1^-1
elseif action == "^" then
return op1^op2
elseif action == "^LABEL" then
local lab = tlabels[op2]
if not lab then
error("Label '"..op2.."' unspecified using setlabels()")
end
return op1 + m.T(lab)
elseif action == "->" then
return op1 / op2
-- in captures we add SPACES^0
elseif action == "=>" then
return addspaces(m.Cmt(op1,op2))
elseif action == "tcap" then
return m.Ct(op1) -- nospaces
elseif action == "gcap" then
return addspaces(m.Cg(op1, op2))
elseif action == "bref" then
return m.Cb(op1) --m.Cmt(m.Cb(op1), equalcap) -- do we need to add spaces to bcap?
elseif action == "poscap" then
return addspaces(m.Cp())
elseif action == "subcap" then
return addspaces(m.Cs(op1))
elseif action == "scap" then
return addspaces(m.C(op1))
elseif action == "anychar" then
if buildast and not tokenrule then
return m.C(m.P(1))
end
return m.P(1)
elseif action == "label" then
local lab = tlabels[op1]
if not lab then
error("Label '"..op1.."' unspecified using setlabels()")
end
return m.T(lab) -- lpeglabel
elseif action == "%" then
if definitions[op1] then
return definitions[op1]
elseif Predef[op1] then
return Predef[op1]
else
error("Definition for '%"..op1.."' unspecified(use second parameter of parser-gen.compile())")
end
elseif action == "invert" then
return m.P(1) - op1
elseif action == "range" then
local res = m.R(op1)
if not tokenrule then
if buildast then
res = m.C(res)
end
res = token(res)
end
return res
else
error("Unsupported action '"..action.."'")
end
end
local function applyfinal(action, term, tokenterm, tokenrule)
if action == "t" then
local res = m.P(term)
if not tokenrule then
if buildast then
res = m.C(res)
end
if skipspaces then
res = token(res)
end
end
return res
elseif action == "nt" then
if skipspaces and tokenterm and (not tokenrule) then
return token(m.V(term))
else
return m.V(term)
end
elseif action == "func" then
if definitions[term] then
return definitions[term]
else
error("Definition for function '"..term.."' unspecified (use second parameter of parser-gen.compile())")
end
elseif action == "s" then -- simple string
return term
elseif action == "num" then -- numbered string
return tonumber(term)
end
end
local function applygrammar(gram)
return m.P(gram)
end
local function build(ast, defs)
if defs then
definitions = defs
end
if isgrammar(ast) then
return traverse(ast)
else
SKIP = (Predef.space + Predef.nl)
skipspaces = true
SYNC = nil
recovery = true
SYNC = defaultsync(SKIP)
currentrule = ''
local res = SKIP ^0 * traverse(ast)
if buildast then
res = m.Ct(res)
end
return res -- input is not a grammar - skip spaces and sync by default
end
end
function traverse (ast, tokenrule)
if not ast then
return nil
end
if isfinal(ast) then
local typefn, fn, tok = finalNode(ast)
return applyfinal(typefn, fn, tok, tokenrule)
elseif isaction(ast) then
local act, op1, op2, labs, ret1, ret2
act = ast["action"]
op1 = ast["op1"]
op2 = ast["op2"]
labs = ast["condition"] -- recovery operations
-- post-order traversal
if iscapture(act) then
tokenstack:push(0) -- not found any tokens yet
end
ret1 = traverse(op1, tokenrule)
ret2 = traverse(op2, tokenrule)
return applyaction(act, ret1, ret2, labs, tokenrule)
elseif isgrammar(ast) then
--
local g = buildgrammar (ast)
return applygrammar (g)
else
peg.print_r(ast)
error("Unsupported AST")
end
end
-- recovery grammar
-- from relabel.lua module
local function calcline (s, i)
if i == 1 then return 1, 1 end
local rest, line = s:sub(1,i):gsub("[^\n]*\n", "")
local col = #rest
return 1 + line, col ~= 0 and col or 1
end
local subject, errors, errorfunc
function record(label)
return (m.Cp() * m.Cc(label)) / recorderror
end
function recorderror(position,label)
-- call error function here
local line, col = calcline(subject, position)
local desc
if label == 0 then
desc = "Syntax error"
else
desc = tdescs[label]
end
if errorfunc then
local temp = string.sub(subject,position)
local strend = string.find(temp, "\n")
local sfail = string.sub(temp, 1, strend)
errorfunc(desc,line,col,sfail,trecs[label])
end
local err = { line = line, col = col, label=tlabelnames[label], msg = desc }
table.insert(errors, err)
end
-- end
-- t = {errName="Error description",...}
local function setlabels (t, errorgen)
local index
if errorgen then
index = totallabels + 1
else
-- reset error tables
index = 1
tlabels = {}
tdescs = {}
trecs = {}
end
for key,value in pairs(t) do
if index >= 255 then
error("Error label limit reached(255)")
end
if type(value) == "table" then -- we have a recovery expression
tdescs[index] = value[1]
trecs[index] = traverse(peg.pegToAST(value[2]), true)-- PEG to LPEG
else
tdescs[index] = value
end
tlabels[key] = index
tlabelnames[index] = key -- reverse table
index = index + 1
end
totallabels = index-1
end
local function compile (input, defs, errors, nocaptures)
if iscompiled(input) then
return input
end
if not mem[input] then
buildast = true
if nocaptures then
buildast=false
end
--re.setlabels(tlabels)
--re.compile(input,defs)
-- build ast
ast = peg.pegToAST(input)
if errors then
local follow = eg.follow(ast)
local errors = eg.adderrors(ast, follow)
setlabels (errors, true) -- add errors generated by errorgen
end
local gram = build(ast,defs)
if not gram then
-- find error using relabel module
end
mem[input] = gram-- store if the user forgets to compile it
end
return mem[input]
end
local function parse (input, grammar, errorfunction)
sp = {}
if not iscompiled(grammar) then
cp = compile(grammar)
grammar = cp
end
-- set up recovery table
errorfunc = errorfunction
subject = input
errors = {}
-- end
local r, e, sfail = m.match(grammar,input)
if not r then
recorderror(#input - #sfail, e)
end
if #errors == 0 then errors=nil end
return r, errors
end
local pg = {compile=compile, setlabels=setlabels, parse=parse,follow=follow}
return pg