-
Notifications
You must be signed in to change notification settings - Fork 0
/
jsg.jsg
376 lines (293 loc) · 9.08 KB
/
jsg.jsg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
// TODO: no literals in InputElementFile ATM which won't work
// and it brings the whole Literal issue back up
//
// I can't even remember why I need InputElementGrammar and GrammarLiteral!!!
//
// Still have problem with U+ being literals
// and javascript definitions for patterns FlagChar[U]
// where U would be a ProductionParameter (uses ProductionName token)
// At the moment, to support U+, a ProductionName's first character cannot be followed by an AsciiSymbol
// but where does that leave [U]
// Idea: [ ProductionParameter ]
// ProductionParameter: + PPName or ? PPName or ~ PPName or PPName
// ProductionParameterName: [lexical goal InputElementPPName]
// which only recognizes A[a]*
// Then define ProductionName as being A[a]+
// So U is a PPName but not a ProductionName
//
// Also the difference between
// [ but not one of ... multiple ProductionLiterals
// [but not one of ... a SpecialInstruction
// and, from above
// [ Aa ] Ascii [, ProductionName, ]
// [Aa] ProductionParameter
//
// Also, may want to recognize
// [~, [+, [? as literals but
// [~PPName] as ProductionParameter
//
// All seem to boil down to whitespace handling
//
// Note:
// LHS suffix can be [Xx] [Xx, Yy] etc
// RHS prefix can be [+Xx] or [~Xx]
// RHS suffix can be [Xx], [?Xx], [Xx, Yy] [Xx, ?Yy] [?Xx, ?Yy] etc
//
// Another, separate issue,
// How to recognize sequences of symbol literals in comma lists?
// [not one of &, -), +,, -] for example where this means
// not one of "&" "-)" "+," "-"
// No idea how to handle this
//
// Many more to come I am sure :)
/*
* Provided abbreviations
* <ANY> - any Unicode code point
* <USP> - any Unicode space (non-ascii character from class Zs)
* <IDSTART> - any Unicode ID_Start character
* <IDCONTINUE> - any Unicode ID_Continue character
* <EOF> - end of file
*/
// Abbreviations defined by this file. Each one names a single code point
// written in U+XXXX form (see AbbreviationDefinition and UnicodeCharacter).
<TAB> ::
U+0009
<SP> ::
U+0020
<LF> ::
U+000A
<CR> ::
U+000D
// Goal declarations (see GoalProduction): JsgFile is the syntactic goal;
// InputElementFile and InputElementGrammar are the lexical goals.
[syntactic goals JsgFile]
[lexical goals InputElementFile or InputElementGrammar]
// Input elements recognized under the InputElementFile lexical goal.
// It includes comments and the production punctuators but no literal element
// (see the TODO at the top of the file).
// NOTE(review): GrammarEnd is referenced here but has no definition in this
// file; the productions that close a bracket use a literal "]" instead.
InputElementFile ::
WhiteSpace
LineTerminatorSequence
Comment
Abbreviation
ProductionName
ProductionPunctuator
GrammarStart
GrammarEnd
// Input elements recognized under the InputElementGrammar lexical goal, which
// syntactic productions select with [lexical goal InputElementGrammar].
// Compared with InputElementFile it has GrammarLiteral and ArgumentNumber,
// and no Comment, ProductionPunctuator, GrammarStart or GrammarEnd.
InputElementGrammar ::
WhiteSpace
LineTerminatorSequence
Abbreviation
ProductionName
GrammarLiteral
ArgumentNumber
// Basic character classes used by the lexical productions.
// SourceCharacter is any Unicode code point (the provided <ANY> abbreviation).
SourceCharacter ::
<ANY>
// ASCII punctuation characters.
AsciiSymbol :: one of
! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
AsciiUpperCase :: one of
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
AsciiLowerCase :: one of
a b c d e f g h i j k l m n o p q r s t u v w x y z
DecimalDigit :: one of
0 1 2 3 4 5 6 7 8 9
// Digits 1-9; used by ArgumentNumber, so "$0" is not an argument number.
NonZeroDecimalDigit :: one of
1 2 3 4 5 6 7 8 9
// Hexadecimal digits in either case.
HexDigit :: one of
0 1 2 3 4 5 6 7 8 9 a A b B c C d D e E f F
// White space is a tab or a space.
WhiteSpace ::
<TAB>
<SP>
// A single line-terminating code point: LF or CR.
LineTerminator ::
<LF>
<CR>
// A line break as one unit: LF, CR, or the pair CR LF.
LineTerminatorSequence ::
<LF>
<CR>
<CR> <LF>
// Comments come in a multi-line and a single-line form.
Comment ::
MultiLineComment
SingleLineComment
// A multi-line comment is delimited by "/*" and "*/"; the body is optional.
MultiLineComment ::
/* MultiLineCommentChars[opt] */
MultiLineCommentChars ::
MultiLineCommentChar MultiLineCommentChars[opt]
// Body characters: a line break, a "*" that is not followed by "/", or any
// other code point that is neither "*" nor a LineTerminator.
MultiLineCommentChar ::
LineTerminatorSequence
* [lookahead not /]
SourceCharacter [but not one of * or LineTerminator]
// A single-line comment starts with "//" and contains no LineTerminator.
SingleLineComment ::
// SingleLineCommentChars[opt]
SingleLineCommentChars ::
SingleLineCommentChar SingleLineCommentChars[opt]
SingleLineCommentChar ::
SourceCharacter [but not LineTerminator]
// An abbreviation is one or more upper-case ASCII letters between "<" and ">",
// for example <TAB> or <ANY>.
Abbreviation ::
< AbbreviationLetters >
AbbreviationLetters ::
AsciiUpperCase
AbbreviationLetters AsciiUpperCase
// A production name starts with an upper-case ASCII letter and continues with
// any number of upper- or lower-case ASCII letters.
// The lookahead on the first letter rejects a letter directly followed by an
// AsciiSymbol; per the notes at the top of the file this is what allows "U+".
ProductionName ::
AsciiUpperCase [lookahead not in AsciiSymbol]
ProductionName ProductionNamePart
ProductionNamePart ::
AsciiUpperCase
AsciiLowerCase
// The separators after a production's left-hand side: ":" for a
// SyntacticProduction and "::" for a LexicalProduction.
ProductionPunctuator :: one of
: ::
// A "[" that opens a grammar construct, i.e. one that does not qualify as the
// first symbol of a literal (see LiteralFirstSymbol).
GrammarStart ::
[ [but not LiteralFirstSymbol]
// The literal token of InputElementGrammar: LiteralChars with no parameter set.
GrammarLiteral ::
LiteralChars
// One first character followed by any number of next characters.
// The Literal and Production parameters are passed through to both.
LiteralChars[Literal, Production] ::
LiteralFirstChar[?Literal]
LiteralChars[?Literal, ?Production] LiteralNextChar[?Literal, ?Production]
// A literal may start with a permitted symbol, a lower-case letter or a digit;
// an upper-case letter is accepted only when the Literal parameter is set.
LiteralFirstChar[Literal] ::
LiteralFirstSymbol
[+Literal] AsciiUpperCase
AsciiLowerCase
DecimalDigit
/*
* Literals can only start with a symbol if they are not ambiguous with
* the beginning of the following grammar constructs:
* [A-Z - production name parameter
* [?A-Z - production name query parameter
* [+A-Z - production name only restriction
* [~A-Z - production name not restriction
* [a-z - special instruction
* <A-Z - abbreviation
* $1-9 - argument number
*
* Recognition of these sequences requires the use of a lexical production
* so that the characters can be specified with spaces between them
*
* The first alternative uses the "but not one of ... or ..." form, which is
* the only ButNotRestriction form that accepts an "or"-separated list.
*/
LiteralFirstSymbol ::
AsciiSymbol [but not one of [ or < or $]
[ [lookahead not in GrammarStartLookahead]
< [lookahead not in AsciiUpperCase]
$ [lookahead not in NonZeroDecimalDigit]
// Character sequences that, when they follow "[", prevent the bracket from
// starting a literal: an upper-case letter, optionally prefixed by "?", "+" or
// "~" (parameter forms), or a lower-case letter (special instruction).
GrammarStartLookahead ::
AsciiUpperCase
? AsciiUpperCase
+ AsciiUpperCase
~ AsciiUpperCase
AsciiLowerCase
// Characters after the first one of a literal. Lower-case letters and digits
// are always allowed; upper-case letters need the Literal parameter; an
// AsciiSymbol needs either the Literal or the Production parameter.
LiteralNextChar[Literal, Production] ::
AsciiLowerCase
DecimalDigit
[+Literal] AsciiUpperCase
[+Literal] AsciiSymbol
[+Production] AsciiSymbol
// A code point written as "U+" followed by exactly four hexadecimal digits.
UnicodeCharacter ::
U+ HexDigit HexDigit HexDigit HexDigit
// A literal as it appears in a production body (Production parameter set).
ProductionLiteral ::
LiteralChars[Production]
// A literal as it appears in a "one of" list (Literal parameter set).
Literal ::
LiteralChars[Literal]
// "$" followed by a single digit 1-9; used by ReparseInstruction.
ArgumentNumber ::
$ NonZeroDecimalDigit
// Syntactic goal: a file is a list of one or more definitions.
JsgFile :
Definitions
Definitions :
Definition
Definitions Definition
// A definition is a top-level special instruction, an abbreviation definition
// or a production definition.
Definition :
SpecialInstruction
AbbreviationDefinition
ProductionDefinition
// Binds an abbreviation to a single code point, e.g. "<TAB> ::" then "U+0009".
AbbreviationDefinition :
Abbreviation :: [indent] UnicodeCharacter
// A production definition is either lexical ("::") or syntactic (":").
ProductionDefinition :
LexicalProduction
SyntacticProduction
LexicalProduction :
ParameterizedProductionName :: [indent] ProductionDetail
SyntacticProduction :
ParameterizedProductionName : [indent] ProductionDetail
// The right-hand side: a "one of" list of literals, or a list of productions.
ProductionDetail :
one of OneOfLiterals
Productions
// A production name with an optional bracketed parameter list.
// The Query parameter is passed down to the individual parameters.
ParameterizedProductionName[Query] :
ProductionName ProductionParameters[?Query][opt]
// The bracketed list itself: GrammarStart, the parameters, then "]".
ProductionParameters[Query] : [lexical goal InputElementGrammar]
GrammarStart ProductionParameterList[?Query] ]
// One or more parameters separated by ",".
ProductionParameterList[Query] : [lexical goal InputElementGrammar]
ProductionParameter[?Query]
ProductionParameterList[?Query] , ProductionParameter[?Query]
// A parameter is a production name; with Query set it may also be written
// with a leading "?".
ProductionParameter[Query] : [lexical goal InputElementGrammar]
ProductionName
[+Query] ? ProductionName
// The items of a "one of" list: one or more entries.
OneOfLiterals :
OneOfLiteral
OneOfLiterals OneOfLiteral
// An entry is a literal or a special instruction. Declared with ":" because
// SpecialInstruction is a syntactic production.
OneOfLiteral :
Literal
SpecialInstruction
// One or more Production entries, written as a left-recursive list in the
// same shape as Definitions and OneOfLiterals.
Productions :
Production
Productions Production
// A single alternative: a start item followed by a (possibly empty) list of
// further items.
Production :
ProductionStartItem [indent] ProductionList
// The first item may be a production restriction or an ordinary item.
ProductionStartItem :
ProductionRestriction
ProductionItem
// A restriction at the start of an alternative: a bracketed "+ Name" or
// "~ Name" (see IncludeProduction), e.g. "[+Literal]".
// NOTE(review): a second production named ProductionRestriction
// ("no ProductionName here") is defined later in this file, and
// CoreSpecialInstruction also references this name; confirm whether one of
// the two should be renamed.
ProductionRestriction :
GrammarStart IncludeProduction ]
// The inside of a production restriction: "+" or "~" followed by a production
// name. The comment before LiteralFirstSymbol calls these the
// "only" and "not" restrictions.
IncludeProduction : [lexical goal InputElementGrammar]
+ ProductionName
~ ProductionName
// Zero or more items that follow the start item of an alternative.
ProductionList :
[empty]
ProductionList ProductionItem
// An item is a production reference, a special instruction or a literal.
ProductionItem :
OptionalParameterizedProductionName
SpecialInstruction
ProductionLiteral
// A production reference, optionally followed by the "[opt]" marker.
OptionalParameterizedProductionName :
ParameterizedProductionName[Query] OptionalMarker[opt]
// The "[opt]" marker: GrammarStart, the word "opt", then "]".
OptionalMarker :
GrammarStart Optional ]
Optional : [lexical goal InputElementGrammar]
opt
// A special instruction is a bracketed directive such as "[empty]",
// "[lookahead not /]" or "[lexical goal InputElementGrammar]".
SpecialInstruction :
GrammarStart CoreSpecialInstruction ]
// The kinds of directive that may appear between the brackets.
CoreSpecialInstruction : [lexical goal InputElementGrammar]
EmptyProduction
LookaheadRestriction
ButNotRestriction
ProductionRestriction
GoalProduction
LocationInstruction
ReparseInstruction
// The word "empty", as used in "[empty]".
EmptyProduction : [lexical goal InputElementGrammar]
empty
// Lookahead restrictions: a single terminal ("lookahead not /"), a production
// name ("lookahead not in AsciiSymbol"), or a braced, comma-separated list.
LookaheadRestriction : [lexical goal InputElementGrammar]
lookahead not RestrictedTerminal
lookahead not in ProductionName
lookahead not in { RestrictedItemList }
// A terminal that can be restricted: an abbreviation or a literal.
RestrictedTerminal : [lexical goal InputElementGrammar]
Abbreviation
GrammarLiteral
// A restricted item: one or two terminals, or a production name.
RestrictedItem : [lexical goal InputElementGrammar]
RestrictedTerminal RestrictedTerminal[opt]
ProductionName
// One or more restricted items separated by ",".
RestrictedItemList : [lexical goal InputElementGrammar]
RestrictedItem
RestrictedItemList , RestrictedItem
// Two or more restricted items separated by the word "or".
RestrictedItemOrList : [lexical goal InputElementGrammar]
RestrictedItem or RestrictedItem
RestrictedItemOrList or RestrictedItem
// Exclusions: "but not X" for one item, "but not one of X or Y ..." for several.
ButNotRestriction : [lexical goal InputElementGrammar]
but not RestrictedItem
but not one of RestrictedItemOrList
// The "no Name here" restriction.
// NOTE(review): ProductionRestriction is also defined earlier in this file
// (as GrammarStart IncludeProduction "]"); confirm which definition is
// intended under this name.
ProductionRestriction : [lexical goal InputElementGrammar]
no ProductionName here
// Goal directives, e.g. "[syntactic goals JsgFile]" or
// "[lexical goal InputElementGrammar]". The plural forms take an
// "or"-separated list of production names.
GoalProduction : [lexical goal InputElementGrammar]
lexical goal ProductionName
lexical goals ProductionNameOrList
syntactic goals ProductionNameOrList
if syntactic goal ProductionName
// One or more production names separated by the word "or".
ProductionNameOrList : [lexical goal InputElementGrammar]
ProductionName
ProductionNameOrList or ProductionName
// Location keywords; "[indent]" is the form used in this file's own productions.
LocationInstruction : [lexical goal InputElementGrammar]
before
after
indent
// Reparse directives: "reparse" with an optional argument number followed by
// "using" and a production reference, or the fixed phrase "reparse or error".
ReparseInstruction : [lexical goal InputElementGrammar]
reparse ArgumentNumber[opt] using ParameterizedProductionName[Query]
reparse or error