Skip to content

Commit

Permalink
[spec/interpreter/test] Align definition of newline with Unicode recommendation (#1684)
Browse files Browse the repository at this point in the history
  • Loading branch information
rossberg authored Oct 25, 2023
1 parent b39baf7 commit 43d405f
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 16 deletions.
10 changes: 6 additions & 4 deletions document/core/text/lexical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ The allowed formatting characters correspond to a subset of the |ASCII|_ *format
\production{white space} & \Tspace &::=&
(\text{~~} ~|~ \Tformat ~|~ \Tcomment)^\ast \\
\production{format} & \Tformat &::=&
\unicode{09} ~|~ \unicode{0A} ~|~ \unicode{0D} \\
\Tnewline ~|~ \unicode{09} \\
\production{newline} & \Tnewline &::=&
\unicode{0A} ~|~ \unicode{0D} ~|~ \unicode{0D}~\unicode{0A} \\
\end{array}
The only relevance of white space is to separate :ref:`tokens <text-token>`. It is otherwise ignored.
Expand All @@ -107,13 +109,13 @@ Block comments can be nested.
\production{comment} & \Tcomment &::=&
\Tlinecomment ~|~ \Tblockcomment \\
\production{line comment} & \Tlinecomment &::=&
\Tcommentd~~\Tlinechar^\ast~~(\unicode{0A} ~|~ \T{eof}) \\
\Tcommentd~~\Tlinechar^\ast~~(\Tnewline ~|~ \T{eof}) \\
\production{line character} & \Tlinechar &::=&
c{:}\Tchar & (\iff c \neq \unicode{0A}) \\
c{:}\Tchar & (\iff c \neq \unicode{0A} \land c \neq \unicode{0D}) \\
\production{block comment} & \Tblockcomment &::=&
\Tcommentl~~\Tblockchar^\ast~~\Tcommentr \\
\production{block character} & \Tblockchar &::=&
c{:}\Tchar & (\iff c \neq \text{;} \wedge c \neq \text{(}) \\ &&|&
c{:}\Tchar & (\iff c \neq \text{;} \land c \neq \text{(}) \\ &&|&
\text{;} & (\iff~\mbox{the next character is not}~\text{)}) \\ &&|&
\text{(} & (\iff~\mbox{the next character is not}~\text{;}) \\ &&|&
\Tblockcomment \\
Expand Down
1 change: 1 addition & 0 deletions document/core/util/macros.def
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@
.. |Tchar| mathdef:: \xref{text/lexical}{text-char}{\T{char}}
.. |Tspace| mathdef:: \xref{text/lexical}{text-space}{\T{space}}
.. |Tformat| mathdef:: \xref{text/lexical}{text-format}{\T{format}}
.. |Tnewline| mathdef:: \xref{text/lexical}{text-newline}{\T{newline}}

.. |Ttoken| mathdef:: \xref{text/lexical}{text-token}{\T{token}}
.. |Tkeyword| mathdef:: \xref{text/lexical}{text-keyword}{\T{keyword}}
Expand Down
2 changes: 1 addition & 1 deletion interpreter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ zip: $(ZIP)

# Building

.PHONY: $(NAME) $(JSLIB)
.PHONY: $(NAME) $(JSLIB)

$(NAME):
rm -f $@
Expand Down
24 changes: 13 additions & 11 deletions interpreter/text/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ let string s =
while !i < String.length s - 1 do
let c = if s.[!i] <> '\\' then s.[!i] else
match (incr i; s.[!i]) with
| 'n' -> '\n'
| 'r' -> '\r'
| 't' -> '\t'
| 'n' -> '\x0a'
| 'r' -> '\x0d'
| 't' -> '\x09'
| '\\' -> '\\'
| '\'' -> '\''
| '\"' -> '\"'
Expand Down Expand Up @@ -61,10 +61,12 @@ let letter = ['a'-'z''A'-'Z']
let symbol =
['+''-''*''/''\\''^''~''=''<''>''!''?''@''#''$''%''&''|'':''`''.''\'']

let space = [' ''\t''\n''\r']
let ascii_newline = ['\x0a''\x0d']
(* A newline is LF (\x0a), CR (\x0d), or the two-character sequence CR LF
   (\x0d\x0a), matching the text-format grammar:
     newline ::= U+0A | U+0D | U+0D U+0A
   The pair must be spelled CR-then-LF. Because ocamllex picks the longest
   match, a CRLF line ending is then consumed as ONE newline, so rules that
   call Lexing.new_line on this pattern count it once rather than twice. *)
let newline = ascii_newline | "\x0d\x0a"
let space = [' ''\x09''\x0a''\x0d']
let control = ['\x00'-'\x1f'] # space
let ascii = ['\x00'-'\x7f']
let ascii_no_nl = ascii # '\x0a'
let ascii_no_nl = ascii # ascii_newline
let utf8cont = ['\x80'-'\xbf']
let utf8enc =
['\xc2'-'\xdf'] utf8cont
Expand Down Expand Up @@ -127,8 +129,8 @@ rule token = parse
| float as s { FLOAT s }

| string as s { STRING (string s) }
| '"'character*('\n'|eof) { error lexbuf "unclosed string literal" }
| '"'character*['\x00'-'\x09''\x0b'-'\x1f''\x7f']
| '"'character*(newline|eof) { error lexbuf "unclosed string literal" }
| '"'character*(control#ascii_newline)
{ error lexbuf "illegal control character in string literal" }
| '"'character*'\\'_
{ error_nest (Lexing.lexeme_end_p lexbuf) lexbuf "illegal escape" }
Expand Down Expand Up @@ -698,11 +700,11 @@ rule token = parse
| id as s { VAR s }

| ";;"utf8_no_nl*eof { EOF }
| ";;"utf8_no_nl*'\n' { Lexing.new_line lexbuf; token lexbuf }
| ";;"utf8_no_nl*newline { Lexing.new_line lexbuf; token lexbuf }
| ";;"utf8_no_nl* { token lexbuf (* causes error on following position *) }
| "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; token lexbuf }
| space#'\n' { token lexbuf }
| '\n' { Lexing.new_line lexbuf; token lexbuf }
| space#ascii_newline { token lexbuf }
| newline { Lexing.new_line lexbuf; token lexbuf }
| eof { EOF }

| reserved { unknown lexbuf }
Expand All @@ -713,7 +715,7 @@ rule token = parse
and comment start = parse
| ";)" { () }
| "(;" { comment (Lexing.lexeme_start_p lexbuf) lexbuf; comment start lexbuf }
| '\n' { Lexing.new_line lexbuf; comment start lexbuf }
| newline { Lexing.new_line lexbuf; comment start lexbuf }
| utf8_no_nl { comment start lexbuf }
| eof { error_nest start lexbuf "unclosed comment" }
| _ { error lexbuf "malformed UTF-8 encoding" }
Binary file modified test/core/comments.wast
Binary file not shown.

0 comments on commit 43d405f

Please sign in to comment.