From a2c0d17402252fb681555da1cba7ab1b6cfe5e10 Mon Sep 17 00:00:00 2001
From: Keith Winstein
Date: Mon, 3 Oct 2022 10:40:39 -0700
Subject: [PATCH] Add tokens test + adjust Wast lexing to match updated spec
 (#2001)

* Update testsuite (adding new tokens.txt test)

* Adjust Wast lexing to match updated spec (WebAssembly/spec#1499)
---
 include/wabt/wast-lexer.h   |  12 ++--
 src/wast-lexer.cc           |  43 +++++++++-----
 test/spec/tokens.txt        | 113 ++++++++++++++++++++++++++++++++++++
 test/wasm2c/spec/tokens.txt |   5 ++
 4 files changed, 156 insertions(+), 17 deletions(-)
 create mode 100644 test/spec/tokens.txt
 create mode 100644 test/wasm2c/spec/tokens.txt

diff --git a/include/wabt/wast-lexer.h b/include/wabt/wast-lexer.h
index 385032ca7..3c64fea91 100644
--- a/include/wabt/wast-lexer.h
+++ b/include/wabt/wast-lexer.h
@@ -54,7 +54,7 @@ class WastLexer {
  private:
   static const int kEof = -1;
 
-  enum class CharClass { Reserved = 1, Keyword = 2, HexDigit = 4, Digit = 8 };
+  enum class CharClass { IdChar = 1, Keyword = 2, HexDigit = 4, Digit = 8 };
 
   Location GetLocation();
   std::string_view GetText(size_t offset = 0);
@@ -76,12 +76,16 @@ class WastLexer {
   static bool IsDigit(int c) { return IsCharClass(c, CharClass::Digit); }
   static bool IsHexDigit(int c) { return IsCharClass(c, CharClass::HexDigit); }
   static bool IsKeyword(int c) { return IsCharClass(c, CharClass::Keyword); }
-  static bool IsReserved(int c) { return IsCharClass(c, CharClass::Reserved); }
+  static bool IsIdChar(int c) { return IsCharClass(c, CharClass::IdChar); }
 
   bool ReadNum();
   bool ReadHexNum();
-  int ReadReservedChars();
-  bool NoTrailingReservedChars() { return ReadReservedChars() == 0; }
+
+  enum class ReservedChars { None, Some, Id };
+  ReservedChars ReadReservedChars();
+  bool NoTrailingReservedChars() {
+    return ReadReservedChars() == ReservedChars::None;
+  }
   void ReadSign();
   Token GetStringToken(WastParser*);
   Token GetNumberToken(TokenType);
diff --git a/src/wast-lexer.cc b/src/wast-lexer.cc
index a601b75db..7a62e4a0b 100644
--- a/src/wast-lexer.cc
+++ b/src/wast-lexer.cc
@@ -24,7 +24,9 @@
 #include "wabt/lexer-source.h"
 #include "wabt/wast-parser.h"
 
-#define ERROR(...) parser->Error(GetLocation(), __VA_ARGS__)
+#define ERROR(...) \
+  if (parser)      \
+  parser->Error(GetLocation(), __VA_ARGS__)
 
 namespace wabt {
 
@@ -165,7 +167,7 @@ Token WastLexer::GetToken(WastParser* parser) {
       default:
         if (IsKeyword(PeekChar())) {
          return GetKeywordToken();
-        } else if (IsReserved(PeekChar())) {
+        } else if (IsIdChar(PeekChar())) {
          return GetReservedToken();
        } else {
          ReadChar();
@@ -310,6 +312,10 @@ Token WastLexer::GetStringToken(WastParser* parser) {
         continue;
 
       case '"':
+        if (PeekChar() == '"') {
+          ERROR("invalid string token");
+          has_error = true;
+        }
         in_string = false;
         break;
 
@@ -417,13 +423,13 @@ bool WastLexer::IsCharClass(int c, CharClass bit) {
   // def IsDigit(c): return Range(c, '0', '9')
   // def IsHexDigit(c): return IsDigit(c) or Range(c.lower(), 'a', 'f')
   // def IsKeyword(c): return Range(c, 'a', 'z')
-  // def IsReserved(c): return Range(c, '!', '~') and c not in '"(),;[]{}'
+  // def IsIdChar(c): return Range(c, '!', '~') and c not in '"(),;[]{}'
   //
   // print ([0] + [
   //   (8 if IsDigit(c) else 0) |
   //   (4 if IsHexDigit(c) else 0) |
   //   (2 if IsKeyword(c) else 0) |
-  //   (1 if IsReserved(c) else 0)
+  //   (1 if IsIdChar(c) else 0)
   //   for c in map(chr, range(0, 127))
   // ])
   static const char kCharClasses[257] = {
@@ -456,13 +462,23 @@ bool WastLexer::ReadHexNum() {
   return false;
 }
 
-int WastLexer::ReadReservedChars() {
-  int count = 0;
-  while (IsReserved(PeekChar())) {
-    ReadChar();
-    ++count;
+WastLexer::ReservedChars WastLexer::ReadReservedChars() {
+  ReservedChars ret{ReservedChars::None};
+  while (true) {
+    auto peek = PeekChar();
+    if (IsIdChar(peek)) {
+      ReadChar();
+      if (ret == ReservedChars::None) {
+        ret = ReservedChars::Id;
+      }
+    } else if (peek == '"') {
+      GetStringToken(nullptr);
+      ret = ReservedChars::Some;
+    } else {
+      break;
+    }
   }
-  return count;
+  return ret;
 }
 
 void WastLexer::ReadSign() {
@@ -562,10 +578,11 @@ Token WastLexer::GetNameEqNumToken(std::string_view name,
 
 Token WastLexer::GetIdToken() {
   ReadChar();
-  if (NoTrailingReservedChars()) {
-    return TextToken(TokenType::Reserved);
+  if (ReadReservedChars() == ReservedChars::Id) {
+    return TextToken(TokenType::Var);
   }
-  return TextToken(TokenType::Var);
+
+  return TextToken(TokenType::Reserved);
 }
 
 Token WastLexer::GetKeywordToken() {
diff --git a/test/spec/tokens.txt b/test/spec/tokens.txt
new file mode 100644
index 000000000..872f04775
--- /dev/null
+++ b/test/spec/tokens.txt
@@ -0,0 +1,113 @@
+;;; TOOL: run-interp-spec
+;;; STDIN_FILE: third_party/testsuite/tokens.wast
+(;; STDOUT ;;;
+out/test/spec/tokens.wast:74: assert_malformed passed:
+  out/test/spec/tokens/tokens.17.wat:1:41: error: unexpected token "0$l", expected a var (e.g. 12 or $foo).
+  (func (block $l (i32.const 0) (br_table 0$l)))
+                                          ^^^
+out/test/spec/tokens.wast:84: assert_malformed passed:
+  out/test/spec/tokens/tokens.19.wat:1:41: error: undefined label variable "$l0"
+  (func (block $l (i32.const 0) (br_table $l0)))
+                                          ^^^
+out/test/spec/tokens.wast:94: assert_malformed passed:
+  out/test/spec/tokens/tokens.21.wat:1:41: error: undefined label variable "$l$l"
+  (func (block $l (i32.const 0) (br_table $l$l)))
+                                          ^^^^
+out/test/spec/tokens.wast:114: assert_malformed passed:
+  out/test/spec/tokens/tokens.25.wat:1:2: error: unexpected token "data"a"", expected a module field or a module.
+  (data"a")
+   ^^^^^^^
+  out/test/spec/tokens/tokens.25.wat:1:9: error: unexpected token ), expected EOF.
+  (data"a")
+          ^
+out/test/spec/tokens.wast:124: assert_malformed passed:
+  out/test/spec/tokens/tokens.27.wat:1:7: error: unexpected token $l"a", expected ).
+  (data $l"a")
+        ^^^^^
+out/test/spec/tokens.wast:134: assert_malformed passed:
+  out/test/spec/tokens/tokens.29.wat:1:7: error: unexpected token $l" a", expected ).
+  (data $l" a")
+        ^^^^^^
+out/test/spec/tokens.wast:144: assert_malformed passed:
+  out/test/spec/tokens/tokens.31.wat:1:7: error: unexpected token $l"a ", expected ).
+  (data $l"a ")
+        ^^^^^^
+out/test/spec/tokens.wast:154: assert_malformed passed:
+  out/test/spec/tokens/tokens.33.wat:1:7: error: unexpected token $l"a ""b", expected ).
+  (data $l"a ""b")
+        ^^^^^^^^^
+out/test/spec/tokens.wast:164: assert_malformed passed:
+  out/test/spec/tokens/tokens.35.wat:1:7: error: unexpected token $l"", expected ).
+  (data $l"")
+        ^^^^^^^^^^
+out/test/spec/tokens.wast:174: assert_malformed passed:
+  out/test/spec/tokens/tokens.37.wat:1:7: error: unexpected token $l" ", expected ).
+  (data $l" ")
+        ^^^^^^^^^^^
+out/test/spec/tokens.wast:184: assert_malformed passed:
+  out/test/spec/tokens/tokens.39.wat:1:7: error: unexpected token $l" ", expected ).
+  (data $l" ")
+        ^^^^^^^^^^^
+out/test/spec/tokens.wast:194: assert_malformed passed:
+  out/test/spec/tokens/tokens.41.wat:1:7: error: invalid string token
+  (data "a""b")
+        ^^^
+  out/test/spec/tokens/tokens.41.wat:1:7: error: unexpected token Invalid, expected ).
+  (data "a""b")
+        ^^^
+out/test/spec/tokens.wast:204: assert_malformed passed:
+  out/test/spec/tokens/tokens.43.wat:1:7: error: invalid string token
+  (data "a"" b")
+        ^^^
+  out/test/spec/tokens/tokens.43.wat:1:7: error: unexpected token Invalid, expected ).
+  (data "a"" b")
+        ^^^
+out/test/spec/tokens.wast:214: assert_malformed passed:
+  out/test/spec/tokens/tokens.45.wat:1:7: error: invalid string token
+  (data "a ""b")
+        ^^^^
+  out/test/spec/tokens/tokens.45.wat:1:7: error: unexpected token Invalid, expected ).
+  (data "a ""b")
+        ^^^^
+out/test/spec/tokens.wast:224: assert_malformed passed:
+  out/test/spec/tokens/tokens.47.wat:1:7: error: invalid string token
+  (data """")
+        ^^^^^^^^
+  out/test/spec/tokens/tokens.47.wat:1:7: error: unexpected token Invalid, expected ).
+  (data """")
+        ^^^^^^^^
+out/test/spec/tokens.wast:234: assert_malformed passed:
+  out/test/spec/tokens/tokens.49.wat:1:7: error: invalid string token
+  (data """ ")
+        ^^^^^^^^
+  out/test/spec/tokens/tokens.49.wat:1:7: error: unexpected token Invalid, expected ).
+  (data """ ")
+        ^^^^^^^^
+out/test/spec/tokens.wast:244: assert_malformed passed:
+  out/test/spec/tokens/tokens.51.wat:1:7: error: invalid string token
+  (data " """)
+        ^^^^^^^^^
+  out/test/spec/tokens/tokens.51.wat:1:7: error: unexpected token Invalid, expected ).
+  (data " """)
+        ^^^^^^^^^
+out/test/spec/tokens.wast:252: assert_malformed passed:
+  out/test/spec/tokens/tokens.52.wat:1:7: error: unexpected token "a", expected ).
+  (func "a"x)
+        ^^^
+  out/test/spec/tokens/tokens.52.wat:1:10: error: unexpected token x.
+  (func "a"x)
+           ^
+out/test/spec/tokens.wast:258: assert_malformed passed:
+  out/test/spec/tokens/tokens.53.wat:1:7: error: unexpected token "a", expected ).
+  (func "a"0)
+        ^^^
+out/test/spec/tokens.wast:264: assert_malformed passed:
+  out/test/spec/tokens/tokens.54.wat:1:7: error: unexpected token 0"a", expected ).
+  (func 0"a")
+        ^^^^
+out/test/spec/tokens.wast:270: assert_malformed passed:
+  out/test/spec/tokens/tokens.55.wat:1:7: error: unexpected token "a", expected ).
+  (func "a"$x)
+        ^^^
+56/56 tests passed.
+;;; STDOUT ;;)
diff --git a/test/wasm2c/spec/tokens.txt b/test/wasm2c/spec/tokens.txt
new file mode 100644
index 000000000..0e18909c3
--- /dev/null
+++ b/test/wasm2c/spec/tokens.txt
@@ -0,0 +1,5 @@
+;;; TOOL: run-spec-wasm2c
+;;; STDIN_FILE: third_party/testsuite/tokens.wast
+(;; STDOUT ;;;
+0/0 tests passed.
+;;; STDOUT ;;)
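
Note on the behavior the new test exercises (context only, not part of the patch): with this change, a string literal written directly against an id or other reserved characters is no longer split into separate tokens. ReadReservedChars now also consumes the quoted text, so the whole run comes back as a single Reserved token, which the parser then rejects, and GetStringToken additionally reports "invalid string token" for directly adjacent string literals such as "a""b". A minimal wat sketch of the distinction, reusing shapes from the test expectations above; the spaced form is not taken from the test and is shown only to contrast the tokenization:

    (data $l"a")   ;; rejected: lexes as the single reserved token $l"a"
    (data $l "a")  ;; the id $l and the string "a" lex as separate tokens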