Skip to content

Commit

Permalink
WastLexer: log lexing errors directly (#2013)
Browse files Browse the repository at this point in the history
* Log all lexing errors in WastLexer (rather than via parser)

* Update docs/demo/libwabt.js
  • Loading branch information
keithw authored Oct 3, 2022
1 parent a2c0d17 commit 1adcc91
Show file tree
Hide file tree
Showing 12 changed files with 72 additions and 76 deletions.
50 changes: 14 additions & 36 deletions docs/demo/libwabt.js

Large diffs are not rendered by default.

18 changes: 12 additions & 6 deletions include/wabt/wast-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <memory>

#include "wabt/common.h"
#include "wabt/error.h"
#include "wabt/lexer-source-line-finder.h"
#include "wabt/literal.h"
#include "wabt/make-unique.h"
Expand All @@ -32,20 +33,22 @@ namespace wabt {

class ErrorHandler;
class LexerSource;
class WastParser;

class WastLexer {
public:
WABT_DISALLOW_COPY_AND_ASSIGN(WastLexer);

WastLexer(std::unique_ptr<LexerSource> source, std::string_view filename);
WastLexer(std::unique_ptr<LexerSource> source,
std::string_view filename,
Errors*);

// Convenience functions.
static std::unique_ptr<WastLexer> CreateBufferLexer(std::string_view filename,
const void* data,
size_t size);
size_t size,
Errors*);

Token GetToken(WastParser* parser);
Token GetToken();

// TODO(binji): Move this out of the lexer.
std::unique_ptr<LexerSourceLineFinder> MakeLineFinder() {
Expand All @@ -68,7 +71,7 @@ class WastLexer {
bool MatchChar(char);
bool MatchString(std::string_view);
void Newline();
bool ReadBlockComment(WastParser*); // Returns false if EOF.
bool ReadBlockComment(); // Returns false if EOF.
bool ReadLineComment(); // Returns false if EOF.
void ReadWhitespace();

Expand All @@ -87,7 +90,7 @@ class WastLexer {
return ReadReservedChars() == ReservedChars::None;
}
void ReadSign();
Token GetStringToken(WastParser*);
Token GetStringToken();
Token GetNumberToken(TokenType);
Token GetHexNumberToken(TokenType);
Token GetInfToken();
Expand All @@ -105,6 +108,9 @@ class WastLexer {
const char* line_start_;
const char* token_start_;
const char* cursor_;

Errors* errors_;
void WABT_PRINTF_FORMAT(3, 4) Error(Location, const char* format, ...);
};

} // namespace wabt
Expand Down
5 changes: 3 additions & 2 deletions src/emscripten-helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ void wabt_destroy_features(wabt::Features* f) {

wabt::WastLexer* wabt_new_wast_buffer_lexer(const char* filename,
const void* data,
size_t size) {
size_t size,
wabt::Errors* errors) {
std::unique_ptr<wabt::WastLexer> lexer =
wabt::WastLexer::CreateBufferLexer(filename, data, size);
wabt::WastLexer::CreateBufferLexer(filename, data, size, errors);
return lexer.release();
}

Expand Down
3 changes: 2 additions & 1 deletion src/test-wast-parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ std::string repeat(std::string s, size_t count) {
}

Errors ParseInvalidModule(std::string text) {
auto lexer = WastLexer::CreateBufferLexer("test", text.c_str(), text.size());
Errors errors;
auto lexer =
WastLexer::CreateBufferLexer("test", text.c_str(), text.size(), &errors);
std::unique_ptr<Module> module;
Features features;
WastParseOptions options(features);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/spectest-interp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1382,9 +1382,9 @@ wabt::Result CommandRunner::ReadInvalidTextModule(
const std::string& header) {
std::vector<uint8_t> file_data;
wabt::Result result = ReadFile(module_filename, &file_data);
std::unique_ptr<WastLexer> lexer = WastLexer::CreateBufferLexer(
module_filename, file_data.data(), file_data.size());
Errors errors;
std::unique_ptr<WastLexer> lexer = WastLexer::CreateBufferLexer(
module_filename, file_data.data(), file_data.size(), &errors);
if (Succeeded(result)) {
std::unique_ptr<wabt::Module> module;
WastParseOptions options(s_features);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/wast2json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,13 @@ int ProgramMain(int argc, char** argv) {

std::vector<uint8_t> file_data;
Result result = ReadFile(s_infile, &file_data);
Errors errors;
std::unique_ptr<WastLexer> lexer = WastLexer::CreateBufferLexer(
s_infile, file_data.data(), file_data.size());
s_infile, file_data.data(), file_data.size(), &errors);
if (Failed(result)) {
WABT_FATAL("unable to read file: %s\n", s_infile);
}

Errors errors;
std::unique_ptr<Script> script;
WastParseOptions parse_wast_options(s_features);
result = ParseWastScript(lexer.get(), &script, &errors, &parse_wast_options);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/wat-desugar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ int ProgramMain(int argc, char** argv) {
WABT_FATAL("unable to read %s\n", s_infile);
}

Errors errors;
std::unique_ptr<WastLexer> lexer(WastLexer::CreateBufferLexer(
s_infile, file_data.data(), file_data.size()));
s_infile, file_data.data(), file_data.size(), &errors));

Errors errors;
std::unique_ptr<Script> script;
WastParseOptions parse_wast_options(s_features);
result = ParseWastScript(lexer.get(), &script, &errors, &parse_wast_options);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/wat2wasm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ int ProgramMain(int argc, char** argv) {

std::vector<uint8_t> file_data;
Result result = ReadFile(s_infile, &file_data);
Errors errors;
std::unique_ptr<WastLexer> lexer = WastLexer::CreateBufferLexer(
s_infile, file_data.data(), file_data.size());
s_infile, file_data.data(), file_data.size(), &errors);
if (Failed(result)) {
WABT_FATAL("unable to read file: %s\n", s_infile);
}

Errors errors;
std::unique_ptr<Module> module;
WastParseOptions parse_wast_options(s_features);
result = ParseWatModule(lexer.get(), &module, &errors, &parse_wast_options);
Expand Down
13 changes: 7 additions & 6 deletions src/wabt.post.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ FEATURES.forEach(function(feature) {


/// Lexer
function Lexer(filename, buffer) {
function Lexer(filename, buffer, errors) {
this.filenameObj = allocateCString(filename);
this.bufferObj = allocateBuffer(buffer);
this.addr = Module._wabt_new_wast_buffer_lexer(
this.filenameObj.addr, this.bufferObj.addr, this.bufferObj.size);
this.filenameObj.addr, this.bufferObj.addr, this.bufferObj.size,
errors.addr);
}
Lexer.prototype = Object.create(Object.prototype);

Expand Down Expand Up @@ -159,10 +160,9 @@ OutputBuffer.prototype.destroy = function() {


/// Errors
function Errors(kind, lexer) {
function Errors(kind) {
this.kind = kind;
this.addr = Module._wabt_new_errors();
this.lexer = lexer;
}
Errors.prototype = Object.create(Object.prototype);

Expand Down Expand Up @@ -194,8 +194,9 @@ Errors.prototype.destroy = function() {

/// parseWat
function parseWat(filename, buffer, options) {
var lexer = new Lexer(filename, buffer);
var errors = new Errors('text', lexer);
var errors = new Errors('text');
var lexer = new Lexer(filename, buffer, errors);
errors.lexer = lexer;
var features = new Features(options || {});

try {
Expand Down
34 changes: 20 additions & 14 deletions src/wast-lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@
#include "wabt/config.h"

#include "wabt/lexer-source.h"
#include "wabt/wast-parser.h"

#define ERROR(...) \
if (parser) \
parser->Error(GetLocation(), __VA_ARGS__)
#define ERROR(...) Error(GetLocation(), __VA_ARGS__)

namespace wabt {

Expand All @@ -37,25 +34,29 @@ namespace {
} // namespace

WastLexer::WastLexer(std::unique_ptr<LexerSource> source,
std::string_view filename)
std::string_view filename,
Errors* errors)
: source_(std::move(source)),
filename_(filename),
line_(1),
buffer_(static_cast<const char*>(source_->data())),
buffer_end_(buffer_ + source_->size()),
line_start_(buffer_),
token_start_(buffer_),
cursor_(buffer_) {}
cursor_(buffer_),
errors_(errors) {}

// static
std::unique_ptr<WastLexer> WastLexer::CreateBufferLexer(
std::string_view filename,
const void* data,
size_t size) {
return MakeUnique<WastLexer>(MakeUnique<LexerSource>(data, size), filename);
size_t size,
Errors* errors) {
return MakeUnique<WastLexer>(MakeUnique<LexerSource>(data, size), filename,
errors);
}

Token WastLexer::GetToken(WastParser* parser) {
Token WastLexer::GetToken() {
while (true) {
token_start_ = cursor_;
switch (PeekChar()) {
Expand All @@ -64,7 +65,7 @@ Token WastLexer::GetToken(WastParser* parser) {

case '(':
if (MatchString("(;")) {
if (ReadBlockComment(parser)) {
if (ReadBlockComment()) {
continue;
}
return BareToken(TokenType::Eof);
Expand Down Expand Up @@ -103,7 +104,7 @@ Token WastLexer::GetToken(WastParser* parser) {
continue;

case '"':
return GetStringToken(parser);
return GetStringToken();

case '+':
case '-':
Expand Down Expand Up @@ -234,7 +235,7 @@ void WastLexer::Newline() {
line_start_ = cursor_;
}

bool WastLexer::ReadBlockComment(WastParser* parser) {
bool WastLexer::ReadBlockComment() {
int nesting = 1;
while (true) {
switch (ReadChar()) {
Expand Down Expand Up @@ -294,7 +295,7 @@ void WastLexer::ReadWhitespace() {
}
}

Token WastLexer::GetStringToken(WastParser* parser) {
Token WastLexer::GetStringToken() {
const char* saved_token_start = token_start_;
bool has_error = false;
bool in_string = true;
Expand Down Expand Up @@ -472,7 +473,7 @@ WastLexer::ReservedChars WastLexer::ReadReservedChars() {
ret = ReservedChars::Id;
}
} else if (peek == '"') {
GetStringToken(nullptr);
GetStringToken();
ret = ReservedChars::Some;
} else {
break;
Expand Down Expand Up @@ -608,4 +609,9 @@ Token WastLexer::GetReservedToken() {
return TextToken(TokenType::Reserved);
}

// Records a lexing error at `loc` directly in the lexer's own error list,
// instead of forwarding it to the parser.
//
// `format` and the trailing arguments are printf-style (the declaration is
// annotated with WABT_PRINTF_FORMAT(3, 4)); the formatted text becomes the
// message of the recorded error.
void WastLexer::Error(Location loc, const char* format, ...) {
// Expands the variadic arguments into a local named `buffer`. The macro is
// defined elsewhere in the project; presumably it formats into
// alloca-backed storage, as its name suggests -- TODO confirm.
WABT_SNPRINTF_ALLOCA(buffer, length, format);
// NOTE(review): errors_ is the Errors* handed to the constructor and is
// dereferenced here without a null check, so callers are expected to pass a
// valid pointer -- confirm no call site passes nullptr.
errors_->emplace_back(ErrorLevel::Error, loc, buffer);
}

} // namespace wabt
6 changes: 3 additions & 3 deletions src/wast-parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ void WastParser::Error(Location loc, const char* format, ...) {

Token WastParser::GetToken() {
if (tokens_.empty()) {
tokens_.push_back(lexer_->GetToken(this));
tokens_.push_back(lexer_->GetToken());
}
return tokens_.front();
}
Expand All @@ -595,7 +595,7 @@ Location WastParser::GetLocation() {

TokenType WastParser::Peek(size_t n) {
while (tokens_.size() <= n) {
Token cur = lexer_->GetToken(this);
Token cur = lexer_->GetToken();
if (cur.token_type() != TokenType::LparAnn) {
tokens_.push_back(cur);
} else {
Expand All @@ -613,7 +613,7 @@ TokenType WastParser::Peek(size_t n) {
}
int indent = 1;
while (indent > 0) {
cur = lexer_->GetToken(this);
cur = lexer_->GetToken();
switch (cur.token_type()) {
case TokenType::Lpar:
case TokenType::LparAnn:
Expand Down
3 changes: 3 additions & 0 deletions test/spec/tokens.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ out/test/spec/tokens.wast:144: assert_malformed passed:
(data $l"a ")
^^^^^^
out/test/spec/tokens.wast:154: assert_malformed passed:
out/test/spec/tokens/tokens.33.wat:1:7: error: invalid string token
(data $l"a ""b")
^^^^^^
out/test/spec/tokens/tokens.33.wat:1:7: error: unexpected token $l"a ""b", expected ).
(data $l"a ""b")
^^^^^^^^^
Expand Down

0 comments on commit 1adcc91

Please sign in to comment.