diff --git a/grammar/import.js b/grammar/import.js index 7826fae1..f0bce8bb 100644 --- a/grammar/import.js +++ b/grammar/import.js @@ -18,10 +18,7 @@ module.exports = { $.variable, seq( optional($.namespace), - choice( - $._tyconid, - $._sym_prefix, - ), + choice($._tyconid, $._sym_prefix), optional($.import_con_names), ), ), diff --git a/src/scanner.c b/src/scanner.c index 1947f001..85647cf5 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -1419,15 +1419,30 @@ static bool seq(Env *env, const char *restrict s) { } /** - * Advance until the next newline or EOF, used to consume the body of a cpp directive or comment. - * Escaped newlines are treated as line continuations. + * Advance until the next newline or EOF, used to consume the body of a comment. */ static void take_line(Env *env) { + while (not_eof(env) && !is_newline(PEEK)) S_ADVANCE; +} + +static bool is_space_or_tab(int32_t c) { + return c == ' ' || c == '\t'; +} + +/** + * Advance until the next newline or EOF, used to consume the body of a cpp directive. + * Escaped newlines are treated as line continuations, which allow spaces and tabs between backslash and newline. + */ +static void take_line_escaped_newline(Env *env) { for (;;) { while (not_eof(env) && !is_newline(PEEK) && PEEK != '\\') S_ADVANCE; if (PEEK == '\\') { S_ADVANCE; - S_ADVANCE; + if (is_space_or_tab(PEEK)) { + while (is_space_or_tab(PEEK)) S_ADVANCE; + if (is_newline(PEEK)) S_ADVANCE; + } + else S_ADVANCE; } else return; } @@ -2374,7 +2389,7 @@ static Lexed lex(Env *env, bool bol) { static Symbol cpp_else(Env *env, bool emit) { uint32_t nesting = 1; do { - take_line(env); + take_line_escaped_newline(env); if (emit) MARK("cpp_else"); S_ADVANCE; reset_lookahead(env); @@ -2395,7 +2410,7 @@ static Symbol cpp_else(Env *env, bool emit) { } static Symbol cpp_line(Env *env) { - take_line(env); + take_line_escaped_newline(env); return finish_marked(env, CPP, "cpp"); } @@ -2869,10 +2884,10 @@ static void newline_lookahead(Env *env) { break; case LCppElse: cpp_else(env, false); - take_line(env); + take_line_escaped_newline(env); break; case LCpp: - take_line(env); + take_line_escaped_newline(env); break; default: return; diff --git a/test/corpus/comment.txt b/test/corpus/comment.txt index 3847494a..2b27ca94 100644 --- a/test/corpus/comment.txt +++ b/test/corpus/comment.txt @@ -221,25 +221,6 @@ a = a (variable))) (comment))) -================================================================================ -comment: escaped newline -================================================================================ - -a = a - --- a \ -a = a - --------------------------------------------------------------------------------- - -(haskell - (declarations - (bind - (variable) - (exp_name - (variable))) - (comment))) - ================================================================================ comment: haddock ================================================================================ diff --git a/test/corpus/cpp.txt b/test/corpus/cpp.txt index dc5b3f46..3d190312 100644 --- a/test/corpus/cpp.txt +++ b/test/corpus/cpp.txt @@ -638,3 +638,25 @@ a = do (exp_statement (exp_name (variable))))))) + +================================================================================ +cpp: newline continuation +================================================================================ + +#if \ +1 +#endif + +-- this one has trailing whitespace, which is valid +#if \ +1 +#endif + +-------------------------------------------------------------------------------- + +(haskell + (cpp) + (cpp) + (comment) + (cpp) + (cpp))