[move-compiler] Improved declaration handling after a parse error #20498

Merged: 8 commits, Dec 6, 2024

Changes from 7 commits
@@ -5,7 +5,7 @@
"publisher": "mysten",
"icon": "images/move.png",
"license": "Apache-2.0",
"version": "1.0.13",
"version": "1.0.14",
"preview": true,
"repository": {
"url": "https://github.com/MystenLabs/sui.git",
external-crates/move/crates/move-compiler/src/parser/lexer.rs (104 changes: 60 additions & 44 deletions)
@@ -194,6 +194,7 @@ pub struct Lexer<'input> {
cur_start: usize,
cur_end: usize,
token: Tok,
preceded_by_eol: bool, // last token was preceded by end-of-line
}

impl<'input> Lexer<'input> {
@@ -208,6 +209,7 @@ impl<'input> Lexer<'input> {
cur_start: 0,
cur_end: 0,
token: Tok::EOF,
preceded_by_eol: false,
}
}

@@ -259,6 +261,10 @@ impl<'input> Lexer<'input> {
self.edition
}

pub fn last_token_preceded_by_eol(&self) -> bool {
self.preceded_by_eol
}
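A minimal sketch of the intended consumer of this new accessor (illustrative only; `lexer` stands in for the parser's `context.tokens` handle, and the real call site is in `parse_sequence` below):

```rust
// After a failed token consume, check whether the offending token started a
// fresh line; if so, sequence-level error recovery may resume from it.
if lexer.last_token_preceded_by_eol() {
    // keep parsing the sequence instead of skipping ahead to a stop token
}
```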

/// Strips line and block comments from input source, and collects documentation comments,
/// putting them into a map indexed by the span of the comment region. Comments in the original
/// source will be replaced by spaces, such that positions of source items stay unchanged.
@@ -272,7 +278,8 @@ impl<'input> Lexer<'input> {
fn trim_whitespace_and_comments(
&mut self,
offset: usize,
-    ) -> Result<&'input str, Box<Diagnostic>> {
+    ) -> Result<(&'input str, bool), Box<Diagnostic>> {
let mut trimmed_preceding_eol;
let mut text = &self.text[offset..];

// A helper function to compute the index of the start of the given substring.
@@ -283,7 +290,7 @@
// a multi-line or single-line comment.
loop {
// Trim the start whitespace characters.
-            text = trim_start_whitespace(text);
+            (text, trimmed_preceding_eol) = trim_start_whitespace(text);

if text.starts_with("/*") {
// Continue the loop immediately after the multi-line comment.
@@ -311,7 +318,7 @@
}
break;
}
-        Ok(text)
+        Ok((text, trimmed_preceding_eol))
}

fn parse_block_comment(&mut self, offset: usize) -> Result<&'input str, Box<Diagnostic>> {
@@ -409,7 +416,7 @@ impl<'input> Lexer<'input> {
// Look ahead to the next token after the current one and return it, and its starting offset,
// without advancing the state of the lexer.
pub fn lookahead(&mut self) -> Result<Tok, Box<Diagnostic>> {
-        let text = self.trim_whitespace_and_comments(self.cur_end)?;
+        let (text, _) = self.trim_whitespace_and_comments(self.cur_end)?;
let next_start = self.text.len() - text.len();
let (result, _) = find_token(
/* panic_mode */ false,
@@ -425,7 +432,7 @@ impl<'input> Lexer<'input> {
// Look ahead to the next two tokens after the current one and return them without advancing
// the state of the lexer.
pub fn lookahead2(&mut self) -> Result<(Tok, Tok), Box<Diagnostic>> {
-        let text = self.trim_whitespace_and_comments(self.cur_end)?;
+        let (text, _) = self.trim_whitespace_and_comments(self.cur_end)?;
let offset = self.text.len() - text.len();
let (result, length) = find_token(
/* panic_mode */ false,
Expand All @@ -435,7 +442,7 @@ impl<'input> Lexer<'input> {
offset,
);
let first = result.map_err(|diag_opt| diag_opt.unwrap())?;
-        let text2 = self.trim_whitespace_and_comments(offset + length)?;
+        let (text2, _) = self.trim_whitespace_and_comments(offset + length)?;
let offset2 = self.text.len() - text2.len();
let (result2, _) = find_token(
/* panic_mode */ false,
@@ -528,7 +535,7 @@ impl<'input> Lexer<'input> {
let token = loop {
let mut cur_end = self.cur_end;
            // loop until the next text snippet that may contain a valid token is found
-            let text = loop {
+            let (text, trimmed_preceding_eol) = loop {
match self.trim_whitespace_and_comments(cur_end) {
Ok(t) => break t,
Err(diag) => {
@@ -542,6 +549,7 @@ impl<'input> Lexer<'input> {
}
};
};
self.preceded_by_eol = trimmed_preceding_eol;
let new_start = self.text.len() - text.len();
            // panic_mode determines whether a diagnostic should actually be recorded in
            // find_token (so that only the first one is recorded)
@@ -959,19 +967,27 @@ fn get_name_token(edition: Edition, name: &str) -> Tok {
}

// Trim leading whitespace characters: space, tab, LF (\n), and CRLF (\r\n).
-fn trim_start_whitespace(text: &str) -> &str {
+fn trim_start_whitespace(text: &str) -> (&str, bool) {
    let mut trimmed_eol = false;
let mut pos = 0;
let mut iter = text.chars();

while let Some(chr) = iter.next() {
match chr {
-            ' ' | '\t' | '\n' => pos += 1,
-            '\r' if matches!(iter.next(), Some('\n')) => pos += 2,
+            '\n' => {
+                pos += 1;
+                trimmed_eol = true;
+            }
+            ' ' | '\t' => pos += 1,
+            '\r' if matches!(iter.next(), Some('\n')) => {
+                pos += 2;
+                trimmed_eol = true;
+            }
_ => break,
};
}

-    &text[pos..]
+    (&text[pos..], trimmed_eol)
}
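The tests below exercise the new tuple return; as a quick orientation (an editor's sketch, not part of the diff), the added bool reports whether at least one line break was consumed while trimming, and a lone '\r' deliberately does not count as an end-of-line:

```rust
// Semantics of the returned flag, assuming the function above.
assert_eq!(trim_start_whitespace("\n  let x"), ("let x", true)); // LF trimmed
assert_eq!(trim_start_whitespace("   let x"), ("let x", false)); // spaces only, no EOL
assert_eq!(trim_start_whitespace("\rlet x"), ("\rlet x", false)); // bare '\r' is not trimmed
```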

#[cfg(test)]
@@ -980,45 +996,45 @@ mod tests {

#[test]
fn test_trim_start_whitespace() {
assert_eq!(trim_start_whitespace("\r"), "\r");
assert_eq!(trim_start_whitespace("\rxxx"), "\rxxx");
assert_eq!(trim_start_whitespace("\t\rxxx"), "\rxxx");
assert_eq!(trim_start_whitespace("\r\n\rxxx"), "\rxxx");
assert_eq!(trim_start_whitespace("\r").0, "\r");
assert_eq!(trim_start_whitespace("\rxxx").0, "\rxxx");
assert_eq!(trim_start_whitespace("\t\rxxx").0, "\rxxx");
assert_eq!(trim_start_whitespace("\r\n\rxxx").0, "\rxxx");

assert_eq!(trim_start_whitespace("\n"), "");
assert_eq!(trim_start_whitespace("\r\n"), "");
assert_eq!(trim_start_whitespace("\t"), "");
assert_eq!(trim_start_whitespace(" "), "");
assert_eq!(trim_start_whitespace("\n").0, "");
assert_eq!(trim_start_whitespace("\r\n").0, "");
assert_eq!(trim_start_whitespace("\t").0, "");
assert_eq!(trim_start_whitespace(" ").0, "");

assert_eq!(trim_start_whitespace("\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\r\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\txxx"), "xxx");
assert_eq!(trim_start_whitespace(" xxx"), "xxx");
assert_eq!(trim_start_whitespace("\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\r\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\txxx").0, "xxx");
assert_eq!(trim_start_whitespace(" xxx").0, "xxx");

assert_eq!(trim_start_whitespace(" \r\n"), "");
assert_eq!(trim_start_whitespace("\t\r\n"), "");
assert_eq!(trim_start_whitespace("\n\r\n"), "");
assert_eq!(trim_start_whitespace("\r\n "), "");
assert_eq!(trim_start_whitespace("\r\n\t"), "");
assert_eq!(trim_start_whitespace("\r\n\n"), "");
assert_eq!(trim_start_whitespace(" \r\n").0, "");
assert_eq!(trim_start_whitespace("\t\r\n").0, "");
assert_eq!(trim_start_whitespace("\n\r\n").0, "");
assert_eq!(trim_start_whitespace("\r\n ").0, "");
assert_eq!(trim_start_whitespace("\r\n\t").0, "");
assert_eq!(trim_start_whitespace("\r\n\n").0, "");

assert_eq!(trim_start_whitespace(" \r\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\t\r\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\n\r\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\r\n xxx"), "xxx");
assert_eq!(trim_start_whitespace("\r\n\txxx"), "xxx");
assert_eq!(trim_start_whitespace("\r\n\nxxx"), "xxx");
assert_eq!(trim_start_whitespace(" \r\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\t\r\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\n\r\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\r\n xxx").0, "xxx");
assert_eq!(trim_start_whitespace("\r\n\txxx").0, "xxx");
assert_eq!(trim_start_whitespace("\r\n\nxxx").0, "xxx");

assert_eq!(trim_start_whitespace(" \r\n\r\n"), "");
assert_eq!(trim_start_whitespace("\r\n \t\n"), "");
assert_eq!(trim_start_whitespace(" \r\n\r\n").0, "");
assert_eq!(trim_start_whitespace("\r\n \t\n").0, "");

assert_eq!(trim_start_whitespace(" \r\n\r\nxxx"), "xxx");
assert_eq!(trim_start_whitespace("\r\n \t\nxxx"), "xxx");
assert_eq!(trim_start_whitespace(" \r\n\r\nxxx").0, "xxx");
assert_eq!(trim_start_whitespace("\r\n \t\nxxx").0, "xxx");

assert_eq!(trim_start_whitespace(" \r\n\r\nxxx\n"), "xxx\n");
assert_eq!(trim_start_whitespace("\r\n \t\nxxx\r\n"), "xxx\r\n");
assert_eq!(trim_start_whitespace("\r\n\u{A0}\n"), "\u{A0}\n");
assert_eq!(trim_start_whitespace("\r\n\u{A0}\n"), "\u{A0}\n");
assert_eq!(trim_start_whitespace("\t \u{0085}\n"), "\u{0085}\n")
assert_eq!(trim_start_whitespace(" \r\n\r\nxxx\n").0, "xxx\n");
assert_eq!(trim_start_whitespace("\r\n \t\nxxx\r\n").0, "xxx\r\n");
assert_eq!(trim_start_whitespace("\r\n\u{A0}\n").0, "\u{A0}\n");
assert_eq!(trim_start_whitespace("\r\n\u{A0}\n").0, "\u{A0}\n");
assert_eq!(trim_start_whitespace("\t \u{0085}\n").0, "\u{0085}\n")
}
}
external-crates/move/crates/move-compiler/src/parser/syntax.rs (55 changes: 54 additions & 1 deletion)
@@ -1503,6 +1503,45 @@ fn parse_sequence_item(context: &mut Context) -> Result<SequenceItem, Box<Diagnostic>> {
))
}

// Checks if parsing of a sequence should continue after encountering an error.
fn continue_sequence_after_error(
> **Contributor:** Nit: `should_continue_sequence_parsing` or similar?
>
> **Author:** Changed to `should_continue_sequence_after_error`.

context: &mut Context,
diag: Diagnostic,
last_token_preceded_by_eol: bool,
> **Contributor:** Small nit: since we already take `context`, we do not strictly need to pass this as well. And it would be weird to pass anything other than `context.tokens.last_token_preceded_by_eol()`.
>
> **Author:** Yeah, this wasn't very smart - I simply slapped another parameter on when updating the previous version without thinking too much. Thanks! And fixed!

) -> bool {
context.add_diag(diag);
    // This is intended to handle a rather specific case in which a valid sequence item sits on the
    // line following the parsing error. This is particularly useful in the IDE when a programmer
    // starts typing an incomplete (and unparsable) line right above a line containing a valid
    // expression. In this case, we would still like to report the error but avoid dropping the valid
    // expression itself, particularly since dropping it may cause unnecessary cascading errors when
    // the expression is a variable declaration, as in the example below, where we want to avoid
    // `_tmp1` being reported as undefined on the following lines.
//
// let v =
// let _tmp1 = 42;
// let _tmp2 = _tmp1 * param;
// let _tmp3 = _tmp1 + param;

if context.at_stop_set() {
// don't continue if we are at the stop set
return false;
}
let tok = context.tokens.peek();
if last_token_preceded_by_eol
&& (SEQ_ITEM_START_SET.contains(tok, context.tokens.content())
            // ANY identifier can start a sequence item
|| tok == Tok::Identifier
|| tok == Tok::SyntaxIdentifier
|| tok == Tok::RestrictedIdentifier)
{
// if the last token was preceded by EOL, and it's in the start set for sequence items, continue
// parsing the sequence
return true;
}
false
}
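The decision the function encodes can be distilled into a pure predicate (an editor's sketch, not PR code; the three booleans stand in for `context.at_stop_set()`, the lexer's new EOL flag, and the start-set check above):

```rust
// Recovery continues only when the parser is not at a stop token AND the
// offending token both begins a fresh line and can start a new sequence item.
fn should_resume_sequence(at_stop_set: bool, preceded_by_eol: bool, starts_item: bool) -> bool {
    !at_stop_set && preceded_by_eol && starts_item
}
```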

// Parse a sequence:
// Sequence = <UseDecl>* (<SequenceItem> ";")* <Exp>? "}"
//
@@ -1546,6 +1585,13 @@ fn parse_sequence(context: &mut Context) -> Result<Sequence, Box<Diagnostic>> {
seq.push(item);
last_semicolon_loc = Some(current_token_loc(context.tokens));
if let Err(diag) = consume_token(context.tokens, Tok::Semicolon) {
if continue_sequence_after_error(
context,
diag.as_ref().clone(),
context.tokens.last_token_preceded_by_eol(),
) {
continue;
}
advance_separated_items_error(
context,
Tok::LBrace,
@@ -1560,10 +1606,17 @@ fn parse_sequence(context: &mut Context) -> Result<Sequence, Box<Diagnostic>> {
}
}
Err(diag) => {
-                context.stop_set.remove(Tok::Semicolon);
if continue_sequence_after_error(
context,
diag.as_ref().clone(),
context.tokens.last_token_preceded_by_eol(),
) {
continue;
}
let err_exp = sp(context.tokens.current_token_loc(), Exp_::UnresolvedError);
let err_seq_item = SequenceItem_::Seq(Box::new(err_exp));
seq.push(sp(context.tokens.current_token_loc(), err_seq_item));
+                context.stop_set.remove(Tok::Semicolon);
advance_separated_items_error(
context,
Tok::LBrace,
@@ -99,6 +99,13 @@ const EXP_STARTS: &[Tok] = &[

pub static EXP_START_SET: Lazy<TokenSet> = Lazy::new(|| TokenSet::from(EXP_STARTS));

pub static SEQ_ITEM_START_SET: Lazy<TokenSet> = Lazy::new(|| {
let mut token_set = TokenSet::new();
token_set.add_all(EXP_STARTS);
token_set.add(Tok::Let);
token_set
});
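For context, a hedged sketch of how this new set is consulted (mirroring `continue_sequence_after_error` in syntax.rs above; `context` is assumed to be the parser's usual `Context`, and the `TokenSet` methods are the compiler's own APIs as they appear in this PR):

```rust
// A token can begin a sequence item if it is in the start set, or if it is
// any flavor of identifier, which the set cannot enumerate by itself.
let tok = context.tokens.peek();
let can_start_item = SEQ_ITEM_START_SET.contains(tok, context.tokens.content())
    || matches!(
        tok,
        Tok::Identifier | Tok::SyntaxIdentifier | Tok::RestrictedIdentifier
    );
```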

const TYPE_STARTS: &[Tok] = &[
Tok::Identifier,
Tok::Amp,
@@ -31,3 +31,21 @@ error[E01002]: unexpected token
│ Unexpected 'let'
│ Expected an identifier or a decimal number

error[E01002]: unexpected token
┌─ tests/move_2024/ide_mode/dot_incomplete.move:18:9
18 │ let _tmp5 = _s. // incomplete without `;` (unexpected `}`)
│ ^^^
│ │
│ Unexpected 'let'
│ Expected an identifier or a decimal number

error[E01002]: unexpected token
┌─ tests/move_2024/ide_mode/dot_incomplete.move:19:5
19 │ }
│ ^
│ │
│ Unexpected '}'
│ Expected an identifier or a decimal number

@@ -67,3 +67,33 @@ error[E01002]: unexpected token
│ Unexpected 'let'
│ Expected an identifier or a decimal number

note[I15001]: IDE dot autocomplete
┌─ tests/move_2024/ide_mode/dot_incomplete.move:17:23
17 │ let _tmp4 = _s.
│ ^ Possible dot names: 'a'

error[E01002]: unexpected token
┌─ tests/move_2024/ide_mode/dot_incomplete.move:18:9
18 │ let _tmp5 = _s. // incomplete without `;` (unexpected `}`)
│ ^^^
│ │
│ Unexpected 'let'
│ Expected an identifier or a decimal number

note[I15001]: IDE dot autocomplete
┌─ tests/move_2024/ide_mode/dot_incomplete.move:18:23
18 │ let _tmp5 = _s. // incomplete without `;` (unexpected `}`)
│ ^ Possible dot names: 'a'

error[E01002]: unexpected token
┌─ tests/move_2024/ide_mode/dot_incomplete.move:19:5
19 │ }
│ ^
│ │
│ Unexpected '}'
│ Expected an identifier or a decimal number
