From 4356d18e4ab262a6703fa3a901c7cf00e9d27cc7 Mon Sep 17 00:00:00 2001
From: Vadim Petrochenkov <vadim.petrochenkov@gmail.com>
Date: Sat, 22 Feb 2020 16:22:38 +0300
Subject: [PATCH] parser: Cleanup `Parser::bump_with` and its uses

---
 src/librustc_parse/parser/mod.rs | 178 +++++++++----------------------
 src/libsyntax/token.rs           |  33 ++++++
 2 files changed, 84 insertions(+), 127 deletions(-)

diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs
index 937e5e3cd695b..75d4b3750f164 100644
--- a/src/librustc_parse/parser/mod.rs
+++ b/src/librustc_parse/parser/mod.rs
@@ -601,141 +601,76 @@ impl<'a> Parser<'a> {
         )
     }

-    /// Expects and consumes a `+`. if `+=` is seen, replaces it with a `=`
-    /// and continues. If a `+` is not seen, returns `false`.
-    ///
-    /// This is used when token-splitting `+=` into `+`.
-    /// See issue #47856 for an example of when this may occur.
-    fn eat_plus(&mut self) -> bool {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::Plus)));
-        match self.token.kind {
-            token::BinOp(token::Plus) => {
-                self.bump();
+    /// Eats the expected token if it's present possibly breaking
+    /// compound tokens like multi-character operators in process.
+    /// Returns `true` if the token was eaten.
+    fn break_and_eat(&mut self, expected: TokenKind) -> bool {
+        if self.token.kind == expected {
+            self.bump();
+            return true;
+        }
+        match self.token.kind.break_two_token_op() {
+            Some((first, second)) if first == expected => {
+                let first_span = self.sess.source_map().start_point(self.token.span);
+                let second_span = self.token.span.with_lo(first_span.hi());
+                self.set_token(Token::new(first, first_span));
+                self.bump_with(Token::new(second, second_span));
                 true
             }
-            token::BinOpEq(token::Plus) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi()));
-                true
+            _ => {
+                self.expected_tokens.push(TokenType::Token(expected));
+                false
             }
-            _ => false,
         }
     }

-    /// Expects and consumes an `&`. If `&&` is seen, replaces it with a single
-    /// `&` and continues. If an `&` is not seen, signals an error.
+    /// Eats `+` possibly breaking tokens like `+=` in process.
+    fn eat_plus(&mut self) -> bool {
+        self.break_and_eat(token::BinOp(token::Plus))
+    }
+
+    /// Eats `&` possibly breaking tokens like `&&` in process.
+    /// Signals an error if `&` is not eaten.
     fn expect_and(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::And)));
-        match self.token.kind {
-            token::BinOp(token::And) => {
-                self.bump();
-                Ok(())
-            }
-            token::AndAnd => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Ok(self
-                    .bump_with(token::BinOp(token::And), self.token.span.with_lo(start_point.hi())))
-            }
-            _ => self.unexpected(),
-        }
+        if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
    }

-    /// Expects and consumes an `|`. If `||` is seen, replaces it with a single
-    /// `|` and continues. If an `|` is not seen, signals an error.
+    /// Eats `|` possibly breaking tokens like `||` in process.
+    /// Signals an error if `|` was not eaten.
     fn expect_or(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::Or)));
-        match self.token.kind {
-            token::BinOp(token::Or) => {
-                self.bump();
-                Ok(())
-            }
-            token::OrOr => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Ok(self
-                    .bump_with(token::BinOp(token::Or), self.token.span.with_lo(start_point.hi())))
-            }
-            _ => self.unexpected(),
-        }
+        if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
     }

-    /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single
-    /// `<` and continue. If `<-` is seen, replaces it with a single `<`
-    /// and continue. If a `<` is not seen, returns false.
-    ///
-    /// This is meant to be used when parsing generics on a path to get the
-    /// starting token.
+    /// Eats `<` possibly breaking tokens like `<<` in process.
     fn eat_lt(&mut self) -> bool {
-        self.expected_tokens.push(TokenType::Token(token::Lt));
-        let ate = match self.token.kind {
-            token::Lt => {
-                self.bump();
-                true
-            }
-            token::BinOp(token::Shl) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(token::Lt, self.token.span.with_lo(start_point.hi()));
-                true
-            }
-            token::LArrow => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(
-                    token::BinOp(token::Minus),
-                    self.token.span.with_lo(start_point.hi()),
-                );
-                true
-            }
-            _ => false,
-        };
-
+        let ate = self.break_and_eat(token::Lt);
         if ate {
             // See doc comment for `unmatched_angle_bracket_count`.
             self.unmatched_angle_bracket_count += 1;
             self.max_angle_bracket_count += 1;
             debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
         }
-
         ate
     }

+    /// Eats `<` possibly breaking tokens like `<<` in process.
+    /// Signals an error if `<` was not eaten.
     fn expect_lt(&mut self) -> PResult<'a, ()> {
-        if !self.eat_lt() { self.unexpected() } else { Ok(()) }
+        if self.eat_lt() { Ok(()) } else { self.unexpected() }
     }

-    /// Expects and consumes a single `>` token. if a `>>` is seen, replaces it
-    /// with a single `>` and continues. If a `>` is not seen, signals an error.
+    /// Eats `>` possibly breaking tokens like `>>` in process.
+    /// Signals an error if `>` was not eaten.
     fn expect_gt(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::Gt));
-        let ate = match self.token.kind {
-            token::Gt => {
-                self.bump();
-                Some(())
-            }
-            token::BinOp(token::Shr) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Gt, self.token.span.with_lo(start_point.hi())))
-            }
-            token::BinOpEq(token::Shr) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Ge, self.token.span.with_lo(start_point.hi())))
-            }
-            token::Ge => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi())))
-            }
-            _ => None,
-        };
-
-        match ate {
-            Some(_) => {
-                // See doc comment for `unmatched_angle_bracket_count`.
-                if self.unmatched_angle_bracket_count > 0 {
-                    self.unmatched_angle_bracket_count -= 1;
-                    debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
-                }
-
-                Ok(())
+        if self.break_and_eat(token::Gt) {
+            // See doc comment for `unmatched_angle_bracket_count`.
+            if self.unmatched_angle_bracket_count > 0 {
+                self.unmatched_angle_bracket_count -= 1;
+                debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
             }
-            None => self.unexpected(),
+            Ok(())
+        } else {
+            self.unexpected()
         }
     }

@@ -903,10 +838,10 @@ impl<'a> Parser<'a> {
         }
     }

-    /// Advance the parser by one token.
-    pub fn bump(&mut self) {
+    /// Advance the parser by one token using provided token as the next one.
+    fn bump_with(&mut self, next_token: Token) {
+        // Bumping after EOF is a bad sign, usually an infinite loop.
         if self.prev_token.kind == TokenKind::Eof {
-            // Bumping after EOF is a bad sign, usually an infinite loop.
             let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
             self.span_bug(self.token.span, msg);
         }
@@ -914,30 +849,19 @@ impl<'a> Parser<'a> {
         // Update the current and previous tokens.
         self.prev_token = self.token.take();
         self.unnormalized_prev_token = self.unnormalized_token.take();
-        let next_token = self.next_tok(self.unnormalized_prev_token.span);
         self.set_token(next_token);

         // Update fields derived from the previous token.
         self.prev_span = self.unnormalized_prev_token.span;

+        // Diagnostics.
         self.expected_tokens.clear();
     }

-    /// Advances the parser using provided token as a next one. Use this when
-    /// consuming a part of a token. For example a single `<` from `<<`.
-    /// FIXME: this function sets the previous token data to some semi-nonsensical values
-    /// which kind of work because they are currently used in very limited ways in practice.
-    /// Correct token kinds and spans need to be calculated instead.
-    fn bump_with(&mut self, next: TokenKind, span: Span) {
-        // Update the current and previous tokens.
-        self.prev_token = self.token.take();
-        self.unnormalized_prev_token = self.unnormalized_token.take();
-        self.set_token(Token::new(next, span));
-
-        // Update fields derived from the previous token.
-        self.prev_span = self.unnormalized_prev_token.span.with_hi(span.lo());
-
-        self.expected_tokens.clear();
+    /// Advance the parser by one token.
+    pub fn bump(&mut self) {
+        let next_token = self.next_tok(self.unnormalized_token.span);
+        self.bump_with(next_token);
     }

     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
diff --git a/src/libsyntax/token.rs b/src/libsyntax/token.rs
index 862934300e085..6eeee49881579 100644
--- a/src/libsyntax/token.rs
+++ b/src/libsyntax/token.rs
@@ -270,6 +270,39 @@ impl TokenKind {
         Literal(Lit::new(kind, symbol, suffix))
     }

+    // An approximation to proc-macro-style single-character operators used by rustc parser.
+    // If the operator token can be broken into two tokens, the first of which is single-character,
+    // then this function performs that operation, otherwise it returns `None`.
+    pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
+        Some(match *self {
+            Le => (Lt, Eq),
+            EqEq => (Eq, Eq),
+            Ne => (Not, Eq),
+            Ge => (Gt, Eq),
+            AndAnd => (BinOp(And), BinOp(And)),
+            OrOr => (BinOp(Or), BinOp(Or)),
+            BinOp(Shl) => (Lt, Lt),
+            BinOp(Shr) => (Gt, Gt),
+            BinOpEq(Plus) => (BinOp(Plus), Eq),
+            BinOpEq(Minus) => (BinOp(Minus), Eq),
+            BinOpEq(Star) => (BinOp(Star), Eq),
+            BinOpEq(Slash) => (BinOp(Slash), Eq),
+            BinOpEq(Percent) => (BinOp(Percent), Eq),
+            BinOpEq(Caret) => (BinOp(Caret), Eq),
+            BinOpEq(And) => (BinOp(And), Eq),
+            BinOpEq(Or) => (BinOp(Or), Eq),
+            BinOpEq(Shl) => (Lt, Le),
+            BinOpEq(Shr) => (Gt, Ge),
+            DotDot => (Dot, Dot),
+            DotDotDot => (Dot, DotDot),
+            ModSep => (Colon, Colon),
+            RArrow => (BinOp(Minus), Gt),
+            LArrow => (Lt, BinOp(Minus)),
+            FatArrow => (Eq, Gt),
+            _ => return None,
+        })
+    }
+
     /// Returns tokens that are likely to be typed accidentally instead of the current token.
     /// Enables better error recovery when the wrong token is found.
     pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
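
Note (not part of the patch): below is a minimal, self-contained sketch of the token-splitting idea that `break_and_eat` and `break_two_token_op` implement. The `Tok` enum and the free function are illustrative stand-ins rather than rustc's real `TokenKind` API, and the span bookkeeping (`first_span`/`second_span`) from the hunk above is omitted.

    // Illustrative stand-in for rustc's `TokenKind`; not real compiler code.
    #[allow(dead_code)]
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Tok {
        Lt,  // `<`
        Gt,  // `>`
        Eq,  // `=`
        Shl, // `<<`
        Shr, // `>>`
        Ge,  // `>=`
    }

    // Same shape as the patch's `break_two_token_op`: split a compound operator
    // into a single-character head and a remainder, or `None` if it can't be split.
    fn break_two_token_op(tok: Tok) -> Option<(Tok, Tok)> {
        Some(match tok {
            Tok::Shl => (Tok::Lt, Tok::Lt),
            Tok::Shr => (Tok::Gt, Tok::Gt),
            Tok::Ge => (Tok::Gt, Tok::Eq),
            _ => return None,
        })
    }

    fn main() {
        // While parsing `Vec<Vec<u8>>` the lexer emits a single `>>` token;
        // `expect_gt` only wants `>`, so the parser breaks `>>` in two and keeps
        // the second `>` (with a recomputed span) as the new current token.
        assert_eq!(break_two_token_op(Tok::Shr), Some((Tok::Gt, Tok::Gt)));
        // A plain `<` is already single-character and cannot be broken further.
        assert_eq!(break_two_token_op(Tok::Lt), None);
    }

In the real parser, `break_and_eat` additionally replaces the current token with the first half via `set_token` and feeds the second half to the new single-argument `bump_with`, so the rest of the parser never sees the original compound token.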