From 4356d18e4ab262a6703fa3a901c7cf00e9d27cc7 Mon Sep 17 00:00:00 2001
From: Vadim Petrochenkov <vadim.petrochenkov@gmail.com>
Date: Sat, 22 Feb 2020 16:22:38 +0300
Subject: [PATCH] parser: Cleanup `Parser::bump_with` and its uses

---
 src/librustc_parse/parser/mod.rs | 178 +++++++++----------------------
 src/libsyntax/token.rs           |  33 ++++++
 2 files changed, 84 insertions(+), 127 deletions(-)

diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs
index 937e5e3cd695b..75d4b3750f164 100644
--- a/src/librustc_parse/parser/mod.rs
+++ b/src/librustc_parse/parser/mod.rs
@@ -601,141 +601,76 @@ impl<'a> Parser<'a> {
         )
     }

-    /// Expects and consumes a `+`. if `+=` is seen, replaces it with a `=`
-    /// and continues. If a `+` is not seen, returns `false`.
-    ///
-    /// This is used when token-splitting `+=` into `+`.
-    /// See issue #47856 for an example of when this may occur.
-    fn eat_plus(&mut self) -> bool {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::Plus)));
-        match self.token.kind {
-            token::BinOp(token::Plus) => {
-                self.bump();
+    /// Eats the expected token if it's present possibly breaking
+    /// compound tokens like multi-character operators in process.
+    /// Returns `true` if the token was eaten.
+    fn break_and_eat(&mut self, expected: TokenKind) -> bool {
+        if self.token.kind == expected {
+            self.bump();
+            return true;
+        }
+        match self.token.kind.break_two_token_op() {
+            Some((first, second)) if first == expected => {
+                let first_span = self.sess.source_map().start_point(self.token.span);
+                let second_span = self.token.span.with_lo(first_span.hi());
+                self.set_token(Token::new(first, first_span));
+                self.bump_with(Token::new(second, second_span));
                 true
             }
-            token::BinOpEq(token::Plus) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi()));
-                true
+            _ => {
+                self.expected_tokens.push(TokenType::Token(expected));
+                false
             }
-            _ => false,
         }
     }

-    /// Expects and consumes an `&`. If `&&` is seen, replaces it with a single
-    /// `&` and continues. If an `&` is not seen, signals an error.
+    /// Eats `+` possibly breaking tokens like `+=` in process.
+    fn eat_plus(&mut self) -> bool {
+        self.break_and_eat(token::BinOp(token::Plus))
+    }
+
+    /// Eats `&` possibly breaking tokens like `&&` in process.
+    /// Signals an error if `&` is not eaten.
     fn expect_and(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::And)));
-        match self.token.kind {
-            token::BinOp(token::And) => {
-                self.bump();
-                Ok(())
-            }
-            token::AndAnd => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Ok(self
-                    .bump_with(token::BinOp(token::And), self.token.span.with_lo(start_point.hi())))
-            }
-            _ => self.unexpected(),
-        }
+        if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
    }

-    /// Expects and consumes an `|`. If `||` is seen, replaces it with a single
-    /// `|` and continues. If an `|` is not seen, signals an error.
+    /// Eats `|` possibly breaking tokens like `||` in process.
+    /// Signals an error if `|` was not eaten.
     fn expect_or(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::BinOp(token::Or)));
-        match self.token.kind {
-            token::BinOp(token::Or) => {
-                self.bump();
-                Ok(())
-            }
-            token::OrOr => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Ok(self
-                    .bump_with(token::BinOp(token::Or), self.token.span.with_lo(start_point.hi())))
-            }
-            _ => self.unexpected(),
-        }
+        if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
     }

-    /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single
-    /// `<` and continue. If `<-` is seen, replaces it with a single `<`
-    /// and continue. If a `<` is not seen, returns false.
-    ///
-    /// This is meant to be used when parsing generics on a path to get the
-    /// starting token.
+    /// Eats `<` possibly breaking tokens like `<<` in process.
     fn eat_lt(&mut self) -> bool {
-        self.expected_tokens.push(TokenType::Token(token::Lt));
-        let ate = match self.token.kind {
-            token::Lt => {
-                self.bump();
-                true
-            }
-            token::BinOp(token::Shl) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(token::Lt, self.token.span.with_lo(start_point.hi()));
-                true
-            }
-            token::LArrow => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                self.bump_with(
-                    token::BinOp(token::Minus),
-                    self.token.span.with_lo(start_point.hi()),
-                );
-                true
-            }
-            _ => false,
-        };
-
+        let ate = self.break_and_eat(token::Lt);
         if ate {
             // See doc comment for `unmatched_angle_bracket_count`.
             self.unmatched_angle_bracket_count += 1;
             self.max_angle_bracket_count += 1;
             debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
         }
-
         ate
     }

+    /// Eats `<` possibly breaking tokens like `<<` in process.
+    /// Signals an error if `<` was not eaten.
     fn expect_lt(&mut self) -> PResult<'a, ()> {
-        if !self.eat_lt() { self.unexpected() } else { Ok(()) }
+        if self.eat_lt() { Ok(()) } else { self.unexpected() }
     }

-    /// Expects and consumes a single `>` token. if a `>>` is seen, replaces it
-    /// with a single `>` and continues. If a `>` is not seen, signals an error.
+    /// Eats `>` possibly breaking tokens like `>>` in process.
+    /// Signals an error if `>` was not eaten.
     fn expect_gt(&mut self) -> PResult<'a, ()> {
-        self.expected_tokens.push(TokenType::Token(token::Gt));
-        let ate = match self.token.kind {
-            token::Gt => {
-                self.bump();
-                Some(())
-            }
-            token::BinOp(token::Shr) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Gt, self.token.span.with_lo(start_point.hi())))
-            }
-            token::BinOpEq(token::Shr) => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Ge, self.token.span.with_lo(start_point.hi())))
-            }
-            token::Ge => {
-                let start_point = self.sess.source_map().start_point(self.token.span);
-                Some(self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi())))
-            }
-            _ => None,
-        };
-
-        match ate {
-            Some(_) => {
-                // See doc comment for `unmatched_angle_bracket_count`.
-                if self.unmatched_angle_bracket_count > 0 {
-                    self.unmatched_angle_bracket_count -= 1;
-                    debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
-                }
-
-                Ok(())
+        if self.break_and_eat(token::Gt) {
+            // See doc comment for `unmatched_angle_bracket_count`.
+            if self.unmatched_angle_bracket_count > 0 {
+                self.unmatched_angle_bracket_count -= 1;
+                debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
             }
-            None => self.unexpected(),
+            Ok(())
+        } else {
+            self.unexpected()
         }
     }

@@ -903,10 +838,10 @@ impl<'a> Parser<'a> {
         }
     }

-    /// Advance the parser by one token.
-    pub fn bump(&mut self) {
+    /// Advance the parser by one token using provided token as the next one.
+    fn bump_with(&mut self, next_token: Token) {
+        // Bumping after EOF is a bad sign, usually an infinite loop.
         if self.prev_token.kind == TokenKind::Eof {
-            // Bumping after EOF is a bad sign, usually an infinite loop.
             let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
             self.span_bug(self.token.span, msg);
         }
@@ -914,30 +849,19 @@ impl<'a> Parser<'a> {
         // Update the current and previous tokens.
         self.prev_token = self.token.take();
         self.unnormalized_prev_token = self.unnormalized_token.take();
-        let next_token = self.next_tok(self.unnormalized_prev_token.span);
         self.set_token(next_token);

         // Update fields derived from the previous token.
         self.prev_span = self.unnormalized_prev_token.span;

+        // Diagnostics.
         self.expected_tokens.clear();
     }

-    /// Advances the parser using provided token as a next one. Use this when
-    /// consuming a part of a token. For example a single `<` from `<<`.
-    /// FIXME: this function sets the previous token data to some semi-nonsensical values
-    /// which kind of work because they are currently used in very limited ways in practice.
-    /// Correct token kinds and spans need to be calculated instead.
-    fn bump_with(&mut self, next: TokenKind, span: Span) {
-        // Update the current and previous tokens.
-        self.prev_token = self.token.take();
-        self.unnormalized_prev_token = self.unnormalized_token.take();
-        self.set_token(Token::new(next, span));
-
-        // Update fields derived from the previous token.
-        self.prev_span = self.unnormalized_prev_token.span.with_hi(span.lo());
-
-        self.expected_tokens.clear();
+    /// Advance the parser by one token.
+    pub fn bump(&mut self) {
+        let next_token = self.next_tok(self.unnormalized_token.span);
+        self.bump_with(next_token);
     }

     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
diff --git a/src/libsyntax/token.rs b/src/libsyntax/token.rs
index 862934300e085..6eeee49881579 100644
--- a/src/libsyntax/token.rs
+++ b/src/libsyntax/token.rs
@@ -270,6 +270,39 @@ impl TokenKind {
         Literal(Lit::new(kind, symbol, suffix))
     }

+    // An approximation to proc-macro-style single-character operators used by rustc parser.
+    // If the operator token can be broken into two tokens, the first of which is single-character,
+    // then this function performs that operation, otherwise it returns `None`.
+    pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
+        Some(match *self {
+            Le => (Lt, Eq),
+            EqEq => (Eq, Eq),
+            Ne => (Not, Eq),
+            Ge => (Gt, Eq),
+            AndAnd => (BinOp(And), BinOp(And)),
+            OrOr => (BinOp(Or), BinOp(Or)),
+            BinOp(Shl) => (Lt, Lt),
+            BinOp(Shr) => (Gt, Gt),
+            BinOpEq(Plus) => (BinOp(Plus), Eq),
+            BinOpEq(Minus) => (BinOp(Minus), Eq),
+            BinOpEq(Star) => (BinOp(Star), Eq),
+            BinOpEq(Slash) => (BinOp(Slash), Eq),
+            BinOpEq(Percent) => (BinOp(Percent), Eq),
+            BinOpEq(Caret) => (BinOp(Caret), Eq),
+            BinOpEq(And) => (BinOp(And), Eq),
+            BinOpEq(Or) => (BinOp(Or), Eq),
+            BinOpEq(Shl) => (Lt, Le),
+            BinOpEq(Shr) => (Gt, Ge),
+            DotDot => (Dot, Dot),
+            DotDotDot => (Dot, DotDot),
+            ModSep => (Colon, Colon),
+            RArrow => (BinOp(Minus), Gt),
+            LArrow => (Lt, BinOp(Minus)),
+            FatArrow => (Eq, Gt),
+            _ => return None,
+        })
+    }
+
     /// Returns tokens that are likely to be typed accidentally instead of the current token.
     /// Enables better error recovery when the wrong token is found.
     pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {
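
Note (not part of the patch): below is a minimal, self-contained sketch of the token-splitting idea that `break_and_eat` and `break_two_token_op` implement. The `Tok` enum and the free function are illustrative stand-ins rather than rustc's real `TokenKind` API, and the span bookkeeping (`first_span`/`second_span`) from the hunk above is omitted.

    // Illustrative stand-in for rustc's `TokenKind`; not real compiler code.
    #[allow(dead_code)]
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Tok {
        Lt,  // `<`
        Gt,  // `>`
        Eq,  // `=`
        Shl, // `<<`
        Shr, // `>>`
        Ge,  // `>=`
    }

    // Same shape as the patch's `break_two_token_op`: split a compound operator
    // into a single-character head and a remainder, or `None` if it can't be split.
    fn break_two_token_op(tok: Tok) -> Option<(Tok, Tok)> {
        Some(match tok {
            Tok::Shl => (Tok::Lt, Tok::Lt),
            Tok::Shr => (Tok::Gt, Tok::Gt),
            Tok::Ge => (Tok::Gt, Tok::Eq),
            _ => return None,
        })
    }

    fn main() {
        // While parsing `Vec<Vec<u8>>` the lexer emits a single `>>` token;
        // `expect_gt` only wants `>`, so the parser breaks `>>` in two and keeps
        // the second `>` (with a recomputed span) as the new current token.
        assert_eq!(break_two_token_op(Tok::Shr), Some((Tok::Gt, Tok::Gt)));
        // A plain `<` is already single-character and cannot be broken further.
        assert_eq!(break_two_token_op(Tok::Lt), None);
    }

In the real parser, `break_and_eat` additionally replaces the current token with the first half via `set_token` and feeds the second half to the new single-argument `bump_with`, so the rest of the parser never sees the original compound token.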