diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index affb4289cb1b5..d609fa6720502 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -94,16 +94,6 @@ impl TokenTree { TokenTree::Token(Token::new(kind, span)) } - /// Returns the opening delimiter as a token tree. - pub fn open_tt(span: DelimSpan, delim: DelimToken) -> TokenTree { - TokenTree::token(token::OpenDelim(delim), span.open) - } - - /// Returns the closing delimiter as a token tree. - pub fn close_tt(span: DelimSpan, delim: DelimToken) -> TokenTree { - TokenTree::token(token::CloseDelim(delim), span.close) - } - pub fn uninterpolate(self) -> TokenTree { match self { TokenTree::Token(token) => TokenTree::Token(token.uninterpolate().into_owned()), @@ -585,13 +575,20 @@ impl Cursor { Cursor { stream, index: 0 } } + #[inline] pub fn next_with_spacing(&mut self) -> Option { - if self.index < self.stream.len() { + self.stream.0.get(self.index).map(|tree| { self.index += 1; - Some(self.stream.0[self.index - 1].clone()) - } else { - None - } + tree.clone() + }) + } + + #[inline] + pub fn next_with_spacing_ref(&mut self) -> Option<&TreeAndSpacing> { + self.stream.0.get(self.index).map(|tree| { + self.index += 1; + tree + }) } pub fn index(&self) -> usize { diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 5ee9c339bb7aa..02749088c3139 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -100,21 +100,16 @@ rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144); impl CreateTokenStream for LazyTokenStreamImpl { fn create_token_stream(&self) -> AttrAnnotatedTokenStream { - // The token produced by the final call to `{,inlined_}next` or - // `{,inlined_}next_desugared` was not actually consumed by the - // callback. The combination of chaining the initial token and using - // `take` produces the desired result - we produce an empty - // `TokenStream` if no calls were made, and omit the final token - // otherwise. + // The token produced by the final call to `{,inlined_}next` was not + // actually consumed by the callback. The combination of chaining the + // initial token and using `take` produces the desired result - we + // produce an empty `TokenStream` if no calls were made, and omit the + // final token otherwise. let mut cursor_snapshot = self.cursor_snapshot.clone(); let tokens = std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) .chain((0..self.num_calls).map(|_| { - let token = if cursor_snapshot.desugar_doc_comments { - cursor_snapshot.next_desugared() - } else { - cursor_snapshot.next() - }; + let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments); (FlatToken::Token(token.0), token.1) })) .take(self.num_calls); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index cb6be8f412cf5..1686c5873e183 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -123,8 +123,8 @@ pub struct Parser<'a> { pub capture_cfg: bool, restrictions: Restrictions, expected_tokens: Vec, - // Important: This must only be advanced from `next_tok` - // to ensure that `token_cursor.num_next_calls` is updated properly + // Important: This must only be advanced from `bump` to ensure that + // `token_cursor.num_next_calls` is updated properly. token_cursor: TokenCursor, desugar_doc_comments: bool, /// This field is used to keep track of how many left angle brackets we have seen. This is @@ -150,6 +150,11 @@ pub struct Parser<'a> { pub current_closure: Option, } +// This type is used a lot, e.g. it's cloned when matching many declarative macro rules. Make sure +// it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(Parser<'_>, 328); + /// Stores span information about a closure. #[derive(Clone)] pub struct ClosureSpans { @@ -203,12 +208,15 @@ impl<'a> Drop for Parser<'a> { #[derive(Clone)] struct TokenCursor { + // The current (innermost) frame. `frame` and `stack` could be combined, + // but it's faster to have them separately to access `frame` directly + // rather than via something like `stack.last().unwrap()` or + // `stack[stack.len() - 1]`. frame: TokenCursorFrame, + // Additional frames that enclose `frame`. stack: Vec, desugar_doc_comments: bool, - // Counts the number of calls to `{,inlined_}next` or - // `{,inlined_}next_desugared`, depending on whether - // `desugar_doc_comments` is set. + // Counts the number of calls to `{,inlined_}next`. num_next_calls: usize, // During parsing, we may sometimes need to 'unglue' a // glued token into two component tokens @@ -238,73 +246,60 @@ struct TokenCursor { struct TokenCursorFrame { delim: token::DelimToken, span: DelimSpan, - open_delim: bool, tree_cursor: tokenstream::Cursor, - close_delim: bool, } impl TokenCursorFrame { fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { - TokenCursorFrame { - delim, - span, - open_delim: false, - tree_cursor: tts.into_trees(), - close_delim: false, - } + TokenCursorFrame { delim, span, tree_cursor: tts.into_trees() } } } impl TokenCursor { - fn next(&mut self) -> (Token, Spacing) { - self.inlined_next() + fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { + self.inlined_next(desugar_doc_comments) } /// This always-inlined version should only be used on hot code paths. #[inline(always)] - fn inlined_next(&mut self) -> (Token, Spacing) { + fn inlined_next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { loop { - let (tree, spacing) = if !self.frame.open_delim { - self.frame.open_delim = true; - TokenTree::open_tt(self.frame.span, self.frame.delim).into() - } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() { - tree - } else if !self.frame.close_delim { - self.frame.close_delim = true; - TokenTree::close_tt(self.frame.span, self.frame.delim).into() + // FIXME: we currently don't return `NoDelim` open/close delims. To fix #67062 we will + // need to, whereupon the `delim != DelimToken::NoDelim` conditions below can be + // removed, as well as the loop. + if let Some((tree, spacing)) = self.frame.tree_cursor.next_with_spacing_ref() { + match tree { + &TokenTree::Token(ref token) => match (desugar_doc_comments, token) { + (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { + return self.desugar(attr_style, data, span); + } + _ => return (token.clone(), *spacing), + }, + &TokenTree::Delimited(sp, delim, ref tts) => { + // Set `open_delim` to true here because we deal with it immediately. + let frame = TokenCursorFrame::new(sp, delim, tts.clone()); + self.stack.push(mem::replace(&mut self.frame, frame)); + if delim != DelimToken::NoDelim { + return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); + } + // No open delimeter to return; continue on to the next iteration. + } + }; } else if let Some(frame) = self.stack.pop() { + let delim = self.frame.delim; + let span = self.frame.span; self.frame = frame; - continue; - } else { - (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone) - }; - - match tree { - TokenTree::Token(token) => { - return (token, spacing); - } - TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts); - self.stack.push(mem::replace(&mut self.frame, frame)); + if delim != DelimToken::NoDelim { + return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); } + // No close delimiter to return; continue on to the next iteration. + } else { + return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } } } - fn next_desugared(&mut self) -> (Token, Spacing) { - self.inlined_next_desugared() - } - - /// This always-inlined version should only be used on hot code paths. - #[inline(always)] - fn inlined_next_desugared(&mut self) -> (Token, Spacing) { - let (data, attr_style, sp) = match self.inlined_next() { - (Token { kind: token::DocComment(_, attr_style, data), span }, _) => { - (data, attr_style, span) - } - tok => return tok, - }; - + fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { // Searches for the occurrences of `"#*` and returns the minimum number of `#`s // required to wrap the text. let mut num_of_hashes = 0; @@ -318,14 +313,14 @@ impl TokenCursor { num_of_hashes = cmp::max(num_of_hashes, count); } - let delim_span = DelimSpan::from_single(sp); + let delim_span = DelimSpan::from_single(span); let body = TokenTree::Delimited( delim_span, token::Bracket, [ - TokenTree::token(token::Ident(sym::doc, false), sp), - TokenTree::token(token::Eq, sp), - TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp), + TokenTree::token(token::Ident(sym::doc, false), span), + TokenTree::token(token::Eq, span), + TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), span), ] .iter() .cloned() @@ -338,12 +333,12 @@ impl TokenCursor { delim_span, token::NoDelim, if attr_style == AttrStyle::Inner { - [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body] + [TokenTree::token(token::Pound, span), TokenTree::token(token::Not, span), body] .iter() .cloned() .collect::() } else { - [TokenTree::token(token::Pound, sp), body] + [TokenTree::token(token::Pound, span), body] .iter() .cloned() .collect::() @@ -351,7 +346,7 @@ impl TokenCursor { ), )); - self.next() + self.next(/* desugar_doc_comments */ false) } } @@ -436,9 +431,9 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { - let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); - start_frame.open_delim = true; - start_frame.close_delim = true; + // Note: because of the way `TokenCursor::inlined_next` is structured, the `span` and + // `delim` arguments here are never used. + let start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); let mut parser = Parser { sess, @@ -476,33 +471,6 @@ impl<'a> Parser<'a> { parser } - #[inline] - fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) { - loop { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.inlined_next_desugared() - } else { - self.token_cursor.inlined_next() - }; - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.break_last_token = false; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = fallback_span.with_ctxt(next.span.ctxt()); - } - if matches!( - next.kind, - token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) - ) { - continue; - } - return (next, spacing); - } - } - pub fn unexpected(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -697,7 +665,7 @@ impl<'a> Parser<'a> { // // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), - // and `next_tok` will set this field to `None` + // and `bump` will set this field to `None` self.token_cursor.break_last_token = true; // Use the spacing of the glued token as the spacing // of the unglued second token. @@ -1019,12 +987,6 @@ impl<'a> Parser<'a> { /// This always-inlined version should only be used on hot code paths. #[inline(always)] fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) { - // Bumping after EOF is a bad sign, usually an infinite loop. - if self.prev_token.kind == TokenKind::Eof { - let msg = "attempted to bump the parser past EOF (may be stuck in a loop)"; - self.span_bug(self.token.span, msg); - } - // Update the current and previous tokens. self.prev_token = mem::replace(&mut self.token, next_token); self.token_spacing = next_spacing; @@ -1035,8 +997,24 @@ impl<'a> Parser<'a> { /// Advance the parser by one token. pub fn bump(&mut self) { - let next_token = self.next_tok(self.token.span); - self.inlined_bump_with(next_token); + // Note: destructuring here would give nicer code, but it was found in #96210 to be slower + // than `.0`/`.1` access. + let mut next = self.token_cursor.inlined_next(self.desugar_doc_comments); + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.0.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + let fallback_span = self.token.span; + next.0.span = fallback_span.with_ctxt(next.0.span.ctxt()); + } + debug_assert!(!matches!( + next.0.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + )); + self.inlined_bump_with(next) } /// Look-ahead `dist` tokens of `self.token` and get access to that token there. @@ -1069,7 +1047,7 @@ impl<'a> Parser<'a> { let mut i = 0; let mut token = Token::dummy(); while i < dist { - token = cursor.next().0; + token = cursor.next(/* desugar_doc_comments */ false).0; if matches!( token.kind, token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) @@ -1217,24 +1195,28 @@ impl<'a> Parser<'a> { pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { - let depth = self.token_cursor.stack.len(); + // Grab the tokens from this frame. + let frame = &self.token_cursor.frame; + let stream = frame.tree_cursor.stream.clone(); + let span = frame.span; + let delim = frame.delim; - // We keep advancing the token cursor until we hit - // the matching `CloseDelim` token. - while !(depth == self.token_cursor.stack.len() - && matches!(self.token.kind, token::CloseDelim(_))) - { + // Advance the token cursor through the entire delimited + // sequence. After getting the `OpenDelim` we are *within* the + // delimited sequence, i.e. at depth `d`. After getting the + // matching `CloseDelim` we are *after* the delimited sequence, + // i.e. at depth `d - 1`. + let target_depth = self.token_cursor.stack.len() - 1; + loop { // Advance one token at a time, so `TokenCursor::next()` // can capture these tokens if necessary. self.bump(); + if self.token_cursor.stack.len() == target_depth { + debug_assert!(matches!(self.token.kind, token::CloseDelim(_))); + break; + } } - // We are still inside the frame corresponding - // to the delimited stream we captured, so grab - // the tokens from this frame. - let frame = &self.token_cursor.frame; - let stream = frame.tree_cursor.stream.clone(); - let span = frame.span; - let delim = frame.delim; + // Consume close delimiter self.bump(); TokenTree::Delimited(span, delim, stream) diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index b45bca3d2e024..691bfdb01a43e 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -11,8 +11,10 @@ use crate::parser::{FollowedByType, ForceCollect, NtOrTt, Parser, PathStyle}; impl<'a> Parser<'a> { /// Checks whether a non-terminal may begin with a particular token. /// - /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that - /// token. Be conservative (return true) if not sure. + /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with + /// that token. Be conservative (return true) if not sure. Inlined because it has a single call + /// site. + #[inline] pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { /// Checks whether the non-terminal may contain a single (non-keyword) identifier. fn may_be_ident(nt: &token::Nonterminal) -> bool { @@ -95,7 +97,9 @@ impl<'a> Parser<'a> { } } - /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). + /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). Inlined because there is only one call + /// site. + #[inline] pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, NtOrTt> { // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`) // needs to have them force-captured here.