diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 522989c64548b..ffe8b10e6877a 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -106,9 +106,9 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48); /// /// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves /// simply incrementing the current matcher position index by one. -enum MatcherLoc<'tt> { +pub(super) enum MatcherLoc { Token { - token: &'tt Token, + token: Token, }, Delimited, Sequence { @@ -123,7 +123,7 @@ enum MatcherLoc<'tt> { idx_first: usize, }, SequenceSep { - separator: &'tt Token, + separator: Token, }, SequenceKleeneOpAfterSep { idx_first: usize, @@ -131,13 +131,85 @@ enum MatcherLoc<'tt> { MetaVarDecl { span: Span, bind: Ident, - kind: NonterminalKind, + kind: Option, next_metavar: usize, seq_depth: usize, }, Eof, } +pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec { + fn inner( + sess: &ParseSess, + tts: &[TokenTree], + locs: &mut Vec, + next_metavar: &mut usize, + seq_depth: usize, + ) { + for tt in tts { + match tt { + TokenTree::Token(token) => { + locs.push(MatcherLoc::Token { token: token.clone() }); + } + TokenTree::Delimited(_, delimited) => { + locs.push(MatcherLoc::Delimited); + inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth); + } + TokenTree::Sequence(_, seq) => { + // We can't determine `idx_first_after` and construct the final + // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end + // pieces are processed. So we push a dummy value (`Eof` is cheapest to + // construct) now, and overwrite it with the proper value below. + let dummy = MatcherLoc::Eof; + locs.push(dummy); + + let next_metavar_orig = *next_metavar; + let op = seq.kleene.op; + let idx_first = locs.len(); + let idx_seq = idx_first - 1; + inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1); + + if let Some(separator) = &seq.separator { + locs.push(MatcherLoc::SequenceSep { separator: separator.clone() }); + locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first }); + } else { + locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first }); + } + + // Overwrite the dummy value pushed above with the proper value. + locs[idx_seq] = MatcherLoc::Sequence { + op, + num_metavar_decls: seq.num_captures, + idx_first_after: locs.len(), + next_metavar: next_metavar_orig, + seq_depth, + }; + } + &TokenTree::MetaVarDecl(span, bind, kind) => { + locs.push(MatcherLoc::MetaVarDecl { + span, + bind, + kind, + next_metavar: *next_metavar, + seq_depth, + }); + *next_metavar += 1; + } + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + } + } + } + + let mut locs = vec![]; + let mut next_metavar = 0; + inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0); + + // A final entry is needed for eof. + locs.push(MatcherLoc::Eof); + + locs +} + /// A single matcher position, representing the state of matching. struct MatcherPos { /// The index into `TtParser::locs`, which represents the "dot". @@ -298,12 +370,9 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { // Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess // allocations we have a single vector fo each kind that is cleared and reused repeatedly. -pub struct TtParser<'tt> { +pub struct TtParser { macro_name: Ident, - /// The matcher of the current rule. - locs: Vec>, - /// The set of current mps to be processed. This should be empty by the end of a successful /// execution of `parse_tt_inner`. cur_mps: Vec, @@ -320,11 +389,10 @@ pub struct TtParser<'tt> { empty_matches: Lrc, } -impl<'tt> TtParser<'tt> { - pub(super) fn new(macro_name: Ident) -> TtParser<'tt> { +impl TtParser { + pub(super) fn new(macro_name: Ident) -> TtParser { TtParser { macro_name, - locs: vec![], cur_mps: vec![], next_mps: vec![], bb_mps: vec![], @@ -332,99 +400,6 @@ impl<'tt> TtParser<'tt> { } } - /// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the - /// macro is called, which may be many times if there are many call sites or if it is - /// recursive. This conversion is fairly cheap and the representation is sufficiently better - /// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead. - /// But it might be possible to move the conversion outwards so it only occurs once per macro. - fn compute_locs( - &mut self, - sess: &ParseSess, - matcher: &'tt [TokenTree], - ) -> Result { - fn inner<'tt>( - sess: &ParseSess, - tts: &'tt [TokenTree], - locs: &mut Vec>, - next_metavar: &mut usize, - seq_depth: usize, - ) -> Result<(), (Span, String)> { - for tt in tts { - match tt { - TokenTree::Token(token) => { - locs.push(MatcherLoc::Token { token }); - } - TokenTree::Delimited(_, delimited) => { - locs.push(MatcherLoc::Delimited); - inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth)?; - } - TokenTree::Sequence(_, seq) => { - // We can't determine `idx_first_after` and construct the final - // `MatcherLoc::Sequence` until after `inner()` is called and the sequence - // end pieces are processed. So we push a dummy value (`Eof` is cheapest to - // construct) now, and overwrite it with the proper value below. - let dummy = MatcherLoc::Eof; - locs.push(dummy); - - let next_metavar_orig = *next_metavar; - let op = seq.kleene.op; - let idx_first = locs.len(); - let idx_seq = idx_first - 1; - inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1)?; - - if let Some(separator) = &seq.separator { - locs.push(MatcherLoc::SequenceSep { separator }); - locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first }); - } else { - locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first }); - } - - // Overwrite the dummy value pushed above with the proper value. - locs[idx_seq] = MatcherLoc::Sequence { - op, - num_metavar_decls: seq.num_captures, - idx_first_after: locs.len(), - next_metavar: next_metavar_orig, - seq_depth, - }; - } - &TokenTree::MetaVarDecl(span, bind, kind) => { - if let Some(kind) = kind { - locs.push(MatcherLoc::MetaVarDecl { - span, - bind, - kind, - next_metavar: *next_metavar, - seq_depth, - }); - *next_metavar += 1; - } else if sess - .missing_fragment_specifiers - .borrow_mut() - .remove(&span) - .is_some() - { - // E.g. `$e` instead of `$e:expr`. - return Err((span, "missing fragment specifier".to_string())); - } - } - TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), - } - } - Ok(()) - } - - self.locs.clear(); - let mut next_metavar = 0; - inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0)?; - - // A final entry is needed for eof. - self.locs.push(MatcherLoc::Eof); - - // This is the number of metavar decls. - Ok(next_metavar) - } - /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will /// produce more mps in `next_mps` and `bb_mps`. /// @@ -434,7 +409,8 @@ impl<'tt> TtParser<'tt> { /// track of through the mps generated. fn parse_tt_inner( &mut self, - num_metavar_decls: usize, + sess: &ParseSess, + matcher: &[MatcherLoc], token: &Token, ) -> Option { // Matcher positions that would be valid if the macro invocation was over now. Only @@ -442,7 +418,7 @@ impl<'tt> TtParser<'tt> { let mut eof_mps = EofMatcherPositions::None; while let Some(mut mp) = self.cur_mps.pop() { - match &self.locs[mp.idx] { + match &matcher[mp.idx] { MatcherLoc::Token { token: t } => { // If it's a doc comment, we just ignore it and move on to the next tt in the // matcher. This is a bug, but #95267 showed that existing programs rely on @@ -532,17 +508,25 @@ impl<'tt> TtParser<'tt> { mp.idx = idx_first; self.cur_mps.push(mp); } - MatcherLoc::MetaVarDecl { kind, .. } => { + &MatcherLoc::MetaVarDecl { span, kind, .. } => { // Built-in nonterminals never start with these tokens, so we can eliminate // them from consideration. We use the span of the metavariable declaration // to determine any edition-specific matching behavior for non-terminals. - if Parser::nonterminal_may_begin_with(*kind, token) { - self.bb_mps.push(mp); + if let Some(kind) = kind { + if Parser::nonterminal_may_begin_with(kind, token) { + self.bb_mps.push(mp); + } + } else { + // Both this check and the one in `nameize` are necessary, surprisingly. + if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { + // E.g. `$e` instead of `$e:expr`. + return Some(Error(span, "missing fragment specifier".to_string())); + } } } MatcherLoc::Eof => { // We are past the matcher's end, and not in a sequence. Try to end things. - debug_assert_eq!(mp.idx, self.locs.len() - 1); + debug_assert_eq!(mp.idx, matcher.len() - 1); if *token == token::Eof { eof_mps = match eof_mps { EofMatcherPositions::None => EofMatcherPositions::One(mp), @@ -560,11 +544,10 @@ impl<'tt> TtParser<'tt> { if *token == token::Eof { Some(match eof_mps { EofMatcherPositions::One(mut eof_mp) => { - assert_eq!(eof_mp.matches.len(), num_metavar_decls); // Need to take ownership of the matches from within the `Lrc`. Lrc::make_mut(&mut eof_mp.matches); let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter(); - self.nameize(matches) + self.nameize(sess, matcher, matches) } EofMatcherPositions::Multiple => { Error(token.span, "ambiguity: multiple successful parses".to_string()) @@ -586,13 +569,8 @@ impl<'tt> TtParser<'tt> { pub(super) fn parse_tt( &mut self, parser: &mut Cow<'_, Parser<'_>>, - matcher: &'tt [TokenTree], + matcher: &[MatcherLoc], ) -> NamedParseResult { - let num_metavar_decls = match self.compute_locs(parser.sess, matcher) { - Ok(num_metavar_decls) => num_metavar_decls, - Err((span, msg)) => return Error(span, msg), - }; - // A queue of possible matcher positions. We initialize it with the matcher position in // which the "dot" is before the first token of the first token tree in `matcher`. // `parse_tt_inner` then processes all of these possible matcher positions and produces @@ -607,7 +585,7 @@ impl<'tt> TtParser<'tt> { // Process `cur_mps` until either we have finished the input or we need to get some // parsing from the black-box parser done. - if let Some(res) = self.parse_tt_inner(num_metavar_decls, &parser.token) { + if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) { return res; } @@ -635,9 +613,13 @@ impl<'tt> TtParser<'tt> { (0, 1) => { // We need to call the black-box parser to get some nonterminal. let mut mp = self.bb_mps.pop().unwrap(); - let loc = &self.locs[mp.idx]; + let loc = &matcher[mp.idx]; if let &MatcherLoc::MetaVarDecl { - span, kind, next_metavar, seq_depth, .. + span, + kind: Some(kind), + next_metavar, + seq_depth, + .. } = loc { // We use the span of the metavariable declaration to determine any @@ -669,7 +651,7 @@ impl<'tt> TtParser<'tt> { (_, _) => { // Too many possibilities! - return self.ambiguity_error(parser.token.span); + return self.ambiguity_error(matcher, parser.token.span); } } @@ -677,12 +659,18 @@ impl<'tt> TtParser<'tt> { } } - fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult { + fn ambiguity_error( + &self, + matcher: &[MatcherLoc], + token_span: rustc_span::Span, + ) -> NamedParseResult { let nts = self .bb_mps .iter() - .map(|mp| match &self.locs[mp.idx] { - MatcherLoc::MetaVarDecl { bind, kind, .. } => format!("{} ('{}')", kind, bind), + .map(|mp| match &matcher[mp.idx] { + MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => { + format!("{} ('{}')", kind, bind) + } _ => unreachable!(), }) .collect::>() @@ -702,16 +690,31 @@ impl<'tt> TtParser<'tt> { ) } - fn nameize>(&self, mut res: I) -> NamedParseResult { + fn nameize>( + &self, + sess: &ParseSess, + matcher: &[MatcherLoc], + mut res: I, + ) -> NamedParseResult { // Make that each metavar has _exactly one_ binding. If so, insert the binding into the // `NamedParseResult`. Otherwise, it's an error. let mut ret_val = FxHashMap::default(); - for loc in self.locs.iter() { - if let &MatcherLoc::MetaVarDecl { span, bind, .. } = loc { - match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) { - Vacant(spot) => spot.insert(res.next().unwrap()), - Occupied(..) => return Error(span, format!("duplicated bind name: {}", bind)), - }; + for loc in matcher { + if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc { + if kind.is_some() { + match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) { + Vacant(spot) => spot.insert(res.next().unwrap()), + Occupied(..) => { + return Error(span, format!("duplicated bind name: {}", bind)); + } + }; + } else { + // Both this check and the one in `parse_tt_inner` are necessary, surprisingly. + if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { + // E.g. `$e` instead of `$e:expr`. + return Error(span, "missing fragment specifier".to_string()); + } + } } } Success(ret_val) diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index 10b2b9f07e2a5..27fe78516a494 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -4,7 +4,7 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF use crate::mbe; use crate::mbe::macro_check; use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser}; -use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree}; +use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc}; use crate::mbe::transcribe::transcribe; use rustc_ast as ast; @@ -159,7 +159,7 @@ struct MacroRulesMacroExpander { name: Ident, span: Span, transparency: Transparency, - lhses: Vec, + lhses: Vec>, rhses: Vec, valid: bool, is_local: bool, @@ -210,7 +210,7 @@ fn generic_extension<'cx, 'tt>( name: Ident, transparency: Transparency, arg: TokenStream, - lhses: &'tt [mbe::TokenTree], + lhses: &'tt [Vec], rhses: &'tt [mbe::TokenTree], is_local: bool, ) -> Box { @@ -245,14 +245,6 @@ fn generic_extension<'cx, 'tt>( // this situation.) let parser = parser_from_cx(sess, arg.clone()); - // A matcher is always delimited, but the delimiters are ignored. - let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] { - match tt { - mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(), - _ => cx.span_bug(sp, "malformed macro lhs"), - } - }; - // Try each arm's matchers. let mut tt_parser = TtParser::new(name); for (i, lhs) in lhses.iter().enumerate() { @@ -262,13 +254,19 @@ fn generic_extension<'cx, 'tt>( // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); - match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) { + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { Success(named_matches) => { // The matcher was `Success(..)`ful. // Merge the gated spans from parsing the matcher with the pre-existing ones. sess.gated_spans.merge(gated_spans_snapshot); - let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone(); + // Ignore the delimiters on the RHS. + let rhs = match &rhses[i] { + mbe::TokenTree::Delimited(_, delimited) => { + delimited.inner_tts().to_vec().clone() + } + _ => cx.span_bug(sp, "malformed macro rhs"), + }; let arm_span = rhses[i].span(); let rhs_spans = rhs.iter().map(|t| t.span()).collect::>(); @@ -346,10 +344,8 @@ fn generic_extension<'cx, 'tt>( // Check whether there's a missing comma in this macro call, like `println!("{}" a);` if let Some((arg, comma_span)) = arg.add_comma() { for lhs in lhses { - if let Success(_) = tt_parser.parse_tt( - &mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), - delimited_inner_tts(lhs), - ) { + let parser = parser_from_cx(sess, arg.clone()); + if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) { if comma_span.is_dummy() { err.note("you might be missing a comma"); } else { @@ -440,6 +436,8 @@ pub fn compile_declarative_macro( }), ), ]; + // Convert it into `MatcherLoc` form. + let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram); let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); let mut tt_parser = TtParser::new(def.ident); @@ -536,6 +534,25 @@ pub fn compile_declarative_macro( None => {} } + // Convert the lhses into `MatcherLoc` form, which is better for doing the + // actual matching. Unless the matcher is invalid. + let lhses = if valid { + lhses + .iter() + .map(|lhs| { + // Ignore the delimiters around the matcher. + match lhs { + mbe::TokenTree::Delimited(_, delimited) => { + mbe::macro_parser::compute_locs(&sess.parse_sess, delimited.inner_tts()) + } + _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"), + } + }) + .collect() + } else { + vec![] + }; + mk_syn_ext(Box::new(MacroRulesMacroExpander { name: def.ident, span: def.span,