From 6bf2cc2229768faa8e86e0e8a9f5bd8ebfc817a2 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 5 Feb 2020 09:44:03 +1100
Subject: [PATCH 1/3] Avoid instantiating many `Parser` structs in
 `generic_extension`.

Currently, every iteration of the main loop in `generic_extension`
instantiates a `Parser`, which is expensive because `Parser` is a large
type. Many of those instantiations are only used immutably, particularly
for simple-but-repetitive macros of the sort seen in `html5ever` and
issue 68836.

This commit initializes a single "base" parser outside the loop, and
then uses `Cow` to avoid cloning it except for the mutating iterations.
This speeds up `html5ever` runs by up to 15%.
---
 src/librustc_expand/lib.rs              |  1 +
 src/librustc_expand/mbe/macro_parser.rs | 38 ++++-----------
 src/librustc_expand/mbe/macro_rules.rs  | 62 ++++++++++++++++++-------
 3 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/src/librustc_expand/lib.rs b/src/librustc_expand/lib.rs
index 4fe7c268c4f0b..f119c956ced04 100644
--- a/src/librustc_expand/lib.rs
+++ b/src/librustc_expand/lib.rs
@@ -1,3 +1,4 @@
+#![feature(cow_is_borrowed)]
 #![feature(crate_visibility_modifier)]
 #![feature(decl_macro)]
 #![feature(proc_macro_diagnostic)]
diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs
index b14725fd731b1..78f22f3e443b1 100644
--- a/src/librustc_expand/mbe/macro_parser.rs
+++ b/src/librustc_expand/mbe/macro_parser.rs
@@ -78,13 +78,11 @@ use crate::mbe::{self, TokenTree};
 
 use rustc_ast_pretty::pprust;
 use rustc_parse::parser::{FollowedByType, Parser, PathStyle};
-use rustc_parse::Directory;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{kw, sym, Symbol};
 use syntax::ast::{Ident, Name};
 use syntax::ptr::P;
 use syntax::token::{self, DocComment, Nonterminal, Token};
-use syntax::tokenstream::TokenStream;
 
 use rustc_errors::{FatalError, PResult};
 use rustc_span::Span;
@@ -92,6 +90,7 @@ use smallvec::{smallvec, SmallVec};
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
+use std::borrow::Cow;
 use std::collections::hash_map::Entry::{Occupied, Vacant};
 use std::mem;
 use std::ops::{Deref, DerefMut};
@@ -613,28 +612,9 @@ fn inner_parse_loop<'root, 'tt>(
     Success(())
 }
 
-/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts`
-/// against it and return the match.
-///
-/// # Parameters
-///
-/// - `sess`: The session into which errors are emitted
-/// - `tts`: The tokenstream we are matching against the pattern `ms`
-/// - `ms`: A sequence of token trees representing a pattern against which we are matching
-/// - `directory`: Information about the file locations (needed for the black-box parser)
-/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
-///   parser)
-pub(super) fn parse(
-    sess: &ParseSess,
-    tts: TokenStream,
-    ms: &[TokenTree],
-    directory: Option<Directory<'_>>,
-    recurse_into_modules: bool,
-) -> NamedParseResult {
-    // Create a parser that can be used for the "black box" parts.
-    let mut parser =
-        Parser::new(sess, tts, directory, recurse_into_modules, true, rustc_parse::MACRO_ARGUMENTS);
-
+/// Use the given sequence of token trees (`ms`) as a matcher. Match the token
+/// stream from the given `parser` against it and return the match.
+pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> NamedParseResult {
     // A queue of possible matcher positions. We initialize it with the matcher position in which
     // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
     // processes all of these possible matcher positions and produces possible next positions into
@@ -659,7 +639,7 @@ pub(super) fn parse(
         // parsing from the black-box parser done. The result is that `next_items` will contain a
         // bunch of possible next matcher positions in `next_items`.
         match inner_parse_loop(
-            sess,
+            parser.sess,
             &mut cur_items,
             &mut next_items,
             &mut eof_items,
@@ -684,7 +664,7 @@ pub(super) fn parse(
             if eof_items.len() == 1 {
                 let matches =
                     eof_items[0].matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap());
-                return nameize(sess, ms, matches);
+                return nameize(parser.sess, ms, matches);
             } else if eof_items.len() > 1 {
                 return Error(
                     parser.token.span,
@@ -736,13 +716,13 @@ pub(super) fn parse(
         // If there are no possible next positions AND we aren't waiting for the black-box parser,
         // then there is a syntax error.
         else if bb_items.is_empty() && next_items.is_empty() {
-            return Failure(parser.token.take(), "no rules expected this token in macro call");
+            return Failure(parser.token.clone(), "no rules expected this token in macro call");
         }
         // Dump all possible `next_items` into `cur_items` for the next iteration.
         else if !next_items.is_empty() {
             // Now process the next token
             cur_items.extend(next_items.drain(..));
-            parser.bump();
+            parser.to_mut().bump();
         }
         // Finally, we have the case where we need to call the black-box parser to get some
         // nonterminal.
@@ -754,7 +734,7 @@ pub(super) fn parse(
                 let match_cur = item.match_cur;
                 item.push_match(
                     match_cur,
-                    MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
+                    MatchedNonterminal(Lrc::new(parse_nt(parser.to_mut(), span, ident.name))),
                 );
                 item.idx += 1;
                 item.match_cur += 1;
diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs
index 29d41543fbf8c..9432790e78ced 100644
--- a/src/librustc_expand/mbe/macro_rules.rs
+++ b/src/librustc_expand/mbe/macro_rules.rs
@@ -1,11 +1,11 @@
-use crate::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
+use crate::base::{DummyResult, ExpansionData, ExtCtxt, MacResult, TTMacroExpander};
 use crate::base::{SyntaxExtension, SyntaxExtensionKind};
 use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
 use crate::mbe;
 use crate::mbe::macro_check;
-use crate::mbe::macro_parser::parse;
+use crate::mbe::macro_parser::parse_tt;
 use crate::mbe::macro_parser::{Error, Failure, Success};
-use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedParseResult};
+use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
 use crate::mbe::transcribe::transcribe;
 
 use rustc_ast_pretty::pprust;
@@ -166,9 +166,9 @@ impl TTMacroExpander for MacroRulesMacroExpander {
     }
 }
 
-fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
+fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span, message: String) {
     let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
-    cx.expansions.entry(sp).or_default().push(message);
+    cx_expansions.entry(sp).or_default().push(message);
 }
 
 /// Given `lhses` and `rhses`, this is the new macro we create
@@ -184,12 +184,36 @@ fn generic_extension<'cx>(
 ) -> Box<dyn MacResult + 'cx> {
     if cx.trace_macros() {
         let msg = format!("expanding `{}! {{ {} }}`", name, pprust::tts_to_string(arg.clone()));
-        trace_macros_note(cx, sp, msg);
+        trace_macros_note(&mut cx.expansions, sp, msg);
     }
 
     // Which arm's failure should we report? (the one furthest along)
     let mut best_failure: Option<(Token, &str)> = None;
+
+    // We create a base parser that can be used for the "black box" parts.
+    // Every iteration needs a fresh copy of that base parser. However, the
+    // parser is not mutated on many of the iterations, particularly when
+    // dealing with macros like this:
+    //
+    // macro_rules! foo {
+    //     ("a") => (A);
+    //     ("b") => (B);
+    //     ("c") => (C);
+    //     // ... etc. (maybe hundreds more)
+    // }
+    //
+    // as seen in the `html5ever` benchmark. We use a `Cow` so that the base
+    // parser is only cloned when necessary (upon mutation). Furthermore, we
+    // reinitialize the `Cow` with the base parser at the start of every
+    // iteration, so that any mutated parsers are not reused. This is all quite
+    // hacky, but speeds up the `html5ever` benchmark significantly. (Issue
+    // 68836 suggests a more comprehensive but more complex change to deal with
+    // this situation.)
+    let base_parser = base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
+
     for (i, lhs) in lhses.iter().enumerate() {
+        let mut parser = Cow::Borrowed(&base_parser);
+
         // try each arm's matchers
         let lhs_tt = match *lhs {
             mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
@@ -202,7 +226,7 @@ fn generic_extension<'cx>(
         // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
         let mut gated_spans_snaphot = mem::take(&mut *cx.parse_sess.gated_spans.spans.borrow_mut());
 
-        match parse_tt(cx, lhs_tt, arg.clone()) {
+        match parse_tt(&mut parser, lhs_tt) {
             Success(named_matches) => {
                 // The matcher was `Success(..)`ful.
                 // Merge the gated spans from parsing the matcher with the pre-existing ones.
@@ -232,7 +256,7 @@ fn generic_extension<'cx>(
 
                 if cx.trace_macros() {
                     let msg = format!("to `{}`", pprust::tts_to_string(tts.clone()));
-                    trace_macros_note(cx, sp, msg);
+                    trace_macros_note(&mut cx.expansions, sp, msg);
                 }
 
                 let directory = Directory {
@@ -269,6 +293,7 @@ fn generic_extension<'cx>(
         // Restore to the state before snapshotting and maybe try again.
         mem::swap(&mut gated_spans_snaphot, &mut cx.parse_sess.gated_spans.spans.borrow_mut());
     }
+    drop(base_parser);
 
     let (token, label) = best_failure.expect("ran no matchers");
     let span = token.span.substitute_dummy(sp);
@@ -286,7 +311,9 @@ fn generic_extension<'cx>(
                 mbe::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
                 _ => continue,
             };
-            match parse_tt(cx, lhs_tt, arg.clone()) {
+            let base_parser =
+                base_parser_from_cx(&cx.current_expansion, &cx.parse_sess, arg.clone());
+            match parse_tt(&mut Cow::Borrowed(&base_parser), lhs_tt) {
                 Success(_) => {
                     if comma_span.is_dummy() {
                         err.note("you might be missing a comma");
@@ -368,7 +395,8 @@ pub fn compile_declarative_macro(
         ),
     ];
 
-    let argument_map = match parse(sess, body, &argument_gram, None, true) {
+    let base_parser = Parser::new(sess, body, None, true, true, rustc_parse::MACRO_ARGUMENTS);
+    let argument_map = match parse_tt(&mut Cow::Borrowed(&base_parser), &argument_gram) {
         Success(m) => m,
         Failure(token, msg) => {
             let s = parse_failure_msg(&token);
@@ -1184,14 +1212,16 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
     }
 }
 
-/// Use this token tree as a matcher to parse given tts.
-fn parse_tt(cx: &ExtCtxt<'_>, mtch: &[mbe::TokenTree], tts: TokenStream) -> NamedParseResult {
-    // `None` is because we're not interpolating
+fn base_parser_from_cx<'cx>(
+    current_expansion: &'cx ExpansionData,
+    sess: &'cx ParseSess,
+    tts: TokenStream,
+) -> Parser<'cx> {
     let directory = Directory {
-        path: Cow::from(cx.current_expansion.module.directory.as_path()),
-        ownership: cx.current_expansion.directory_ownership,
+        path: Cow::from(current_expansion.module.directory.as_path()),
+        ownership: current_expansion.directory_ownership,
     };
-    parse(cx.parse_sess(), tts, mtch, Some(directory), true)
+    Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
 }
 
 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For

From f840a955bd449810e75d8320b4c46482d6dbdec1 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 5 Feb 2020 14:33:08 +1100
Subject: [PATCH 2/3] Remove the `Cow` from `Directory`.

The previous commit wrapped `Parser` within a `Cow` for the hot macro
parsing path. As a result, there's no need for the `Cow` within
`Directory`, because `Directory` itself lies within `Parser`.
---
 src/librustc_expand/mbe/macro_rules.rs | 4 ++--
 src/librustc_parse/lib.rs              | 9 ++++-----
 src/librustc_parse/parser/mod.rs       | 9 ++++-----
 src/librustc_parse/parser/module.rs    | 6 +++---
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs
index 9432790e78ced..9e6edee265c98 100644
--- a/src/librustc_expand/mbe/macro_rules.rs
+++ b/src/librustc_expand/mbe/macro_rules.rs
@@ -260,7 +260,7 @@ fn generic_extension<'cx>(
                 }
 
                 let directory = Directory {
-                    path: Cow::from(cx.current_expansion.module.directory.as_path()),
+                    path: cx.current_expansion.module.directory.clone(),
                     ownership: cx.current_expansion.directory_ownership,
                 };
                 let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None);
@@ -1218,7 +1218,7 @@ fn base_parser_from_cx<'cx>(
     tts: TokenStream,
 ) -> Parser<'cx> {
     let directory = Directory {
-        path: Cow::from(current_expansion.module.directory.as_path()),
+        path: current_expansion.module.directory.clone(),
         ownership: current_expansion.directory_ownership,
     };
     Parser::new(sess, tts, Some(directory), true, true, rustc_parse::MACRO_ARGUMENTS)
diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs
index cd674e3c5ebef..4aad2c0f68a29 100644
--- a/src/librustc_parse/lib.rs
+++ b/src/librustc_parse/lib.rs
@@ -12,8 +12,7 @@ use syntax::ast;
 use syntax::token::{self, Nonterminal};
 use syntax::tokenstream::{self, TokenStream, TokenTree};
 
-use std::borrow::Cow;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::str;
 
 use log::info;
@@ -29,8 +28,8 @@ pub mod validate_attr;
 pub mod config;
 
 #[derive(Clone)]
-pub struct Directory<'a> {
-    pub path: Cow<'a, Path>,
+pub struct Directory {
+    pub path: PathBuf,
     pub ownership: DirectoryOwnership,
 }
 
@@ -274,7 +273,7 @@ pub fn stream_to_parser<'a>(
 pub fn stream_to_parser_with_base_dir<'a>(
     sess: &'a ParseSess,
     stream: TokenStream,
-    base_dir: Directory<'a>,
+    base_dir: Directory,
 ) -> Parser<'a> {
     Parser::new(sess, stream, Some(base_dir), true, false, None)
 }
diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs
index 8c1839da1cb8f..cb95750d984e9 100644
--- a/src/librustc_parse/parser/mod.rs
+++ b/src/librustc_parse/parser/mod.rs
@@ -29,7 +29,6 @@ use syntax::token::{self, DelimToken, Token, TokenKind};
 use syntax::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint};
 use syntax::util::comments::{doc_comment_style, strip_doc_comment_decoration};
 
-use std::borrow::Cow;
 use std::path::PathBuf;
 use std::{cmp, mem, slice};
 
@@ -114,7 +113,7 @@ pub struct Parser<'a> {
     prev_token_kind: PrevTokenKind,
     restrictions: Restrictions,
     /// Used to determine the path to externally loaded source files.
-    pub(super) directory: Directory<'a>,
+    pub(super) directory: Directory,
     /// `true` to parse sub-modules in other files.
     // Public for rustfmt usage.
     pub recurse_into_file_modules: bool,
@@ -376,7 +375,7 @@ impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
         tokens: TokenStream,
-        directory: Option<Directory<'a>>,
+        directory: Option<Directory>,
         recurse_into_file_modules: bool,
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
@@ -390,7 +389,7 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             recurse_into_file_modules,
             directory: Directory {
-                path: Cow::from(PathBuf::new()),
+                path: PathBuf::new(),
                 ownership: DirectoryOwnership::Owned { relative: None },
             },
             root_module_name: None,
@@ -418,7 +417,7 @@ impl<'a> Parser<'a> {
                 &sess.source_map().lookup_char_pos(parser.token.span.lo()).file.unmapped_path
             {
                 if let Some(directory_path) = path.parent() {
-                    parser.directory.path = Cow::from(directory_path.to_path_buf());
+                    parser.directory.path = directory_path.to_path_buf();
                 }
             }
         }
diff --git a/src/librustc_parse/parser/module.rs b/src/librustc_parse/parser/module.rs
index 6ce94d3c6793c..0c8fad03d8690 100644
--- a/src/librustc_parse/parser/module.rs
+++ b/src/librustc_parse/parser/module.rs
@@ -285,7 +285,7 @@ impl<'a> Parser<'a> {
 
     fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) {
         if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) {
-            self.directory.path.to_mut().push(&*path.as_str());
+            self.directory.path.push(&*path.as_str());
             self.directory.ownership = DirectoryOwnership::Owned { relative: None };
         } else {
             // We have to push on the current module name in the case of relative
@@ -297,10 +297,10 @@ impl<'a> Parser<'a> {
             if let DirectoryOwnership::Owned { relative } = &mut self.directory.ownership {
                 if let Some(ident) = relative.take() {
                     // remove the relative offset
-                    self.directory.path.to_mut().push(&*ident.as_str());
+                    self.directory.path.push(&*ident.as_str());
                 }
             }
-            self.directory.path.to_mut().push(&*id.as_str());
+            self.directory.path.push(&*id.as_str());
         }
     }
 }

From 2a13b24d369b8619f0197993cd5dc60f7217ed72 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <nnethercote@mozilla.com>
Date: Wed, 5 Feb 2020 15:09:24 +1100
Subject: [PATCH 3/3] Change condition ordering in `parse_tt`.

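This moves the "no possible next positions" `Failure` check to the front
of the `if`/`else if` chain that follows `drop(eof_items)`. This is a
small win, because `Failure` is much more common than `Success`.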
---
 src/librustc_expand/mbe/macro_parser.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs
index 78f22f3e443b1..5bf7602ea6e8f 100644
--- a/src/librustc_expand/mbe/macro_parser.rs
+++ b/src/librustc_expand/mbe/macro_parser.rs
@@ -689,9 +689,14 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
         // unnecessary implicit clone later in Rc::make_mut.
         drop(eof_items);
 
+        // If there are no possible next positions AND we aren't waiting for the black-box parser,
+        // then there is a syntax error.
+        if bb_items.is_empty() && next_items.is_empty() {
+            return Failure(parser.token.clone(), "no rules expected this token in macro call");
+        }
         // Another possibility is that we need to call out to parse some rust nonterminal
         // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
-        if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
+        else if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
             let nts = bb_items
                 .iter()
                 .map(|item| match item.top_elts.get_tt(item.idx) {
@@ -713,11 +718,6 @@ pub(super) fn parse_tt(parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree]) -> Na
                 ),
             );
         }
-        // If there are no possible next positions AND we aren't waiting for the black-box parser,
-        // then there is a syntax error.
-        else if bb_items.is_empty() && next_items.is_empty() {
-            return Failure(parser.token.clone(), "no rules expected this token in macro call");
-        }
         // Dump all possible `next_items` into `cur_items` for the next iteration.
         else if !next_items.is_empty() {
             // Now process the next token