Cache pretty-print/retokenize result to avoid compile time blowup
Fixes #79242

If a `macro_rules!` recursively builds up a nested nonterminal
(passing it to a proc-macro at each step), we will end up repeatedly
pretty-printing/retokenizing the same nonterminals. Unfortunately, the
'probable equality' check we do has a non-trivial cost, which leads to a
blowup in compilation time.
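
For illustration, the problematic shape looks like the hypothetical macro below (the regression test added in this commit follows the same pattern): at step k the proc-macro receives a nonterminal nested k levels deep, so n steps cost O(n^2) pretty-print/retokenize work in total. `dummy!` here stands in for any proc-macro.

    // Hypothetical sketch only -- not part of this commit.
    macro_rules! nest {
        ($ty:ty) => {};
        ($ty:ty, $head:literal $(, $tail:literal)*) => {
            // Step k hands a `ty` nonterminal nested k levels deep to the
            // proc-macro, triggering a pretty-print/retokenize of the whole tree.
            dummy! { $ty }
            nest!(Option<$ty> $(, $tail)*);
        };
    }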

As a workaround, we cache the result of the 'probable equality' check,
which eliminates the compilation time blowup for the linked issue. This
commit only touches a single file (other than adding tests), so it
should be easy to backport.
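
In isolation, the caching pattern looks roughly like the following sketch (a minimal standalone version using the standard library's `HashSet` in place of rustc's `FxHashSet`; the helper name is illustrative, not the compiler's API):

    use std::cell::RefCell;
    use std::collections::HashSet;

    thread_local! {
        // One cache per thread, so no synchronization is needed.
        static GOOD_TOKEN_CACHE: RefCell<HashSet<String>> = Default::default();
    }

    /// Runs `expensive_check` only if `source` hasn't already passed it.
    fn probably_equal_cached(source: &str, expensive_check: impl FnOnce() -> bool) -> bool {
        if GOOD_TOKEN_CACHE.with(|c| c.borrow().contains(source)) {
            return true; // cache hit: skip the costly comparison
        }
        let passed = expensive_check();
        if passed {
            // Only 'good' results are cached; failures fall through to
            // further checks and are not recorded.
            GOOD_TOKEN_CACHE.with(|c| c.borrow_mut().insert(source.to_owned()));
        }
        passed
    }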

The proper solution is to remove the pretty-print/retokenize hack
entirely. However, this will almost certainly break a large number of
crates that were relying on hygiene bugs created by using the reparsed
`TokenStream`. As a result, we will definitely not want to backport
such a change.
Aaron1011 committed Nov 23, 2020
1 parent a0d664b commit 6e466ef
Showing 3 changed files with 85 additions and 3 deletions.
38 changes: 35 additions & 3 deletions compiler/rustc_parse/src/lib.rs
@@ -10,12 +10,14 @@ use rustc_ast as ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
+use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};

use smallvec::SmallVec;
+use std::cell::RefCell;
use std::mem;
use std::path::Path;
use std::str;
@@ -282,14 +284,33 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
        }
    };

+    // Caches the stringification of 'good' `TokenStream`s that passed
+    // `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
+    // repeatedly stringifying and comparing the same `TokenStream` for deeply
+    // nested nonterminals.
+    //
+    // We cache by the stringification instead of the `TokenStream` to avoid
+    // needing to implement `Hash` for `TokenStream`. Note that it's possible to
+    // have two distinct `TokenStream`s that stringify to the same result
+    // (e.g. if they differ only in hygiene information). However, any
+    // information lost during the stringification process is also intentionally
+    // ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
+    // that a single cache entry may 'map' to multiple distinct `TokenStream`s.
+    //
+    // This is a temporary hack to prevent compilation blowup on certain inputs.
+    // The entire pretty-print/retokenize process will be removed soon.
+    thread_local! {
+        static GOOD_TOKEN_CACHE: RefCell<FxHashSet<String>> = Default::default();
+    }
+
    // FIXME(#43081): Avoid this pretty-print + reparse hack
    // Pretty-print the AST struct without inserting any parentheses
    // beyond those explicitly written by the user (e.g. `ExprKind::Paren`).
    // The resulting stream may have incorrect precedence, but it's only
    // ever used for a comparison against the captured tokenstream.
    let source = pprust::nonterminal_to_string_no_extra_parens(nt);
    let filename = FileName::macro_expansion_source_code(&source);
-    let reparsed_tokens = parse_stream_from_source_str(filename, source, sess, Some(span));
+    let reparsed_tokens = parse_stream_from_source_str(filename, source.clone(), sess, Some(span));

    // During early phases of the compiler the AST could get modified
    // directly (e.g., attributes added or removed) and the internal cache
@@ -315,8 +336,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
    // modifications, including adding/removing typically non-semantic
    // tokens such as extra braces and commas, don't happen.
    if let Some(tokens) = tokens {
+        if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source)) {
+            return tokens;
+        }
+
        // Compare with a non-relaxed delim match to start.
        if tokenstream_probably_equal_for_proc_macro(&tokens, &reparsed_tokens, sess, false) {
+            GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
            return tokens;
        }

@@ -325,6 +351,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
        // token stream to match up with inserted parentheses in the reparsed stream.
        let source_with_parens = pprust::nonterminal_to_string(nt);
        let filename_with_parens = FileName::macro_expansion_source_code(&source_with_parens);
+
+        if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source_with_parens)) {
+            return tokens;
+        }
+
        let reparsed_tokens_with_parens = parse_stream_from_source_str(
            filename_with_parens,
            source_with_parens,
@@ -340,6 +371,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
            sess,
            true,
        ) {
+            GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
            return tokens;
        }

@@ -419,9 +451,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
        // to iterate breaking tokens multiple times. For example:
        // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
        let mut token_trees: SmallVec<[_; 2]>;
-        if let TokenTree::Token(token) = &tree {
+        if let TokenTree::Token(token) = tree {
            let mut out = SmallVec::<[_; 2]>::new();
-            out.push(token.clone());
+            out.push(token);
            // Iterate to fixpoint:
            // * We start off with 'out' containing our initial token, and `temp` empty
            // * If we are able to break any tokens in `out`, then `out` will have
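(Aside: the fixpoint loop in the truncated comment above can be sketched in miniature. `break_once` below is a toy stand-in for rustc's token-splitting rules, not actual compiler code.)

    // Splits a compound token one step, mirroring the example in the comment:
    // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]', i.e. `>>=` -> `>` `>=` -> `>` `>` `=`.
    fn break_once(tok: &str) -> Option<[&'static str; 2]> {
        match tok {
            ">>=" => Some([">", ">="]),
            ">>" => Some([">", ">"]),
            ">=" => Some([">", "="]),
            _ => None,
        }
    }

    fn break_to_fixpoint(start: &'static str) -> Vec<&'static str> {
        let mut out = vec![start];
        loop {
            let mut temp = Vec::new();
            let mut changed = false;
            for &tok in &out {
                match break_once(tok) {
                    Some(parts) => {
                        temp.extend(parts); // broke a token: keep iterating
                        changed = true;
                    }
                    None => temp.push(tok),
                }
            }
            out = temp;
            if !changed {
                return out; // fixpoint: nothing left to break
            }
        }
    }

    // break_to_fixpoint(">>=") == [">", ">", "="]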
16 changes: 16 additions & 0 deletions src/test/ui/proc-macro/auxiliary/issue-79242.rs
@@ -0,0 +1,16 @@
// force-host
// no-prefer-dynamic

#![crate_type = "proc-macro"]

extern crate proc_macro;

use proc_macro::TokenStream;

#[proc_macro]
pub fn dummy(input: TokenStream) -> TokenStream {
    // Iterate to force internal conversion of nonterminals
    // to `proc_macro` structs
    for _ in input {}
    TokenStream::new()
}
34 changes: 34 additions & 0 deletions src/test/ui/proc-macro/issue-79242-slow-retokenize-check.rs
@@ -0,0 +1,34 @@
// check-pass
// aux-build:issue-79242.rs

// Regression test for issue #79242
// Tests that compilation time doesn't blow up for a proc-macro
// invocation with deeply nested nonterminals

#![allow(unused)]

extern crate issue_79242;

macro_rules! declare_nats {
    ($prev:ty) => {};
    ($prev:ty, $n:literal$(, $tail:literal)*) => {

        issue_79242::dummy! {
            $prev
        }

        declare_nats!(Option<$prev>$(, $tail)*);
    };
    (0, $($n:literal),+) => {
        pub struct N0;
        declare_nats!(N0, $($n),+);
    };
}

declare_nats! {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
}


fn main() {}
