Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache pretty-print/retokenize result to avoid compile time blowup #79338

Merged
merged 1 commit into from
Nov 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions compiler/rustc_parse/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ use rustc_ast as ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
use rustc_session::parse::ParseSess;
use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP};

use smallvec::SmallVec;
use std::cell::RefCell;
use std::mem;
use std::path::Path;
use std::str;
Expand Down Expand Up @@ -282,14 +284,33 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
}
};

// Caches the stringification of 'good' `TokenStreams` which passed
// `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
// repeatedly stringifying and comparing the same `TokenStream` for deeply
// nested nonterminals.
//
// We cache by the stringification instead of the `TokenStream` to avoid
// needing to implement `Hash` for `TokenStream`. Note that it's possible to
// have two distinct `TokenStream`s that stringify to the same result
// (e.g. if they differ only in hygiene information). However, any
// information lost during the stringification process is also intentionally
// ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
// that a single cache entry may 'map' to multiple distinct `TokenStream`s.
//
// This is a temporary hack to prevent compilation blowup on certain inputs.
// The entire pretty-print/retokenize process will be removed soon.
thread_local! {
static GOOD_TOKEN_CACHE: RefCell<FxHashSet<String>> = Default::default();
}

// FIXME(#43081): Avoid this pretty-print + reparse hack
// Pretty-print the AST struct without inserting any parenthesis
// beyond those explicitly written by the user (e.g. `ExpnKind::Paren`).
// The resulting stream may have incorrect precedence, but it's only
// ever used for a comparison against the capture tokenstream.
let source = pprust::nonterminal_to_string_no_extra_parens(nt);
let filename = FileName::macro_expansion_source_code(&source);
let reparsed_tokens = parse_stream_from_source_str(filename, source, sess, Some(span));
let reparsed_tokens = parse_stream_from_source_str(filename, source.clone(), sess, Some(span));

// During early phases of the compiler the AST could get modified
// directly (e.g., attributes added or removed) and the internal cache
Expand All @@ -315,8 +336,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source)) {
return tokens;
}

// Compare with a non-relaxed delim match to start.
if tokenstream_probably_equal_for_proc_macro(&tokens, &reparsed_tokens, sess, false) {
GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
return tokens;
}

Expand All @@ -325,6 +351,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
// token stream to match up with inserted parenthesis in the reparsed stream.
let source_with_parens = pprust::nonterminal_to_string(nt);
let filename_with_parens = FileName::macro_expansion_source_code(&source_with_parens);

if GOOD_TOKEN_CACHE.with(|cache| cache.borrow().contains(&source_with_parens)) {
return tokens;
}

let reparsed_tokens_with_parens = parse_stream_from_source_str(
filename_with_parens,
source_with_parens,
Expand All @@ -340,6 +371,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
sess,
true,
) {
GOOD_TOKEN_CACHE.with(|cache| cache.borrow_mut().insert(source.clone()));
return tokens;
}

Expand Down Expand Up @@ -419,9 +451,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
// to iterate breaking tokens multiple times. For example:
// '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
let mut token_trees: SmallVec<[_; 2]>;
if let TokenTree::Token(token) = &tree {
if let TokenTree::Token(token) = tree {
let mut out = SmallVec::<[_; 2]>::new();
out.push(token.clone());
out.push(token);
// Iterate to fixpoint:
// * We start off with 'out' containing our initial token, and `temp` empty
// * If we are able to break any tokens in `out`, then `out` will have
Expand Down
16 changes: 16 additions & 0 deletions src/test/ui/proc-macro/auxiliary/issue-79242.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// force-host
// no-prefer-dynamic

#![crate_type = "proc-macro"]

extern crate proc_macro;

use proc_macro::TokenStream;

/// A do-nothing proc macro: consumes its input and expands to an
/// empty token stream.
#[proc_macro]
pub fn dummy(input: TokenStream) -> TokenStream {
    // Walking the stream forces the compiler to convert the captured
    // nonterminals into `proc_macro` data structures; the tokens
    // themselves are discarded.
    input.into_iter().for_each(drop);
    TokenStream::new()
}
34 changes: 34 additions & 0 deletions src/test/ui/proc-macro/issue-79242-slow-retokenize-check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// check-pass
// aux-build:issue-79242.rs

// Regression test for issue #79242
// Tests that compilation time doesn't blow up for a proc-macro
// invocation with deeply nested nonterminals

#![allow(unused)]

extern crate issue_79242;

// Recursively declares a chain of deeply nested types
// (`N0`, `Option<N0>`, `Option<Option<N0>>`, ...) and feeds each one to
// the `dummy!` proc macro, producing a deeply nested nonterminal per
// invocation.
macro_rules! declare_nats {
    // Base case: a lone type ends the recursion.
    ($prev:ty) => {};
    // Recursive case: pass the current type to the proc macro, then
    // recurse with one more `Option` layer and the remaining counters.
    ($prev:ty, $n:literal$(, $tail:literal)*) => {

        issue_79242::dummy! {
            $prev
        }

        declare_nats!(Option<$prev>$(, $tail)*);
    };
    // Entry point: `0` declares the seed type `N0`, then starts the
    // recursion over the remaining literals.
    (0, $($n:literal),+) => {
        pub struct N0;
        declare_nats!(N0, $($n),+);
    };
}

// Each literal adds one level of `Option` nesting; 29 levels is enough
// to reproduce the issue #79242 compile-time blowup without the cache.
declare_nats! {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
}


// This is a check-pass test: compiling in reasonable time is the pass
// condition, so `main` has nothing to do.
fn main() {}