Skip to content

Commit

Permalink
Rollup merge of #65392 - Centril:nt-to-tt, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
Move `Nonterminal::to_tokenstream` to parser & don't rely directly on parser in lowering

Split out from #65324.

r? @petrochenkov
  • Loading branch information
Centril authored Oct 14, 2019
2 parents e29a6fc + 07e946c commit 28d08f3
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 142 deletions.
14 changes: 12 additions & 2 deletions src/librustc/hir/lowering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ use syntax::print::pprust;
use syntax::source_map::{respan, ExpnData, ExpnKind, DesugaringKind, Spanned};
use syntax::symbol::{kw, sym, Symbol};
use syntax::tokenstream::{TokenStream, TokenTree};
use syntax::parse::token::{self, Token};
use syntax::parse::token::{self, Nonterminal, Token};
use syntax::parse::ParseSess;
use syntax::visit::{self, Visitor};
use syntax_pos::Span;

Expand All @@ -86,6 +87,11 @@ pub struct LoweringContext<'a> {

resolver: &'a mut dyn Resolver,

/// HACK(Centril): there is a cyclic dependency between the parser and lowering
/// if we don't have this function pointer. To avoid that dependency so that
/// librustc is independent of the parser, we use dynamic dispatch here.
nt_to_tokenstream: NtToTokenstream,

/// The items being lowered are collected here.
items: BTreeMap<hir::HirId, hir::Item>,

Expand Down Expand Up @@ -180,6 +186,8 @@ pub trait Resolver {
fn has_derives(&self, node_id: NodeId, derives: SpecialDerives) -> bool;
}

type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;

/// Context of `impl Trait` in code, which determines whether it is allowed in an HIR subtree,
/// and if so, what meaning it has.
#[derive(Debug)]
Expand Down Expand Up @@ -236,6 +244,7 @@ pub fn lower_crate(
dep_graph: &DepGraph,
krate: &Crate,
resolver: &mut dyn Resolver,
nt_to_tokenstream: NtToTokenstream,
) -> hir::Crate {
// We're constructing the HIR here; we don't care what we will
// read, since we haven't even constructed the *input* to
Expand All @@ -249,6 +258,7 @@ pub fn lower_crate(
sess,
cstore,
resolver,
nt_to_tokenstream,
items: BTreeMap::new(),
trait_items: BTreeMap::new(),
impl_items: BTreeMap::new(),
Expand Down Expand Up @@ -1022,7 +1032,7 @@ impl<'a> LoweringContext<'a> {
fn lower_token(&mut self, token: Token) -> TokenStream {
match token.kind {
token::Interpolated(nt) => {
let tts = nt.to_tokenstream(&self.sess.parse_sess, token.span);
let tts = (self.nt_to_tokenstream)(&nt, &self.sess.parse_sess, token.span);
self.lower_token_stream(tts)
}
_ => TokenTree::Token(token).into(),
Expand Down
3 changes: 2 additions & 1 deletion src/librustc_interface/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,8 @@ pub fn lower_to_hir(
) -> Result<hir::map::Forest> {
// Lower AST to HIR.
let hir_forest = time(sess, "lowering AST -> HIR", || {
let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver);
let nt_to_tokenstream = syntax::parse::nt_to_tokenstream;
let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver, nt_to_tokenstream);

if sess.opts.debugging_opts.hir_stats {
hir_stats::print_hir_stats(&hir_crate);
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
}

Interpolated(nt) => {
let stream = nt.to_tokenstream(sess, span);
let stream = parse::nt_to_tokenstream(&nt, sess, span);
TokenTree::Group(Group {
delimiter: Delimiter::None,
stream,
Expand Down
138 changes: 134 additions & 4 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
use crate::source_map::{SourceMap, FilePathMapping};
use crate::feature_gate::UnstableFeatures;
use crate::parse::parser::Parser;
use crate::parse::parser::emit_unclosed_delims;
use crate::parse::token::TokenKind;
use crate::tokenstream::{TokenStream, TokenTree};
use crate::parse::parser::{Parser, emit_unclosed_delims};
use crate::parse::token::{Nonterminal, TokenKind};
use crate::tokenstream::{self, TokenStream, TokenTree};
use crate::print::pprust;
use crate::symbol::Symbol;

Expand All @@ -24,6 +23,8 @@ use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::str;

use log::info;

#[cfg(test)]
mod tests;

Expand Down Expand Up @@ -407,3 +408,132 @@ impl SeqSep {
}
}
}

// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
// fact that architecturally, we are using parsing (read on below to understand why).

pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
// A `Nonterminal` is often a parsed AST item. At this point we now
// need to convert the parsed AST to an actual token stream, e.g.
// un-parse it basically.
//
// Unfortunately there's not really a great way to do that in a
// guaranteed lossless fashion right now. The fallback here is to just
// stringify the AST node and reparse it, but this loses all span
// information.
//
// As a result, some AST nodes are annotated with the token stream they
// came from. Here we attempt to extract these lossless token streams
// before we fall back to the stringification.
let tokens = match *nt {
Nonterminal::NtItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtTraitItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtImplItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtIdent(ident, is_raw) => {
Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
}
Nonterminal::NtLifetime(ident) => {
Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
}
Nonterminal::NtTT(ref tt) => {
Some(tt.clone().into())
}
_ => None,
};

// FIXME(#43081): Avoid this pretty-print + reparse hack
let source = pprust::nonterminal_to_string(nt);
let filename = FileName::macro_expansion_source_code(&source);
let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));

// During early phases of the compiler the AST could get modified
// directly (e.g., attributes added or removed) and the internal cache
// of tokens my not be invalidated or updated. Consequently if the
// "lossless" token stream disagrees with our actual stringification
// (which has historically been much more battle-tested) then we go
// with the lossy stream anyway (losing span information).
//
// Note that the comparison isn't `==` here to avoid comparing spans,
// but it *also* is a "probable" equality which is a pretty weird
// definition. We mostly want to catch actual changes to the AST
// like a `#[cfg]` being processed or some weird `macro_rules!`
// expansion.
//
// What we *don't* want to catch is the fact that a user-defined
// literal like `0xf` is stringified as `15`, causing the cached token
// stream to not be literal `==` token-wise (ignoring spans) to the
// token stream we got from stringification.
//
// Instead the "probably equal" check here is "does each token
// recursively have the same discriminant?" We basically don't look at
// the token values here and assume that such fine grained token stream
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
return tokens
}
info!("cached tokens found, but they're not \"probably equal\", \
going with stringified version");
}
return tokens_for_real
}

fn prepend_attrs(
sess: &ParseSess,
attrs: &[ast::Attribute],
tokens: Option<&tokenstream::TokenStream>,
span: syntax_pos::Span
) -> Option<tokenstream::TokenStream> {
let tokens = tokens?;
if attrs.len() == 0 {
return Some(tokens.clone())
}
let mut builder = tokenstream::TokenStreamBuilder::new();
for attr in attrs {
assert_eq!(attr.style, ast::AttrStyle::Outer,
"inner attributes should prevent cached tokens from existing");

let source = pprust::attribute_to_string(attr);
let macro_filename = FileName::macro_expansion_source_code(&source);
if attr.is_sugared_doc {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
builder.push(stream);
continue
}

// synthesize # [ $path $tokens ] manually here
let mut brackets = tokenstream::TokenStreamBuilder::new();

// For simple paths, push the identifier directly
if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
let ident = attr.path.segments[0].ident;
let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
brackets.push(tokenstream::TokenTree::token(token, ident.span));

// ... and for more complicated paths, fall back to a reparse hack that
// should eventually be removed.
} else {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
brackets.push(stream);
}

brackets.push(attr.tokens.clone());

// The span we list here for `#` and for `[ ... ]` are both wrong in
// that it encompasses more than each token, but it hopefully is "good
// enough" for now at least.
builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
let delim_span = tokenstream::DelimSpan::from_single(attr.span);
builder.push(tokenstream::TokenTree::Delimited(
delim_span, token::DelimToken::Bracket, brackets.build().into()));
}
builder.push(tokens.clone());
Some(builder.build())
}
137 changes: 3 additions & 134 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@ pub use DelimToken::*;
pub use LitKind::*;
pub use TokenKind::*;

use crate::ast::{self};
use crate::parse::{parse_stream_from_source_str, ParseSess};
use crate::print::pprust;
use crate::ast;
use crate::ptr::P;
use crate::symbol::kw;
use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
use crate::tokenstream::TokenTree;

use syntax_pos::symbol::Symbol;
use syntax_pos::{self, Span, FileName, DUMMY_SP};
use log::info;
use syntax_pos::{self, Span, DUMMY_SP};

use std::fmt;
use std::mem;
Expand Down Expand Up @@ -737,131 +734,3 @@ impl fmt::Debug for Nonterminal {
}
}
}

impl Nonterminal {
pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
// A `Nonterminal` is often a parsed AST item. At this point we now
// need to convert the parsed AST to an actual token stream, e.g.
// un-parse it basically.
//
// Unfortunately there's not really a great way to do that in a
// guaranteed lossless fashion right now. The fallback here is to just
// stringify the AST node and reparse it, but this loses all span
// information.
//
// As a result, some AST nodes are annotated with the token stream they
// came from. Here we attempt to extract these lossless token streams
// before we fall back to the stringification.
let tokens = match *self {
Nonterminal::NtItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtTraitItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtImplItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtIdent(ident, is_raw) => {
Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into())
}
Nonterminal::NtLifetime(ident) => {
Some(TokenTree::token(Lifetime(ident.name), ident.span).into())
}
Nonterminal::NtTT(ref tt) => {
Some(tt.clone().into())
}
_ => None,
};

// FIXME(#43081): Avoid this pretty-print + reparse hack
let source = pprust::nonterminal_to_string(self);
let filename = FileName::macro_expansion_source_code(&source);
let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));

// During early phases of the compiler the AST could get modified
// directly (e.g., attributes added or removed) and the internal cache
// of tokens my not be invalidated or updated. Consequently if the
// "lossless" token stream disagrees with our actual stringification
// (which has historically been much more battle-tested) then we go
// with the lossy stream anyway (losing span information).
//
// Note that the comparison isn't `==` here to avoid comparing spans,
// but it *also* is a "probable" equality which is a pretty weird
// definition. We mostly want to catch actual changes to the AST
// like a `#[cfg]` being processed or some weird `macro_rules!`
// expansion.
//
// What we *don't* want to catch is the fact that a user-defined
// literal like `0xf` is stringified as `15`, causing the cached token
// stream to not be literal `==` token-wise (ignoring spans) to the
// token stream we got from stringification.
//
// Instead the "probably equal" check here is "does each token
// recursively have the same discriminant?" We basically don't look at
// the token values here and assume that such fine grained token stream
// modifications, including adding/removing typically non-semantic
// tokens such as extra braces and commas, don't happen.
if let Some(tokens) = tokens {
if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
return tokens
}
info!("cached tokens found, but they're not \"probably equal\", \
going with stringified version");
}
return tokens_for_real
}
}

fn prepend_attrs(sess: &ParseSess,
attrs: &[ast::Attribute],
tokens: Option<&tokenstream::TokenStream>,
span: syntax_pos::Span)
-> Option<tokenstream::TokenStream>
{
let tokens = tokens?;
if attrs.len() == 0 {
return Some(tokens.clone())
}
let mut builder = tokenstream::TokenStreamBuilder::new();
for attr in attrs {
assert_eq!(attr.style, ast::AttrStyle::Outer,
"inner attributes should prevent cached tokens from existing");

let source = pprust::attribute_to_string(attr);
let macro_filename = FileName::macro_expansion_source_code(&source);
if attr.is_sugared_doc {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
builder.push(stream);
continue
}

// synthesize # [ $path $tokens ] manually here
let mut brackets = tokenstream::TokenStreamBuilder::new();

// For simple paths, push the identifier directly
if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
let ident = attr.path.segments[0].ident;
let token = Ident(ident.name, ident.as_str().starts_with("r#"));
brackets.push(tokenstream::TokenTree::token(token, ident.span));

// ... and for more complicated paths, fall back to a reparse hack that
// should eventually be removed.
} else {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
brackets.push(stream);
}

brackets.push(attr.tokens.clone());

// The span we list here for `#` and for `[ ... ]` are both wrong in
// that it encompasses more than each token, but it hopefully is "good
// enough" for now at least.
builder.push(tokenstream::TokenTree::token(Pound, attr.span));
let delim_span = DelimSpan::from_single(attr.span);
builder.push(tokenstream::TokenTree::Delimited(
delim_span, DelimToken::Bracket, brackets.build().into()));
}
builder.push(tokens.clone());
Some(builder.build())
}

0 comments on commit 28d08f3

Please sign in to comment.