Serialize all foreign SourceFiles into proc-macro crate metadata
Normally, we encode a `Span` that references a foreign `SourceFile` by
encoding information about the foreign crate. When we decode this
`Span`, we lookup the foreign crate in order to decode the `SourceFile`.

However, this approach does not work for proc-macro crates. When we load
a proc-macro crate, we do not deserialize any of its dependencies (since
a proc-macro crate can only export proc-macros). This means that we
cannot serialize a reference to an upstream crate, since the associated
metadata will not be available when we try to deserialize it.

This commit modifies foreign span handling so that we treat all foreign
`SourceFile`s as local `SourceFile`s when serializing a proc-macro.
All `SourceFile`s will be stored into the metadata of a proc-macro
crate, allowing us to continue to deserialize a proc-macro crate without
needing to load any of its dependencies.

Since the number of foreign `SourceFile`s that we load during a
compilation session may be very large, we only serialize a `SourceFile`
if we have also serialized a `Span` which requires it.
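
A minimal, self-contained sketch of this bookkeeping (illustrative only:
`FileRecord`, `Encoder`, and the `BTreeSet` below are stand-ins for the real
rustc_metadata types and the `GrowableBitSet`, not the actual implementation):

    use std::collections::BTreeSet;

    #[derive(Clone)]
    struct FileRecord {
        name: String,
        is_imported: bool, // true if the file came from an upstream crate
    }

    struct Encoder {
        files: Vec<FileRecord>,          // analogue of `SourceMap::files()`
        required_files: BTreeSet<usize>, // analogue of `required_source_files`
        is_proc_macro: bool,
    }

    impl Encoder {
        // Called for every `Span` we serialize: remember which file it needs.
        fn encode_span(&mut self, file_idx: usize) {
            self.required_files.insert(file_idx);
        }

        // Called last, after all `Span`s: emit only the files that were used.
        // For proc-macro crates, imported (foreign) files are kept as well,
        // so the crate can be decoded without loading its dependencies.
        fn encode_source_map(&self) -> Vec<FileRecord> {
            self.files
                .iter()
                .enumerate()
                .filter(|(idx, file)| {
                    self.required_files.contains(idx)
                        && (!file.is_imported || self.is_proc_macro)
                })
                .map(|(_, file)| file.clone())
                .collect()
        }
    }

    fn main() {
        let mut enc = Encoder {
            files: vec![
                FileRecord { name: "src/lib.rs".into(), is_imported: false },
                FileRecord { name: "upstream/src/lib.rs".into(), is_imported: true },
            ],
            required_files: BTreeSet::new(),
            is_proc_macro: true,
        };
        // Pretend we encoded one local and one foreign `Span`.
        enc.encode_span(0);
        enc.encode_span(1);
        // Both files are serialized because this is a proc-macro crate;
        // for a normal crate, only "src/lib.rs" would be kept.
        for file in enc.encode_source_map() {
            println!("serializing {}", file.name);
        }
    }

In the real encoder, the source map is encoded after everything else so that
every serialized `Span` has had a chance to register its `SourceFile` index.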
Aaron1011 committed Jun 27, 2020
1 parent 394e1b4 commit 2765149
Showing 13 changed files with 220 additions and 43 deletions.
27 changes: 15 additions & 12 deletions src/librustc_metadata/rmeta/decoder.rs
@@ -450,19 +450,17 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
self.cdata().imported_source_files(sess)
} else {
// FIXME: We don't decode dependencies of proc-macros.
// Remove this once #69976 is merged
// When we encode a proc-macro crate, all `Span`s should be encoded
// with `TAG_VALID_SPAN_LOCAL`
if self.cdata().root.is_proc_macro_crate() {
debug!(
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
self.cdata().cnum
);
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
// since we don't have `cnum_map` populated.
// This advances the decoder position so that we can continue
// to read metadata.
let _ = u32::decode(self)?;
return Ok(DUMMY_SP);
let cnum = u32::decode(self)?;
panic!(
"Decoding of crate {:?} tried to access proc-macro dep {:?}",
self.cdata().root.name,
cnum
);
}
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
let cnum = CrateNum::decode(self)?;
@@ -990,8 +988,13 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
DefKind::Macro(macro_kind(raw_macro)),
self.local_def_id(def_index),
);
let ident = Ident::from_str(raw_macro.name());
callback(Export { ident, res, vis: ty::Visibility::Public, span: DUMMY_SP });
let ident = self.item_ident(def_index, sess);
callback(Export {
ident,
res,
vis: ty::Visibility::Public,
span: self.get_span(def_index, sess),
});
}
}
return;
102 changes: 78 additions & 24 deletions src/librustc_metadata/rmeta/encoder.rs
@@ -16,6 +16,7 @@ use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
use rustc_hir::itemlikevisit::{ItemLikeVisitor, ParItemLikeVisitor};
use rustc_hir::lang_items;
use rustc_hir::{AnonConst, GenericParamKind};
use rustc_index::bit_set::GrowableBitSet;
use rustc_index::vec::Idx;
use rustc_middle::hir::map::Map;
use rustc_middle::middle::cstore::{EncodedMetadata, ForeignModule, LinkagePreference, NativeLib};
@@ -51,7 +52,20 @@ struct EncodeContext<'tcx> {
interpret_allocs_inverse: Vec<interpret::AllocId>,

// This is used to speed up Span encoding.
source_file_cache: Lrc<SourceFile>,
// The `usize` is an index into the `MonotonicVec`
// that stores the `SourceFile`
source_file_cache: (Lrc<SourceFile>, usize),
// The indices (into the `SourceMap`'s `MonotonicVec`)
// of all of the `SourceFiles` that we need to serialize.
// When we serialize a `Span`, we insert the index of its
// `SourceFile` into the `GrowableBitSet`.
//
// This needs to be a `GrowableBitSet` and not a
// regular `BitSet` because we may actually import new `SourceFiles`
// during metadata encoding, due to executing a query
// with a result containing a foreign `Span`.
required_source_files: Option<GrowableBitSet<usize>>,
is_proc_macro: bool,
}

macro_rules! encoder_methods {
@@ -154,18 +168,23 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// The Span infrastructure should make sure that this invariant holds:
debug_assert!(span.lo <= span.hi);

if !self.source_file_cache.contains(span.lo) {
if !self.source_file_cache.0.contains(span.lo) {
let source_map = self.tcx.sess.source_map();
let source_file_index = source_map.lookup_source_file_idx(span.lo);
self.source_file_cache = source_map.files()[source_file_index].clone();
self.source_file_cache =
(source_map.files()[source_file_index].clone(), source_file_index);
}

if !self.source_file_cache.contains(span.hi) {
if !self.source_file_cache.0.contains(span.hi) {
// Unfortunately, macro expansion still sometimes generates Spans
// that are malformed in this way.
return TAG_INVALID_SPAN.encode(self);
}

let source_files = self.required_source_files.as_mut().expect("Already encoded SourceMap!");
// Record the fact that we need to encode the data for this `SourceFile`
source_files.insert(self.source_file_cache.1);

// There are two possible cases here:
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
// crate we are writing metadata for. When the metadata for *this* crate gets
@@ -176,7 +195,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// 2. This span comes from our own crate. No special handling is needed - we just
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
// our own source map information.
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
//
// If we're a proc-macro crate, we always treat this as a local `Span`.
// In `encode_source_map`, we serialize foreign `SourceFile`s into our metadata
// if we're a proc-macro crate.
// This allows us to avoid loading the dependencies of proc-macro crates: all of
// the information we need to decode `Span`s is stored in the proc-macro crate.
let (tag, lo, hi) = if self.source_file_cache.0.is_imported() && !self.is_proc_macro {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
// (the crate that 'owns' the file it references). These rebased 'lo' and 'hi' values
// are relative to the source map information for the 'foreign' crate whose CrateNum
@@ -188,13 +213,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// Span that can be used without any additional trouble.
let external_start_pos = {
// Introduce a new scope so that we drop the 'lock()' temporary
match &*self.source_file_cache.external_src.lock() {
match &*self.source_file_cache.0.external_src.lock() {
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
src => panic!("Unexpected external source {:?}", src),
}
};
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
let lo = (span.lo - self.source_file_cache.0.start_pos) + external_start_pos;
let hi = (span.hi - self.source_file_cache.0.start_pos) + external_start_pos;

(TAG_VALID_SPAN_FOREIGN, lo, hi)
} else {
@@ -212,7 +237,7 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
if tag == TAG_VALID_SPAN_FOREIGN {
// This needs to be two lines to avoid holding the `self.source_file_cache`
// while calling `cnum.encode(self)`
let cnum = self.source_file_cache.cnum;
let cnum = self.source_file_cache.0.cnum;
cnum.encode(self)?;
}
Ok(())
@@ -386,17 +411,24 @@ impl<'tcx> EncodeContext<'tcx> {
let all_source_files = source_map.files();

let (working_dir, _cwd_remapped) = self.tcx.sess.working_dir.clone();
// By replacing the `Option` with `None`, we ensure that we can't
// accidentally serialize any more `Span`s after the source map encoding
// is done.
let required_source_files = self.required_source_files.take().unwrap();

let adapted = all_source_files
.iter()
.filter(|source_file| {
// No need to re-export imported source_files, as any downstream
// crate will import them from their original source.
// FIXME(eddyb) the `Span` encoding should take that into account.
!source_file.is_imported()
.enumerate()
.filter(|(idx, source_file)| {
// Only serialize `SourceFile`s that were used
// during the encoding of a `Span`
required_source_files.contains(*idx) &&
// Don't serialize imported `SourceFile`s, unless
// we're in a proc-macro crate.
(!source_file.is_imported() || self.is_proc_macro)
})
.map(|source_file| {
match source_file.name {
.map(|(_, source_file)| {
let mut adapted = match source_file.name {
// This path of this SourceFile has been modified by
// path-remapping, so we use it verbatim (and avoid
// cloning the whole map in the process).
@@ -419,15 +451,30 @@ impl<'tcx> EncodeContext<'tcx> {

// expanded code, not from a file
_ => source_file.clone(),
};

// We're serializing this `SourceFile` into our crate metadata,
// so mark it as coming from this crate.
// This also ensures that we don't try to deserialize the
// `CrateNum` for a proc-macro dependency - since proc macro
// dependencies aren't loaded when we deserialize a proc-macro,
// trying to remap the `CrateNum` would fail.
if self.is_proc_macro {
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
}
adapted
})
.collect::<Vec<_>>();

self.lazy(adapted.iter().map(|rc| &**rc))
}

fn is_proc_macro(&self) -> bool {
self.tcx.sess.crate_types().contains(&CrateType::ProcMacro)
}

fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
let is_proc_macro = self.tcx.sess.crate_types().contains(&CrateType::ProcMacro);
let is_proc_macro = self.is_proc_macro();

let mut i = self.position();

@@ -458,11 +505,6 @@ impl<'tcx> EncodeContext<'tcx> {

let foreign_modules = self.encode_foreign_modules();

// Encode source_map
i = self.position();
let source_map = self.encode_source_map();
let source_map_bytes = self.position() - i;

// Encode DefPathTable
i = self.position();
let def_path_table = self.encode_def_path_table();
@@ -514,12 +556,19 @@ impl<'tcx> EncodeContext<'tcx> {
let proc_macro_data_bytes = self.position() - i;

// Encode exported symbols info. This is prefetched in `encode_metadata` so we encode
// this last to give the prefetching as much time as possible to complete.
// this late to give the prefetching as much time as possible to complete.
i = self.position();
let exported_symbols = self.tcx.exported_symbols(LOCAL_CRATE);
let exported_symbols = self.encode_exported_symbols(&exported_symbols);
let exported_symbols_bytes = self.position() - i;

// Encode source_map. This needs to be done last,
// since encoding `Span`s tells us which `SourceFiles` we actually
// need to encode.
i = self.position();
let source_map = self.encode_source_map();
let source_map_bytes = self.position() - i;

let attrs = tcx.hir().krate_attrs();
let has_default_lib_allocator = attr::contains_name(&attrs, sym::default_lib_allocator);

@@ -1860,17 +1909,22 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
// Will be filled with the root position after encoding everything.
encoder.emit_raw_bytes(&[0, 0, 0, 0]);

let source_map_files = tcx.sess.source_map().files();

let mut ecx = EncodeContext {
opaque: encoder,
tcx,
tables: Default::default(),
lazy_state: LazyState::NoNode,
type_shorthands: Default::default(),
predicate_shorthands: Default::default(),
source_file_cache: tcx.sess.source_map().files()[0].clone(),
source_file_cache: (source_map_files[0].clone(), 0),
interpret_allocs: Default::default(),
interpret_allocs_inverse: Default::default(),
required_source_files: Some(GrowableBitSet::with_capacity(source_map_files.len())),
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
};
drop(source_map_files);

// Encode the rustc version string in a predictable location.
rustc_version().encode(&mut ecx).unwrap();
2 changes: 1 addition & 1 deletion src/librustc_metadata/rmeta/mod.rs
@@ -192,7 +192,6 @@ crate struct CrateRoot<'tcx> {
diagnostic_items: Lazy<[(Symbol, DefIndex)]>,
native_libraries: Lazy<[NativeLib]>,
foreign_modules: Lazy<[ForeignModule]>,
source_map: Lazy<[rustc_span::SourceFile]>,
def_path_table: Lazy<rustc_hir::definitions::DefPathTable>,
impls: Lazy<[TraitImpls]>,
interpret_alloc_index: Lazy<[u32]>,
@@ -203,6 +202,7 @@ crate struct CrateRoot<'tcx> {
proc_macro_data: Option<Lazy<[DefIndex]>>,

exported_symbols: Lazy!([(ExportedSymbol<'tcx>, SymbolExportLevel)]),
source_map: Lazy<[rustc_span::SourceFile]>,

compiler_builtins: bool,
needs_allocator: bool,
3 changes: 2 additions & 1 deletion src/librustc_span/hygiene.rs
@@ -395,10 +395,11 @@ pub fn debug_hygiene_data(verbose: bool) -> String {
data.expn_data.iter().enumerate().for_each(|(id, expn_info)| {
let expn_info = expn_info.as_ref().expect("no expansion data for an expansion ID");
s.push_str(&format!(
"\n{}: parent: {:?}, call_site_ctxt: {:?}, kind: {:?}",
"\n{}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}",
id,
expn_info.parent,
expn_info.call_site.ctxt(),
expn_info.def_site.ctxt(),
expn_info.kind,
));
});
43 changes: 41 additions & 2 deletions src/librustc_span/source_map.rs
@@ -40,6 +40,41 @@ pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
}
}

pub mod monotonic {
use std::ops::{Deref, DerefMut};

/// A `MonotonicVec` is a `Vec` which can only be grown.
/// Once inserted, an element can never be removed or swapped,
/// guaranteeing that any indices into a `MonotonicVec` are stable.
// This is declared in its own module to ensure that the private
// field is inaccessible.
pub struct MonotonicVec<T>(Vec<T>);
impl<T> MonotonicVec<T> {
pub fn new(val: Vec<T>) -> MonotonicVec<T> {
MonotonicVec(val)
}

pub fn push(&mut self, val: T) {
self.0.push(val);
}
}

impl<T> Default for MonotonicVec<T> {
fn default() -> Self {
MonotonicVec::new(vec![])
}
}

impl<T> Deref for MonotonicVec<T> {
type Target = Vec<T>;
fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> !DerefMut for MonotonicVec<T> {}
}

#[derive(Clone, RustcEncodable, RustcDecodable, Debug, Copy, HashStable_Generic)]
pub struct Spanned<T> {
pub node: T,
@@ -125,7 +160,7 @@ impl StableSourceFileId {

#[derive(Default)]
pub(super) struct SourceMapFiles {
source_files: Vec<Lrc<SourceFile>>,
source_files: monotonic::MonotonicVec<Lrc<SourceFile>>,
stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>,
}

@@ -199,7 +234,9 @@ impl SourceMap {
Ok(bytes)
}

pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
// By returning a `MonotonicVec`, we ensure that consumers cannot invalidate
// any existing indices pointing into `files`.
pub fn files(&self) -> MappedLockGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> {
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
}

@@ -912,6 +949,8 @@ impl SourceMap {
}

// Returns the index of the `SourceFile` (in `self.files`) that contains `pos`.
// This index is guaranteed to be valid for the lifetime of this `SourceMap`,
// since `source_files` is a `MonotonicVec`
pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize {
self.files
.borrow()
4 changes: 2 additions & 2 deletions src/test/ui/hygiene/unpretty-debug.stdout
@@ -16,8 +16,8 @@ fn y /* 0#0 */() { }

/*
Expansions:
0: parent: ExpnId(0), call_site_ctxt: #0, kind: Root
1: parent: ExpnId(0), call_site_ctxt: #0, kind: Macro(Bang, "foo")
0: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Root
1: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Macro(Bang, "foo")

SyntaxContexts:
#0: parent: #0, outer_mark: (ExpnId(0), Opaque)
10 changes: 10 additions & 0 deletions src/test/ui/proc-macro/auxiliary/make-macro.rs
@@ -0,0 +1,10 @@
#[macro_export]
macro_rules! make_it {
($name:ident) => {
#[proc_macro]
pub fn $name(input: TokenStream) -> TokenStream {
println!("Def site: {:?}", Span::def_site());
input
}
};
}
12 changes: 12 additions & 0 deletions src/test/ui/proc-macro/auxiliary/meta-macro.rs
@@ -0,0 +1,12 @@
// force-host
// no-prefer-dynamic
// edition:2018

#![feature(proc_macro_def_site)]
#![crate_type = "proc-macro"]

extern crate proc_macro;
extern crate make_macro;
use proc_macro::{TokenStream, Span};

make_macro::make_it!(print_def_site);