Skip to content

Commit

Permalink
quote_expr macro: embed Ident using special encoding that preserves h…
Browse files Browse the repository at this point in the history
…ygiene.

This adds support to `quote_expr!` and friends for round-trip hygienic
preservation of Ident.

Here are the pieces of the puzzle:

* adding a method for encoding Ident for re-reading into token tree.

* Support for reading such encoded Idents in the lexer.  Note that one
  must peek ahead for MOD_SEP after scan_embedded_hygienic_ident.

* To ensure that encoded Idents are only read when we are in the midst
  of expanding a `quote_expr` or similar, added a
  `read_embedded_ident` flag on `StringReader`.

* pprust support for encoding Idents as (uint,uint) pairs (for hygiene).
  • Loading branch information
pnkfelix committed Aug 13, 2014
1 parent 9d55421 commit c3ce245
Show file tree
Hide file tree
Showing 5 changed files with 345 additions and 31 deletions.
6 changes: 6 additions & 0 deletions src/libsyntax/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ impl Ident {
pub fn as_str<'a>(&'a self) -> &'a str {
self.name.as_str()
}

/// Encode this identifier as the string `\x00name_<N>,ctxt_<C>\x00`,
/// where `<N>` is `self.name.uint()` and `<C>` is `self.ctxt`, both
/// formatted with `{:u}`. A `\x00` byte opens and closes the encoding.
pub fn encode_with_hygiene(&self) -> String {
format!("\x00name_{:u},ctxt_{:u}\x00",
self.name.uint(),
self.ctxt)
}
}

impl Show for Ident {
Expand Down
99 changes: 96 additions & 3 deletions src/libsyntax/ext/quote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,39 @@ pub mod rt {
fn to_source(&self) -> String;
}

// FIXME (Issue #16472): This should go away after ToToken impls
// are revised to go directly to token-trees.
/// Extension of `ToSource` whose output keeps identifier hygiene intact.
trait ToSourceWithHygiene : ToSource {
// Takes a thing and generates a string containing rust code
// for it, encoding Idents as special byte sequences to
// maintain hygiene across serialization and deserialization.
fn to_source_with_hygiene(&self) -> String;
}

// Implements both `ToSource` and `ToSourceWithHygiene` for a type by
// delegating to the pretty-printer function named by `$pp`:
// `pprust::$pp` for the plain form and `pprust::with_hygiene::$pp` for
// the hygienic form.
macro_rules! impl_to_source(
// `Gc<T>` arm: the printer is handed a reference to the pointee
// (`&**self`) rather than to the `Gc` itself.
(Gc<$t:ty>, $pp:ident) => (
impl ToSource for Gc<$t> {
fn to_source(&self) -> String {
pprust::$pp(&**self)
}
}
impl ToSourceWithHygiene for Gc<$t> {
fn to_source_with_hygiene(&self) -> String {
pprust::with_hygiene::$pp(&**self)
}
}
);
// Plain-type arm: the printer is handed `self` directly.
($t:ty, $pp:ident) => (
impl ToSource for $t {
fn to_source(&self) -> String {
pprust::$pp(self)
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
pprust::with_hygiene::$pp(self)
}
}
);
)

Expand All @@ -122,13 +141,28 @@ pub mod rt {
.to_string()
}

// Renders every element of `xs` with `to_source_with_hygiene` and joins
// the resulting strings with `sep` between them.
//
// NOTE(review): the trailing `.to_string()` looks redundant if `connect`
// already yields a `String` -- confirm before removing.
fn slice_to_source_with_hygiene<'a, T: ToSourceWithHygiene>(
sep: &'static str, xs: &'a [T]) -> String {
xs.iter()
.map(|i| i.to_source_with_hygiene())
.collect::<Vec<String>>()
.connect(sep)
.to_string()
}

// Implements `ToSource` and `ToSourceWithHygiene` for `&[$t]`, joining
// the per-element output with the separator `$sep`. The plain form goes
// through `slice_to_source`, the hygienic form through
// `slice_to_source_with_hygiene`.
macro_rules! impl_to_source_slice(
($t:ty, $sep:expr) => (
impl<'a> ToSource for &'a [$t] {
fn to_source(&self) -> String {
slice_to_source($sep, *self)
}
}

impl<'a> ToSourceWithHygiene for &'a [$t] {
fn to_source_with_hygiene(&self) -> String {
slice_to_source_with_hygiene($sep, *self)
}
}
)
)

Expand All @@ -138,6 +172,12 @@ pub mod rt {
}
}

// An `Ident` is emitted in its encoded form (see
// `Ident::encode_with_hygiene`) instead of as its plain name.
impl ToSourceWithHygiene for ast::Ident {
fn to_source_with_hygiene(&self) -> String {
self.encode_with_hygiene()
}
}

impl_to_source!(ast::Ty, ty_to_string)
impl_to_source!(ast::Block, block_to_string)
impl_to_source!(ast::Arg, arg_to_string)
Expand All @@ -156,6 +196,11 @@ pub mod rt {
pprust::attribute_to_string(&dummy_spanned(*self))
}
}
// No hygiene-specific encoding is applied to attributes here; the
// hygienic form is simply the plain `to_source` output.
impl ToSourceWithHygiene for ast::Attribute_ {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}

impl<'a> ToSource for &'a str {
fn to_source(&self) -> String {
Expand All @@ -164,26 +209,46 @@ pub mod rt {
pprust::lit_to_string(&lit)
}
}
// The hygienic form of a string slice is identical to its plain
// `to_source` output.
impl<'a> ToSourceWithHygiene for &'a str {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}

impl ToSource for () {
fn to_source(&self) -> String {
"()".to_string()
}
}
// The hygienic form of `()` is identical to its plain `to_source` output.
impl ToSourceWithHygiene for () {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}

impl ToSource for bool {
fn to_source(&self) -> String {
let lit = dummy_spanned(ast::LitBool(*self));
pprust::lit_to_string(&lit)
}
}
// The hygienic form of a `bool` is identical to its plain `to_source`
// output.
impl ToSourceWithHygiene for bool {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}

impl ToSource for char {
fn to_source(&self) -> String {
let lit = dummy_spanned(ast::LitChar(*self));
pprust::lit_to_string(&lit)
}
}
// The hygienic form of a `char` is identical to its plain `to_source`
// output.
impl ToSourceWithHygiene for char {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}

macro_rules! impl_to_source_int(
(signed, $t:ty, $tag:ident) => (
Expand All @@ -194,6 +259,11 @@ pub mod rt {
pprust::lit_to_string(&dummy_spanned(lit))
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
);
(unsigned, $t:ty, $tag:ident) => (
impl ToSource for $t {
Expand All @@ -202,6 +272,11 @@ pub mod rt {
pprust::lit_to_string(&dummy_spanned(lit))
}
}
impl ToSourceWithHygiene for $t {
fn to_source_with_hygiene(&self) -> String {
self.to_source()
}
}
);
)

Expand All @@ -223,7 +298,7 @@ pub mod rt {
($t:ty) => (
impl ToTokens for $t {
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
cx.parse_tts(self.to_source())
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
}
}
)
Expand All @@ -233,7 +308,7 @@ pub mod rt {
($t:ty) => (
impl<'a> ToTokens for $t {
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
cx.parse_tts(self.to_source())
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
}
}
)
Expand Down Expand Up @@ -272,7 +347,13 @@ pub mod rt {
fn parse_item(&self, s: String) -> Gc<ast::Item>;
fn parse_expr(&self, s: String) -> Gc<ast::Expr>;
fn parse_stmt(&self, s: String) -> Gc<ast::Stmt>;
fn parse_tts(&self, s: String) -> Vec<ast::TokenTree> ;
fn parse_tts(&self, s: String) -> Vec<ast::TokenTree>;
}

/// Parsing helper for source strings that may carry encoded identifiers.
trait ExtParseUtilsWithHygiene {
// FIXME (Issue #16472): This should go away after ToToken impls
// are revised to go directly to token-trees.
/// Parse the source string `s` into a vector of token trees.
fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree>;
}

impl<'a> ExtParseUtils for ExtCtxt<'a> {
Expand Down Expand Up @@ -315,6 +396,18 @@ pub mod rt {
}
}

impl<'a> ExtParseUtilsWithHygiene for ExtCtxt<'a> {

// Parses `s` with `parse::with_hygiene::parse_tts_from_source_str`,
// supplying this context's `cfg()` and `parse_sess()`. The first
// argument, "<quote expansion>", is presumably the name reported for
// the synthetic source -- confirm against the callee.
fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree> {
use parse::with_hygiene::parse_tts_from_source_str;
parse_tts_from_source_str("<quote expansion>".to_string(),
s,
self.cfg(),
self.parse_sess())
}

}

}

pub fn expand_quote_tokens(cx: &mut ExtCtxt,
Expand Down
105 changes: 105 additions & 0 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ use parse::token;
use parse::token::{str_to_ident};

use std::char;
use std::fmt;
use std::mem::replace;
use std::num;
use std::rc::Rc;
use std::str;

Expand Down Expand Up @@ -55,6 +57,11 @@ pub struct StringReader<'a> {
/* cached: */
pub peek_tok: token::Token,
pub peek_span: Span,

// FIXME (Issue #16472): This field should go away after ToToken impls
// are revised to go directly to token-trees.
/// Is \x00name_<name>,ctxt_<ctxt>\x00 to be interpreted as an encoded ast::Ident?
read_embedded_ident: bool,
}

impl<'a> Reader for StringReader<'a> {
Expand Down Expand Up @@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> {
}
}

// FIXME (Issue #16472): This function should go away after
// ToToken impls are revised to go directly to token-trees.
/// Create a `StringReader` over `filemap` with `read_embedded_ident`
/// switched on.
///
/// The reader is built with `StringReader::new_raw`, the flag is set, and
/// `advance_token` is called once before the reader is returned.
pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler,
filemap: Rc<codemap::FileMap>)
-> StringReader<'b> {
let mut sr = StringReader::new_raw(span_diagnostic, filemap);
// `new_raw` initialises the flag to false; enable it before lexing the
// first token.
sr.read_embedded_ident = true;
sr.advance_token();
sr
}

impl<'a> StringReader<'a> {
/// For comments.rs, which hackily pokes into pos and curr
pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
Expand All @@ -120,6 +138,7 @@ impl<'a> StringReader<'a> {
/* dummy values; not read */
peek_tok: token::EOF,
peek_span: codemap::DUMMY_SP,
read_embedded_ident: false,
};
sr.bump();
sr
Expand Down Expand Up @@ -512,6 +531,81 @@ impl<'a> StringReader<'a> {
})
}

// FIXME (Issue #16472): The scan_embedded_hygienic_ident function
// should go away after we revise the syntax::ext::quote::ToToken
// impls to go directly to token-trees instead of thing -> string
// -> token-trees. (The function is currently used to resolve
// Issues #15750 and #15962.)
//
// Since this function is only used for certain internal macros,
// and the functionality it provides is not exposed to end user
// programs, pnkfelix deliberately chose to write it in a way that
// favors rustc debugging effectiveness over runtime efficiency.

/// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00
/// where: `NNNNNN` is a string of characters forming an integer
/// (the name) and `CCCCCCC` is a string of characters forming an
/// integer (the ctxt), separated by a comma and delimited by a
/// `\x00` marker.
///
/// Returns the `ast::Ident` built from the two decoded integers.
/// Calls `fail!` if the input deviates from the expected form at any
/// point, including hitting EOF early.
#[inline(never)]
fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident {
// Consume the reader's current character if it equals `c`; otherwise
// fail. `described_c` is how the expected character is named in the
// failure message and `where` is appended to that message as context.
fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>,
c: char,
described_c: D,
where: &str) {
match r.curr {
Some(r_c) if r_c == c => r.bump(),
Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where),
None => fail!("expected {}, hit EOF, {}", described_c, where),
}
}

// Context string appended to every failure message below.
let where = "while scanning embedded hygienic ident";

// skip over the leading `\x00`
bump_expecting_char(self, '\x00', "nul-byte", where);

// skip over the "name_"
for c in "name_".chars() {
bump_expecting_char(self, c, c, where);
}

// Remember where the digits start so they can be sliced out afterwards.
let start_bpos = self.last_pos;
let base = 10;

// find the integer representing the name
self.scan_digits(base);
// NOTE(review): the radix passed to `from_str_radix` is the literal 10
// rather than `base`; the two agree today but are not tied together.
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|| {
fail!("expected digits representing a name, got `{}`, {}, range [{},{}]",
s, where, start_bpos, self.last_pos);
})
});

// skip over the `,`
bump_expecting_char(self, ',', "comma", where);

// skip over the "ctxt_"
for c in "ctxt_".chars() {
bump_expecting_char(self, c, c, where);
}

// find the integer representing the ctxt
// (`start_bpos` is deliberately shadowed with the new starting position)
let start_bpos = self.last_pos;
self.scan_digits(base);
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
num::from_str_radix(s, 10).unwrap_or_else(|| {
fail!("expected digits representing a ctxt, got `{}`, {}", s, where);
})
});

// skip over the `\x00`
bump_expecting_char(self, '\x00', "nul-byte", where);

ast::Ident { name: ast::Name(encoded_name),
ctxt: encoded_ctxt, }
}

/// Scan through any digits (base `radix`) or underscores, and return how
/// many digits there were.
fn scan_digits(&mut self, radix: uint) -> uint {
Expand Down Expand Up @@ -839,6 +933,17 @@ impl<'a> StringReader<'a> {
return self.scan_number(c.unwrap());
}

if self.read_embedded_ident {
match (c.unwrap(), self.nextch(), self.nextnextch()) {
('\x00', Some('n'), Some('a')) => {
let ast_ident = self.scan_embedded_hygienic_ident();
let is_mod_name = self.curr_is(':') && self.nextch_is(':');
return token::IDENT(ast_ident, is_mod_name);
}
_ => {}
}
}

match c.expect("next_token_inner called at EOF") {
// One-byte tokens.
';' => { self.bump(); return token::SEMI; }
Expand Down
Loading

0 comments on commit c3ce245

Please sign in to comment.