From f702b20dfd22990a326af9221cb3ed9b389c8307 Mon Sep 17 00:00:00 2001 From: Eduard Burtescu Date: Fri, 10 Jun 2016 13:00:21 +0300 Subject: [PATCH] rustc_save_analysis: don't pollute the codemap with fake files. --- src/librustc_save_analysis/span_utils.rs | 67 ++++++------------------ src/libsyntax/parse/lexer/mod.rs | 46 +++++++++++++--- 2 files changed, 54 insertions(+), 59 deletions(-) diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs index 89525b27ed36a..b5add6404fc9f 100644 --- a/src/librustc_save_analysis/span_utils.rs +++ b/src/librustc_save_analysis/span_utils.rs @@ -17,7 +17,6 @@ use std::env; use std::path::Path; use syntax::ast; -use syntax::parse::filemap_to_tts; use syntax::parse::lexer::{self, StringReader}; use syntax::parse::token::{self, Token}; use syntax::symbol::keywords; @@ -49,23 +48,6 @@ impl<'a> SpanUtils<'a> { } } - // sub_span starts at span.lo, so we need to adjust the positions etc. - // If sub_span is None, we don't need to adjust. - pub fn make_sub_span(&self, span: Span, sub_span: Option) -> Option { - match sub_span { - None => None, - Some(sub) => { - let FileMapAndBytePos {fm, pos} = self.sess.codemap().lookup_byte_offset(span.lo); - let base = pos + fm.start_pos; - Some(Span { - lo: base + self.sess.codemap().lookup_byte_offset(sub.lo).pos, - hi: base + self.sess.codemap().lookup_byte_offset(sub.hi).pos, - expn_id: span.expn_id, - }) - } - } - } - pub fn snippet(&self, span: Span) -> String { match self.sess.codemap().span_to_snippet(span) { Ok(s) => s, @@ -74,24 +56,7 @@ impl<'a> SpanUtils<'a> { } pub fn retokenise_span(&self, span: Span) -> StringReader<'a> { - // sadness - we don't have spans for sub-expressions nor access to the tokens - // so in order to get extents for the function name itself (which dxr expects) - // we need to re-tokenise the fn definition - - // Note: this is a bit awful - it adds the contents of span to the end of - // the codemap as a new filemap. This is mostly OK, but means we should - // not iterate over the codemap. Also, any spans over the new filemap - // are incompatible with spans over other filemaps. - let filemap = self.sess - .codemap() - .new_filemap(String::from(""), None, self.snippet(span)); - lexer::StringReader::new(&self.sess.parse_sess, filemap) - } - - fn span_to_tts(&self, span: Span) -> Vec { - let filename = String::from(""); - let filemap = self.sess.codemap().new_filemap(filename, None, self.snippet(span)); - filemap_to_tts(&self.sess.parse_sess, filemap) + lexer::StringReader::retokenize(&self.sess.parse_sess, span) } // Re-parses a path and returns the span for the last identifier in the path @@ -103,7 +68,7 @@ impl<'a> SpanUtils<'a> { loop { let ts = toks.real_token(); if ts.tok == token::Eof { - return self.make_sub_span(span, result) + return result } if bracket_count == 0 && (ts.tok.is_ident() || ts.tok.is_keyword(keywords::SelfValue)) { result = Some(ts.sp); @@ -128,7 +93,7 @@ impl<'a> SpanUtils<'a> { return None; } if bracket_count == 0 && (ts.tok.is_ident() || ts.tok.is_keyword(keywords::SelfValue)) { - return self.make_sub_span(span, Some(ts.sp)); + return Some(ts.sp); } bracket_count += match ts.tok { @@ -178,10 +143,7 @@ impl<'a> SpanUtils<'a> { } prev = next; } - if result.is_none() && prev_span.is_some() { - return self.make_sub_span(span, prev_span); - } - return self.make_sub_span(span, result); + result.or(prev_span) } // Return the span for the last ident before a `<` and outside any @@ -241,9 +203,9 @@ impl<'a> SpanUtils<'a> { loc.line); } if result.is_none() && prev.tok.is_ident() && angle_count == 0 { - return self.make_sub_span(span, Some(prev.sp)); + return Some(prev.sp); } - self.make_sub_span(span, result) + result } // Reparse span and return an owned vector of sub spans of the first limit @@ -310,7 +272,7 @@ impl<'a> SpanUtils<'a> { angle_count += 1; } if ts.tok.is_ident() && angle_count == nesting { - result.push(self.make_sub_span(span, Some(ts.sp)).unwrap()); + result.push(ts.sp); } } } @@ -320,8 +282,11 @@ impl<'a> SpanUtils<'a> { /// end of the 'signature' part, that is up to, but not including an opening /// brace or semicolon. pub fn signature_string_for_span(&self, span: Span) -> String { - let mut toks = self.span_to_tts(span).into_iter(); + let mut toks = self.retokenise_span(span); + toks.real_token(); + let mut toks = toks.parse_all_token_trees().unwrap().into_iter(); let mut prev = toks.next().unwrap(); + let first_span = prev.get_span(); let mut angle_count = 0; for tok in toks { @@ -360,7 +325,7 @@ impl<'a> SpanUtils<'a> { } let next = toks.real_token(); if next.tok == tok { - return self.make_sub_span(span, Some(prev.sp)); + return Some(prev.sp); } prev = next; } @@ -374,7 +339,7 @@ impl<'a> SpanUtils<'a> { return None; } if next.tok == tok { - return self.make_sub_span(span, Some(next.sp)); + return Some(next.sp); } } } @@ -399,7 +364,7 @@ impl<'a> SpanUtils<'a> { if ts.tok == token::Eof { return None } else { - return self.make_sub_span(span, Some(ts.sp)); + return Some(ts.sp); } } } @@ -444,7 +409,7 @@ impl<'a> SpanUtils<'a> { if ts.tok == token::Not { let ts = toks.real_token(); if ts.tok.is_ident() { - return self.make_sub_span(span, Some(ts.sp)); + return Some(ts.sp); } else { return None; } @@ -463,7 +428,7 @@ impl<'a> SpanUtils<'a> { let ts = toks.real_token(); if ts.tok == token::Not { if prev.tok.is_ident() { - return self.make_sub_span(span, Some(prev.sp)); + return Some(prev.sp); } else { return None; } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 6bc15115b09d3..b7f6e6a2384f7 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -51,10 +51,10 @@ pub struct StringReader<'a> { pub filemap: Rc, /// If Some, stop reading the source at this position (inclusive). pub terminator: Option, - /// Whether to record new-lines in filemap. This is only necessary the first - /// time a filemap is lexed. If part of a filemap is being re-lexed, this - /// should be set to false. - pub save_new_lines: bool, + /// Whether to record new-lines and multibyte chars in filemap. + /// This is only necessary the first time a filemap is lexed. + /// If part of a filemap is being re-lexed, this should be set to false. + pub save_new_lines_and_multibyte: bool, // cached: pub peek_tok: token::Token, pub peek_span: Span, @@ -162,7 +162,7 @@ impl<'a> StringReader<'a> { ch: Some('\n'), filemap: filemap, terminator: None, - save_new_lines: true, + save_new_lines_and_multibyte: true, // dummy values; not read peek_tok: token::Eof, peek_span: syntax_pos::DUMMY_SP, @@ -183,6 +183,31 @@ impl<'a> StringReader<'a> { sr } + pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self { + let begin = sess.codemap().lookup_byte_offset(span.lo); + let end = sess.codemap().lookup_byte_offset(span.hi); + + // Make the range zero-length if the span is invalid. + if span.lo > span.hi || begin.fm.start_pos != end.fm.start_pos { + span.hi = span.lo; + } + + let mut sr = StringReader::new_raw_internal(sess, begin.fm); + + // Seek the lexer to the right byte range. + sr.save_new_lines_and_multibyte = false; + sr.next_pos = span.lo; + sr.terminator = Some(span.hi); + + sr.bump(); + + if let Err(_) = sr.advance_token() { + sr.emit_fatal_errors(); + panic!(FatalError); + } + sr + } + pub fn ch_is(&self, c: char) -> bool { self.ch == Some(c) } @@ -378,7 +403,10 @@ impl<'a> StringReader<'a> { pub fn bump(&mut self) { let new_pos = self.next_pos; let new_byte_offset = self.byte_offset(new_pos).to_usize(); - if new_byte_offset < self.source_text.len() { + let end = self.terminator.map_or(self.source_text.len(), |t| { + self.byte_offset(t).to_usize() + }); + if new_byte_offset < end { let old_ch_is_newline = self.ch.unwrap() == '\n'; let new_ch = char_at(&self.source_text, new_byte_offset); let new_ch_len = new_ch.len_utf8(); @@ -387,7 +415,7 @@ impl<'a> StringReader<'a> { self.pos = new_pos; self.next_pos = new_pos + Pos::from_usize(new_ch_len); if old_ch_is_newline { - if self.save_new_lines { + if self.save_new_lines_and_multibyte { self.filemap.next_line(self.pos); } self.col = CharPos(0); @@ -395,7 +423,9 @@ impl<'a> StringReader<'a> { self.col = self.col + CharPos(1); } if new_ch_len > 1 { - self.filemap.record_multibyte_char(self.pos, new_ch_len); + if self.save_new_lines_and_multibyte { + self.filemap.record_multibyte_char(self.pos, new_ch_len); + } } } else { self.ch = None;