Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syntax: Tweak path parsing logic #37290

Merged
merged 2 commits into from
Oct 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ pub struct TokenAndSpan {
pub sp: Span,
}

// The default `TokenAndSpan` is a placeholder entry: an `Underscore` token
// paired with the dummy span.
impl Default for TokenAndSpan {
    fn default() -> Self {
        let tok = token::Underscore;
        let sp = syntax_pos::DUMMY_SP;
        TokenAndSpan { sp: sp, tok: tok }
    }
}

pub struct StringReader<'a> {
pub span_diagnostic: &'a Handler,
/// The absolute offset within the codemap of the next character to read
Expand Down
111 changes: 58 additions & 53 deletions src/libsyntax/parse/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,22 @@ enum PrevTokenKind {
Other,
}

/// Simple circular buffer holding the next few tokens read ahead of the parser.
///
/// The buffer is empty when `start == end`.
#[derive(Default)]
struct LookaheadBuffer {
/// Storage slots; a slot whose token has been taken out is refilled with a
/// default placeholder `TokenAndSpan`.
buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
/// Index of the oldest buffered token, i.e. the next one the parser takes.
start: usize,
/// Index of the slot that the next token read from the lexer is written to.
end: usize,
}

// Number of slots in `LookaheadBuffer`; `look_ahead` only accepts distances
// strictly below this value. 8 is used rather than a smaller size such as 6
// because `x % LOOKAHEAD_BUFFER_CAPACITY` is then cheaper to compute.
const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason this is 8? I think 6 would suffice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

x % LOOKAHEAD_BUFFER_CAPACITY is faster.
Not much difference either way, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point -- I forgot about peephole optimizations.


impl LookaheadBuffer {
    /// Number of tokens currently buffered, i.e. the circular distance from
    /// `start` to `end`.
    ///
    /// Both indices are always kept below `LOOKAHEAD_BUFFER_CAPACITY`.
    fn len(&self) -> usize {
        let (head, tail) = (self.start, self.end);
        if tail >= head {
            tail - head
        } else {
            // `end` has wrapped around past the last slot.
            LOOKAHEAD_BUFFER_CAPACITY - head + tail
        }
    }
}

/* ident is handled by common.rs */

pub struct Parser<'a> {
Expand All @@ -258,9 +274,7 @@ pub struct Parser<'a> {
pub cfg: CrateConfig,
/// the previous token kind
prev_token_kind: PrevTokenKind,
pub buffer: [TokenAndSpan; 4],
pub buffer_start: isize,
pub buffer_end: isize,
lookahead_buffer: LookaheadBuffer,
pub tokens_consumed: usize,
pub restrictions: Restrictions,
pub quote_depth: usize, // not (yet) related to the quasiquoter
Expand Down Expand Up @@ -356,10 +370,6 @@ impl<'a> Parser<'a> {
_ => PathBuf::from(sess.codemap().span_to_filename(span)),
};
directory.pop();
let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: span,
};

Parser {
reader: rdr,
Expand All @@ -369,14 +379,7 @@ impl<'a> Parser<'a> {
span: span,
prev_span: span,
prev_token_kind: PrevTokenKind::Other,
buffer: [
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
placeholder.clone(),
],
buffer_start: 0,
buffer_end: 0,
lookahead_buffer: Default::default(),
tokens_consumed: 0,
restrictions: Restrictions::empty(),
quote_depth: 0,
Expand Down Expand Up @@ -937,19 +940,13 @@ impl<'a> Parser<'a> {
_ => PrevTokenKind::Other,
};

let next = if self.buffer_start == self.buffer_end {
let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
self.reader.real_token()
} else {
// Avoid token copies with `replace`.
let buffer_start = self.buffer_start as usize;
let next_index = (buffer_start + 1) & 3;
self.buffer_start = next_index as isize;

let placeholder = TokenAndSpan {
tok: token::Underscore,
sp: self.span,
};
mem::replace(&mut self.buffer[buffer_start], placeholder)
let old_start = self.lookahead_buffer.start;
self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
};
self.span = next.sp;
self.token = next.tok;
Expand Down Expand Up @@ -982,21 +979,22 @@ impl<'a> Parser<'a> {
self.expected_tokens.clear();
}

pub fn buffer_length(&mut self) -> isize {
if self.buffer_start <= self.buffer_end {
return self.buffer_end - self.buffer_start;
}
return (4 - self.buffer_start) + self.buffer_end;
}
pub fn look_ahead<R, F>(&mut self, distance: usize, f: F) -> R where
pub fn look_ahead<R, F>(&mut self, dist: usize, f: F) -> R where
F: FnOnce(&token::Token) -> R,
{
let dist = distance as isize;
while self.buffer_length() < dist {
self.buffer[self.buffer_end as usize] = self.reader.real_token();
self.buffer_end = (self.buffer_end + 1) & 3;
if dist == 0 {
f(&self.token)
} else if dist < LOOKAHEAD_BUFFER_CAPACITY {
while self.lookahead_buffer.len() < dist {
self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.reader.real_token();
self.lookahead_buffer.end =
(self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
}
let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
f(&self.lookahead_buffer.buffer[index].tok)
} else {
self.bug("lookahead distance is too large");
}
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as usize].tok)
}
pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
self.sess.span_diagnostic.struct_span_fatal(self.span, m)
Expand Down Expand Up @@ -1118,7 +1116,6 @@ impl<'a> Parser<'a> {
Ok(ast::TyKind::ImplTrait(bounds))
}


pub fn parse_ty_path(&mut self) -> PResult<'a, TyKind> {
Ok(TyKind::Path(None, self.parse_path(PathStyle::Type)?))
}
Expand Down Expand Up @@ -3623,7 +3620,7 @@ impl<'a> Parser<'a> {
// Parse box pat
let subpat = self.parse_pat()?;
pat = PatKind::Box(subpat);
} else if self.token.is_ident() && self.token.is_path_start() &&
} else if self.token.is_ident() && !self.token.is_any_keyword() &&
self.look_ahead(1, |t| match *t {
token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) |
token::DotDotDot | token::ModSep | token::Not => false,
Expand Down Expand Up @@ -3874,6 +3871,11 @@ impl<'a> Parser<'a> {
})
}

/// Returns `true` when the current token is the `union` keyword and the token
/// right after it is an identifier that is not itself a keyword, i.e. the
/// input starts a union item rather than a path beginning with `union`.
fn is_union_item(&mut self) -> bool {
    if !self.token.is_keyword(keywords::Union) {
        return false;
    }
    self.look_ahead(1, |next| next.is_ident() && !next.is_any_keyword())
}

fn parse_stmt_without_recovery(&mut self,
macro_legacy_warnings: bool)
-> PResult<'a, Option<Stmt>> {
Expand All @@ -3888,10 +3890,10 @@ impl<'a> Parser<'a> {
node: StmtKind::Local(self.parse_local(attrs.into())?),
span: mk_sp(lo, self.prev_span.hi),
}
} else if self.token.is_path_start() && self.token != token::Lt && {
!self.check_keyword(keywords::Union) ||
self.look_ahead(1, |t| *t == token::Not || *t == token::ModSep)
} {
// Starts like a simple path, but not a union item.
} else if self.token.is_path_start() &&
!self.token.is_qpath_start() &&
!self.is_union_item() {
let pth = self.parse_path(PathStyle::Expr)?;

if !self.eat(&token::Not) {
Expand Down Expand Up @@ -4602,6 +4604,10 @@ impl<'a> Parser<'a> {
token::Ident(ident) => { this.bump(); codemap::respan(this.prev_span, ident) }
_ => unreachable!()
};
let isolated_self = |this: &mut Self, n| {
this.look_ahead(n, |t| t.is_keyword(keywords::SelfValue)) &&
this.look_ahead(n + 1, |t| t != &token::ModSep)
};

// Parse optional self parameter of a method.
// Only a limited set of initial token sequences is considered self parameters, anything
Expand All @@ -4614,22 +4620,22 @@ impl<'a> Parser<'a> {
// &'lt self
// &'lt mut self
// &not_self
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
(SelfKind::Region(None, Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
(SelfKind::Region(None, Mutability::Mutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
let lt = self.parse_lifetime()?;
(SelfKind::Region(Some(lt), Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) &&
self.look_ahead(3, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 3) {
self.bump();
let lt = self.parse_lifetime()?;
self.bump();
Expand All @@ -4644,12 +4650,12 @@ impl<'a> Parser<'a> {
// *mut self
// *not_self
// Emit special error for `self` cases.
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
if isolated_self(self, 1) {
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
(SelfKind::Value(Mutability::Immutable), expect_ident(self))
} else if self.look_ahead(1, |t| t.is_mutability()) &&
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 2) {
self.bump();
self.bump();
self.span_err(self.span, "cannot pass `self` by raw pointer");
Expand All @@ -4659,7 +4665,7 @@ impl<'a> Parser<'a> {
}
}
token::Ident(..) => {
if self.token.is_keyword(keywords::SelfValue) {
if isolated_self(self, 0) {
// self
// self: TYPE
let eself_ident = expect_ident(self);
Expand All @@ -4670,7 +4676,7 @@ impl<'a> Parser<'a> {
(SelfKind::Value(Mutability::Immutable), eself_ident)
}
} else if self.token.is_keyword(keywords::Mut) &&
self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
isolated_self(self, 1) {
// mut self
// mut self: TYPE
self.bump();
Expand Down Expand Up @@ -5961,8 +5967,7 @@ impl<'a> Parser<'a> {
maybe_append(attrs, extra_attrs));
return Ok(Some(item));
}
if self.check_keyword(keywords::Union) &&
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) {
if self.is_union_item() {
// UNION ITEM
self.bump();
let (ident, item_, extra_attrs) = self.parse_item_union()?;
Expand Down
11 changes: 7 additions & 4 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,8 @@ impl Token {
/// Returns `true` if the token can appear at the start of an expression.
pub fn can_begin_expr(&self) -> bool {
match *self {
OpenDelim(_) => true,
OpenDelim(..) => true,
Ident(..) => true,
Underscore => true,
Tilde => true,
Literal(..) => true,
Not => true,
BinOp(Minus) => true,
Expand All @@ -172,6 +170,7 @@ impl Token {
OrOr => true, // in lambda syntax
AndAnd => true, // double borrow
DotDot | DotDotDot => true, // range notation
Lt | BinOp(Shl) => true, // associated path
ModSep => true,
Interpolated(NtExpr(..)) => true,
Interpolated(NtIdent(..)) => true,
Expand Down Expand Up @@ -236,8 +235,12 @@ impl Token {
self.is_keyword(keywords::Const)
}

pub fn is_qpath_start(&self) -> bool {
self == &Lt || self == &BinOp(Shl)
}

pub fn is_path_start(&self) -> bool {
self == &ModSep || self == &Lt || self.is_path() ||
self == &ModSep || self.is_qpath_start() || self.is_path() ||
self.is_path_segment_keyword() || self.is_ident() && !self.is_any_keyword()
}

Expand Down
20 changes: 20 additions & 0 deletions src/test/compile-fail/associated-path-shl.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Check that associated paths starting with `<<` are successfully parsed.

fn main() {
let _: <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let _ = <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
let 0 ... <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
//~^ ERROR only char and numeric types are allowed in range patterns
<<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let Self = "foo"; //~ error: expected identifier, found keyword `Self`
let Self = "foo"; //~ ERROR unresolved unit struct/variant or constant `Self`
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let super = "foo"; //~ error: expected identifier, found keyword `super`
let super = "foo"; //~ ERROR unresolved unit struct/variant or constant `super`
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// compile-flags: -Z parse-only

fn main() {
let super: isize; //~ ERROR expected identifier, found keyword `super`
let super: isize; //~ ERROR unresolved unit struct/variant or constant `super`
}
23 changes: 23 additions & 0 deletions src/test/compile-fail/self-vs-path-ambiguity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Check that `self::foo` is parsed as a general pattern and not a self argument.

struct S;

impl S {
fn f(self::S: S) {}
fn g(&self::S: &S) {}
fn h(&mut self::S: &mut S) {}
fn i(&'a self::S: &S) {} //~ ERROR unexpected lifetime `'a` in pattern
//~^ ERROR expected one of `)` or `mut`, found `'a`
}

fn main() {}
12 changes: 11 additions & 1 deletion src/test/compile-fail/self_type_keyword-2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,14 @@

use self::Self as Foo; //~ ERROR unresolved import `self::Self`

pub fn main() {}
pub fn main() {
let Self = 5;
//~^ ERROR unresolved unit struct/variant or constant `Self`

match 15 {
Self => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
Foo { x: Self } => (),
//~^ ERROR unresolved unit struct/variant or constant `Self`
}
}
Loading