From 65ff4ca2948301f59b6a6eab14234d005378859a Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Wed, 19 Oct 2016 23:33:41 +0300 Subject: [PATCH 1/2] Refactor parser lookahead buffer and increase its size --- src/libsyntax/parse/lexer/mod.rs | 6 +++ src/libsyntax/parse/parser.rs | 73 +++++++++++++++----------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e62d0d925cd4f..5e20f6e419276 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -74,6 +74,12 @@ pub struct TokenAndSpan { pub sp: Span, } +impl Default for TokenAndSpan { + fn default() -> Self { + TokenAndSpan { tok: token::Underscore, sp: syntax_pos::DUMMY_SP } + } +} + pub struct StringReader<'a> { pub span_diagnostic: &'a Handler, /// The absolute offset within the codemap of the next character to read diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index eac78f5e6c68c..2509fd12d03fe 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -245,6 +245,22 @@ enum PrevTokenKind { Other, } +// Simple circular buffer used for keeping few next tokens. 
+#[derive(Default)] +struct LookaheadBuffer { + buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY], + start: usize, + end: usize, +} + +const LOOKAHEAD_BUFFER_CAPACITY: usize = 8; + +impl LookaheadBuffer { + fn len(&self) -> usize { + (LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY + } +} + /* ident is handled by common.rs */ pub struct Parser<'a> { @@ -258,9 +274,7 @@ pub struct Parser<'a> { pub cfg: CrateConfig, /// the previous token kind prev_token_kind: PrevTokenKind, - pub buffer: [TokenAndSpan; 4], - pub buffer_start: isize, - pub buffer_end: isize, + lookahead_buffer: LookaheadBuffer, pub tokens_consumed: usize, pub restrictions: Restrictions, pub quote_depth: usize, // not (yet) related to the quasiquoter @@ -356,10 +370,6 @@ impl<'a> Parser<'a> { _ => PathBuf::from(sess.codemap().span_to_filename(span)), }; directory.pop(); - let placeholder = TokenAndSpan { - tok: token::Underscore, - sp: span, - }; Parser { reader: rdr, @@ -369,14 +379,7 @@ impl<'a> Parser<'a> { span: span, prev_span: span, prev_token_kind: PrevTokenKind::Other, - buffer: [ - placeholder.clone(), - placeholder.clone(), - placeholder.clone(), - placeholder.clone(), - ], - buffer_start: 0, - buffer_end: 0, + lookahead_buffer: Default::default(), tokens_consumed: 0, restrictions: Restrictions::empty(), quote_depth: 0, @@ -937,19 +940,13 @@ impl<'a> Parser<'a> { _ => PrevTokenKind::Other, }; - let next = if self.buffer_start == self.buffer_end { + let next = if self.lookahead_buffer.start == self.lookahead_buffer.end { self.reader.real_token() } else { // Avoid token copies with `replace`. 
- let buffer_start = self.buffer_start as usize; - let next_index = (buffer_start + 1) & 3; - self.buffer_start = next_index as isize; - - let placeholder = TokenAndSpan { - tok: token::Underscore, - sp: self.span, - }; - mem::replace(&mut self.buffer[buffer_start], placeholder) + let old_start = self.lookahead_buffer.start; + self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY; + mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default()) }; self.span = next.sp; self.token = next.tok; @@ -982,21 +979,22 @@ impl<'a> Parser<'a> { self.expected_tokens.clear(); } - pub fn buffer_length(&mut self) -> isize { - if self.buffer_start <= self.buffer_end { - return self.buffer_end - self.buffer_start; - } - return (4 - self.buffer_start) + self.buffer_end; - } - pub fn look_ahead(&mut self, distance: usize, f: F) -> R where + pub fn look_ahead(&mut self, dist: usize, f: F) -> R where F: FnOnce(&token::Token) -> R, { - let dist = distance as isize; - while self.buffer_length() < dist { - self.buffer[self.buffer_end as usize] = self.reader.real_token(); - self.buffer_end = (self.buffer_end + 1) & 3; + if dist == 0 { + f(&self.token) + } else if dist < LOOKAHEAD_BUFFER_CAPACITY { + while self.lookahead_buffer.len() < dist { + self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.reader.real_token(); + self.lookahead_buffer.end = + (self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY; + } + let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY; + f(&self.lookahead_buffer.buffer[index].tok) + } else { + self.bug("lookahead distance is too large"); } - f(&self.buffer[((self.buffer_start + dist - 1) & 3) as usize].tok) } pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> { self.sess.span_diagnostic.struct_span_fatal(self.span, m) @@ -1118,7 +1116,6 @@ impl<'a> Parser<'a> { Ok(ast::TyKind::ImplTrait(bounds)) } - pub fn parse_ty_path(&mut self) -> PResult<'a, TyKind> { Ok(TyKind::Path(None, 
self.parse_path(PathStyle::Type)?)) } From fea630ef9d738aabaf6cbf3ccedb1bc1adae1e6d Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Wed, 19 Oct 2016 23:33:41 +0300 Subject: [PATCH 2/2] Tweak path parsing logic --- src/libsyntax/parse/parser.rs | 38 +++++++++++-------- src/libsyntax/parse/token.rs | 11 ++++-- src/test/compile-fail/associated-path-shl.rs | 20 ++++++++++ .../keyword-self-as-identifier.rs | 4 +- .../keyword-super-as-identifier.rs | 4 +- .../keyword-super.rs | 4 +- .../compile-fail/self-vs-path-ambiguity.rs | 23 +++++++++++ src/test/compile-fail/self_type_keyword-2.rs | 12 +++++- src/test/compile-fail/self_type_keyword.rs | 7 ---- src/test/run-pass/union/union-backcomp.rs | 6 +++ 10 files changed, 93 insertions(+), 36 deletions(-) create mode 100644 src/test/compile-fail/associated-path-shl.rs rename src/test/{parse-fail => compile-fail}/keyword-self-as-identifier.rs (81%) rename src/test/{parse-fail => compile-fail}/keyword-super-as-identifier.rs (81%) rename src/test/{parse-fail => compile-fail}/keyword-super.rs (81%) create mode 100644 src/test/compile-fail/self-vs-path-ambiguity.rs diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 2509fd12d03fe..463ec334cc567 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -3620,7 +3620,7 @@ impl<'a> Parser<'a> { // Parse box pat let subpat = self.parse_pat()?; pat = PatKind::Box(subpat); - } else if self.token.is_ident() && self.token.is_path_start() && + } else if self.token.is_ident() && !self.token.is_any_keyword() && self.look_ahead(1, |t| match *t { token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) | token::DotDotDot | token::ModSep | token::Not => false, @@ -3871,6 +3871,11 @@ impl<'a> Parser<'a> { }) } + fn is_union_item(&mut self) -> bool { + self.token.is_keyword(keywords::Union) && + self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) + } + fn parse_stmt_without_recovery(&mut self, macro_legacy_warnings: bool) 
-> PResult<'a, Option<Stmt>> { @@ -3885,10 +3890,10 @@ impl<'a> Parser<'a> { node: StmtKind::Local(self.parse_local(attrs.into())?), span: mk_sp(lo, self.prev_span.hi), } - } else if self.token.is_path_start() && self.token != token::Lt && { - !self.check_keyword(keywords::Union) || - self.look_ahead(1, |t| *t == token::Not || *t == token::ModSep) - } { + // Starts like a simple path, but not a union item. + } else if self.token.is_path_start() && + !self.token.is_qpath_start() && + !self.is_union_item() { let pth = self.parse_path(PathStyle::Expr)?; if !self.eat(&token::Not) { @@ -4599,6 +4604,10 @@ impl<'a> Parser<'a> { token::Ident(ident) => { this.bump(); codemap::respan(this.prev_span, ident) } _ => unreachable!() }; + let isolated_self = |this: &mut Self, n| { + this.look_ahead(n, |t| t.is_keyword(keywords::SelfValue)) && + this.look_ahead(n + 1, |t| t != &token::ModSep) + }; // Parse optional self parameter of a method. // Only a limited set of initial token sequences is considered self parameters, anything @@ -4611,22 +4620,22 @@ impl<'a> Parser<'a> { // &'lt self // &'lt mut self // &not_self - if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) { + if isolated_self(self, 1) { self.bump(); (SelfKind::Region(None, Mutability::Immutable), expect_ident(self)) } else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) && - self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) { + isolated_self(self, 2) { self.bump(); self.bump(); (SelfKind::Region(None, Mutability::Mutable), expect_ident(self)) } else if self.look_ahead(1, |t| t.is_lifetime()) && - self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) { + isolated_self(self, 2) { self.bump(); let lt = self.parse_lifetime()?; (SelfKind::Region(Some(lt), Mutability::Immutable), expect_ident(self)) } else if self.look_ahead(1, |t| t.is_lifetime()) && self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) && - self.look_ahead(3, |t| t.is_keyword(keywords::SelfValue)) { 
self.bump(); let lt = self.parse_lifetime()?; self.bump(); @@ -4641,12 +4650,12 @@ impl<'a> Parser<'a> { // *mut self // *not_self // Emit special error for `self` cases. - if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) { + if isolated_self(self, 1) { self.bump(); self.span_err(self.span, "cannot pass `self` by raw pointer"); (SelfKind::Value(Mutability::Immutable), expect_ident(self)) } else if self.look_ahead(1, |t| t.is_mutability()) && - self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) { + isolated_self(self, 2) { self.bump(); self.bump(); self.span_err(self.span, "cannot pass `self` by raw pointer"); @@ -4656,7 +4665,7 @@ impl<'a> Parser<'a> { } } token::Ident(..) => { - if self.token.is_keyword(keywords::SelfValue) { + if isolated_self(self, 0) { // self // self: TYPE let eself_ident = expect_ident(self); @@ -4667,7 +4676,7 @@ impl<'a> Parser<'a> { (SelfKind::Value(Mutability::Immutable), eself_ident) } } else if self.token.is_keyword(keywords::Mut) && - self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) { + isolated_self(self, 1) { // mut self // mut self: TYPE self.bump(); @@ -5958,8 +5967,7 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.check_keyword(keywords::Union) && - self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) { + if self.is_union_item() { // UNION ITEM self.bump(); let (ident, item_, extra_attrs) = self.parse_item_union()?; diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 26b5b99c8cce6..4d0da660302ae 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -159,10 +159,8 @@ impl Token { /// Returns `true` if the token can appear at the start of an expression. pub fn can_begin_expr(&self) -> bool { match *self { - OpenDelim(_) => true, + OpenDelim(..) => true, Ident(..) => true, - Underscore => true, - Tilde => true, Literal(..) 
=> true, Not => true, BinOp(Minus) => true, @@ -172,6 +170,7 @@ impl Token { OrOr => true, // in lambda syntax AndAnd => true, // double borrow DotDot | DotDotDot => true, // range notation + Lt | BinOp(Shl) => true, // associated path ModSep => true, Interpolated(NtExpr(..)) => true, Interpolated(NtIdent(..)) => true, @@ -236,8 +235,12 @@ impl Token { self.is_keyword(keywords::Const) } + pub fn is_qpath_start(&self) -> bool { + self == &Lt || self == &BinOp(Shl) + } + pub fn is_path_start(&self) -> bool { - self == &ModSep || self == &Lt || self.is_path() || + self == &ModSep || self.is_qpath_start() || self.is_path() || self.is_path_segment_keyword() || self.is_ident() && !self.is_any_keyword() } diff --git a/src/test/compile-fail/associated-path-shl.rs b/src/test/compile-fail/associated-path-shl.rs new file mode 100644 index 0000000000000..6bc110239cdbf --- /dev/null +++ b/src/test/compile-fail/associated-path-shl.rs @@ -0,0 +1,20 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Check that associated paths starting with `<<` are successfully parsed. + +fn main() { + let _: <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope + let _ = <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope + let <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope + let 0 ... 
<<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope + //~^ ERROR only char and numeric types are allowed in range patterns + <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope +} diff --git a/src/test/parse-fail/keyword-self-as-identifier.rs b/src/test/compile-fail/keyword-self-as-identifier.rs similarity index 81% rename from src/test/parse-fail/keyword-self-as-identifier.rs rename to src/test/compile-fail/keyword-self-as-identifier.rs index f8b93a1796bfe..650874711a669 100644 --- a/src/test/parse-fail/keyword-self-as-identifier.rs +++ b/src/test/compile-fail/keyword-self-as-identifier.rs @@ -8,8 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z parse-only - fn main() { - let Self = "foo"; //~ error: expected identifier, found keyword `Self` + let Self = "foo"; //~ ERROR unresolved unit struct/variant or constant `Self` } diff --git a/src/test/parse-fail/keyword-super-as-identifier.rs b/src/test/compile-fail/keyword-super-as-identifier.rs similarity index 81% rename from src/test/parse-fail/keyword-super-as-identifier.rs rename to src/test/compile-fail/keyword-super-as-identifier.rs index a48683a4f54dc..531705563e2e0 100644 --- a/src/test/parse-fail/keyword-super-as-identifier.rs +++ b/src/test/compile-fail/keyword-super-as-identifier.rs @@ -8,8 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-// compile-flags: -Z parse-only - fn main() { - let super = "foo"; //~ error: expected identifier, found keyword `super` + let super = "foo"; //~ ERROR unresolved unit struct/variant or constant `super` } diff --git a/src/test/parse-fail/keyword-super.rs b/src/test/compile-fail/keyword-super.rs similarity index 81% rename from src/test/parse-fail/keyword-super.rs rename to src/test/compile-fail/keyword-super.rs index 671be8c44b9c6..9ac9e800c843b 100644 --- a/src/test/parse-fail/keyword-super.rs +++ b/src/test/compile-fail/keyword-super.rs @@ -8,8 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z parse-only - fn main() { - let super: isize; //~ ERROR expected identifier, found keyword `super` + let super: isize; //~ ERROR unresolved unit struct/variant or constant `super` } diff --git a/src/test/compile-fail/self-vs-path-ambiguity.rs b/src/test/compile-fail/self-vs-path-ambiguity.rs new file mode 100644 index 0000000000000..9753014e7810a --- /dev/null +++ b/src/test/compile-fail/self-vs-path-ambiguity.rs @@ -0,0 +1,23 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Check that `self::foo` is parsed as a general pattern and not a self argument. 
+ +struct S; + +impl S { + fn f(self::S: S) {} + fn g(&self::S: &S) {} + fn h(&mut self::S: &mut S) {} + fn i(&'a self::S: &S) {} //~ ERROR unexpected lifetime `'a` in pattern + //~^ ERROR expected one of `)` or `mut`, found `'a` +} + +fn main() {} diff --git a/src/test/compile-fail/self_type_keyword-2.rs b/src/test/compile-fail/self_type_keyword-2.rs index 613f54eb33134..118d3d8a0bec2 100644 --- a/src/test/compile-fail/self_type_keyword-2.rs +++ b/src/test/compile-fail/self_type_keyword-2.rs @@ -10,4 +10,14 @@ use self::Self as Foo; //~ ERROR unresolved import `self::Self` -pub fn main() {} +pub fn main() { + let Self = 5; + //~^ ERROR unresolved unit struct/variant or constant `Self` + + match 15 { + Self => (), + //~^ ERROR unresolved unit struct/variant or constant `Self` + Foo { x: Self } => (), + //~^ ERROR unresolved unit struct/variant or constant `Self` + } +} diff --git a/src/test/compile-fail/self_type_keyword.rs b/src/test/compile-fail/self_type_keyword.rs index 0f2a3f12107ef..db6bcc611b823 100644 --- a/src/test/compile-fail/self_type_keyword.rs +++ b/src/test/compile-fail/self_type_keyword.rs @@ -17,12 +17,7 @@ struct Bar<'Self>; //~^ ERROR lifetimes cannot use keyword names pub fn main() { - let Self = 5; - //~^ ERROR expected identifier, found keyword `Self` - match 15 { - Self => (), - //~^ ERROR expected identifier, found keyword `Self` ref Self => (), //~^ ERROR expected identifier, found keyword `Self` mut Self => (), @@ -31,8 +26,6 @@ pub fn main() { //~^ ERROR expected identifier, found keyword `Self` Self!() => (), //~^ ERROR macro undefined: 'Self!' 
- Foo { x: Self } => (), - //~^ ERROR expected identifier, found keyword `Self` Foo { Self } => (), //~^ ERROR expected identifier, found keyword `Self` } diff --git a/src/test/run-pass/union/union-backcomp.rs b/src/test/run-pass/union/union-backcomp.rs index 9394b618ddf25..0f8c996bebda8 100644 --- a/src/test/run-pass/union/union-backcomp.rs +++ b/src/test/run-pass/union/union-backcomp.rs @@ -10,6 +10,12 @@ #![feature(untagged_unions)] +macro_rules! union { + () => (struct S;) +} + +union!(); + fn union() {} fn main() {