Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lexer: improve errors #9308

Merged
merged 3 commits into from
Sep 20, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 91 additions & 26 deletions src/libsyntax/parse/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,46 @@ impl reader for TtReader {
fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
}

// report a lexical error spanning [`from_pos`, `to_pos`)
fn fatal_span(rdr: @mut StringReader,
from_pos: BytePos,
to_pos: BytePos,
m: ~str)
-> ! {
rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
rdr.fatal(m);
}

// report a lexical error spanning [`from_pos`, `to_pos`), appending an
// escaped character to the error message
fn fatal_span_char(rdr: @mut StringReader,
from_pos: BytePos,
to_pos: BytePos,
m: ~str,
c: char)
-> ! {
let mut m = m;
m.push_str(": ");
char::escape_default(c, |c| m.push_char(c));
fatal_span(rdr, from_pos, to_pos, m);
}

// report a lexical error spanning [`from_pos`, `to_pos`), appending the
// offending string to the error message
fn fatal_span_verbose(rdr: @mut StringReader,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really necessary to have? The whole idea of spans is to highlight the code in question, so as long as the span is accurate there should be no need to include the actual code in the error text?

from_pos: BytePos,
to_pos: BytePos,
m: ~str)
-> ! {
let mut m = m;
m.push_str(": ");
let s = rdr.src.slice(
byte_offset(rdr, from_pos).to_uint(),
byte_offset(rdr, to_pos).to_uint());
m.push_str(s);
fatal_span(rdr, from_pos, to_pos, m);
}

// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @mut StringReader) {
Expand Down Expand Up @@ -327,7 +367,8 @@ fn consume_block_comment(rdr: @mut StringReader)
bump(rdr);
}
if is_eof(rdr) {
rdr.fatal(~"unterminated block doc-comment");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"unterminated block doc-comment");
} else {
bump(rdr);
bump(rdr);
Expand All @@ -344,8 +385,12 @@ fn consume_block_comment(rdr: @mut StringReader)
}
}
} else {
let start_bpos = rdr.last_pos - BytePos(2u);
loop {
if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); }
if is_eof(rdr) {
fatal_span(rdr, start_bpos, rdr.last_pos,
~"unterminated block comment");
}
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
Expand All @@ -361,7 +406,7 @@ fn consume_block_comment(rdr: @mut StringReader)
if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
}

fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> {
let mut c = rdr.curr;
let mut rslt = ~"";
if c == 'e' || c == 'E' {
Expand All @@ -375,7 +420,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
let exponent = scan_digits(rdr, 10u);
if exponent.len() > 0u {
return Some(rslt + exponent);
} else { rdr.fatal(~"scan_exponent: bad fp literal"); }
} else {
fatal_span(rdr, start_bpos, rdr.last_pos,
~"scan_exponent: bad fp literal");
}
} else { return None::<~str>; }
}

Expand All @@ -399,6 +447,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
let mut base = 10u;
let mut c = c;
let mut n = nextch(rdr);
let start_bpos = rdr.last_pos;
if c == '0' && n == 'x' {
bump(rdr);
bump(rdr);
Expand Down Expand Up @@ -442,11 +491,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
else { either::Right(ast::ty_u64) };
}
if num_str.len() == 0u {
rdr.fatal(~"no valid digits found for number");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"no valid digits found for number");
}
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
Some(p) => p,
None => rdr.fatal(~"int literal is too large")
None => fatal_span(rdr, start_bpos, rdr.last_pos,
~"int literal is too large")
};

match tp {
Expand All @@ -464,12 +515,14 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
}
if is_float {
match base {
16u => rdr.fatal(~"hexadecimal float literal is not supported"),
2u => rdr.fatal(~"binary float literal is not supported"),
16u => fatal_span(rdr, start_bpos, rdr.last_pos,
~"hexadecimal float literal is not supported"),
2u => fatal_span(rdr, start_bpos, rdr.last_pos,
~"binary float literal is not supported"),
_ => ()
}
}
match scan_exponent(rdr) {
match scan_exponent(rdr, start_bpos) {
Some(ref s) => {
is_float = true;
num_str.push_str(*s);
Expand Down Expand Up @@ -507,11 +560,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
} else {
if num_str.len() == 0u {
rdr.fatal(~"no valid digits found for number");
fatal_span(rdr, start_bpos, rdr.last_pos,
~"no valid digits found for number");
}
let parsed = match from_str_radix::<u64>(num_str, base as uint) {
Some(p) => p,
None => rdr.fatal(~"int literal is too large")
None => fatal_span(rdr, start_bpos, rdr.last_pos,
~"int literal is too large")
};

debug!("lexing %s as an unsuffixed integer literal",
Expand All @@ -523,19 +578,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char {
let mut accum_int = 0;
let mut i = n_hex_digits;
let start_bpos = rdr.last_pos;
while i != 0u {
let n = rdr.curr;
bump(rdr);
if !is_hex_digit(n) {
rdr.fatal(fmt!("illegal numeric character escape: %d", n as int));
fatal_span_char(rdr, rdr.last_pos, rdr.pos,
~"illegal character in numeric character escape",
n);
}
bump(rdr);
accum_int *= 16;
accum_int += hex_digit_val(n);
i -= 1u;
}
match char::from_u32(accum_int as u32) {
Some(x) => x,
None => rdr.fatal(fmt!("illegal numeric character escape"))
None => fatal_span(rdr, start_bpos, rdr.last_pos,
~"illegal numeric character escape")
}
}

Expand Down Expand Up @@ -691,6 +750,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
if c2 == '\\' {
// '\X' for some X must be a character constant:
let escaped = rdr.curr;
let escaped_pos = rdr.last_pos;
bump(rdr);
match escaped {
'n' => { c2 = '\n'; }
Expand All @@ -704,32 +764,39 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
'u' => { c2 = scan_numeric_escape(rdr, 4u); }
'U' => { c2 = scan_numeric_escape(rdr, 8u); }
c2 => {
rdr.fatal(fmt!("unknown character escape: %d", c2 as int));
fatal_span_char(rdr, escaped_pos, rdr.last_pos,
~"unknown character escape", c2);
}
}
}
if rdr.curr != '\'' {
rdr.fatal(~"unterminated character constant");
fatal_span_verbose(rdr,
// Byte offsetting here is okay because the
// character before position `start` is an
// ascii single quote.
start - BytePos(1u),
rdr.last_pos,
~"unterminated character constant");
}
bump(rdr); // advance curr past token
return token::LIT_CHAR(c2 as u32);
}
'"' => {
let mut accum_str = ~"";
let n = rdr.last_pos;
let start_bpos = rdr.last_pos;
bump(rdr);
while rdr.curr != '"' {
if is_eof(rdr) {
do with_str_from(rdr, n) |s| {
rdr.fatal(fmt!("unterminated double quote string: %s", s));
}
fatal_span(rdr, start_bpos, rdr.last_pos,
~"unterminated double quote string");
}

let ch = rdr.curr;
bump(rdr);
match ch {
'\\' => {
let escaped = rdr.curr;
let escaped_pos = rdr.last_pos;
bump(rdr);
match escaped {
'n' => accum_str.push_char('\n'),
Expand All @@ -750,7 +817,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
accum_str.push_char(scan_numeric_escape(rdr, 8u));
}
c2 => {
rdr.fatal(fmt!("unknown string escape: %d", c2 as int));
fatal_span_char(rdr, escaped_pos, rdr.last_pos,
~"unknown string escape", c2);
}
}
}
Expand Down Expand Up @@ -786,11 +854,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
'^' => { return binop(rdr, token::CARET); }
'%' => { return binop(rdr, token::PERCENT); }
c => {
// So the error span points to the unrecognized character
rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos);
let mut cs = ~"";
char::escape_default(c, |c| cs.push_char(c));
rdr.fatal(fmt!("unknown start of token: %s", cs));
fatal_span_char(rdr, rdr.last_pos, rdr.pos,
~"unknown start of token", c);
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-bad-fp-lit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static f: float =
1e+ //~ ERROR: scan_exponent: bad fp literal
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you ensure that 1e+ is included in the error message as well

;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-hex-float-lit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static f: float =
0x539.0 //~ ERROR: hexadecimal float literal is not supported
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-illegal-num-char-escape-2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\Uffffffff' //~ ERROR: illegal numeric character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-illegal-num-char-escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\u539_' //~ ERROR: illegal character in numeric character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-int-lit-too-large-2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
99999999999999999999999999999999u32 //~ ERROR: int literal is too large
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-int-lit-too-large.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
99999999999999999999999999999999 //~ ERROR: int literal is too large
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-no-valid-digits-2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
0xu32 //~ ERROR: no valid digits
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-no-valid-digits.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static i: int =
0x //~ ERROR: no valid digits
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unknown-char-escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

static c: char =
'\●' //~ ERROR: unknown character escape
;
13 changes: 13 additions & 0 deletions src/test/compile-fail/lex-unknown-start-tok.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

fn main() {
● //~ ERROR: unknown start of token
}
Loading