diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 0bc9e61927436..d3b0866d4a79c 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -149,6 +149,46 @@ impl reader for TtReader { fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader } } +// report a lexical error spanning [`from_pos`, `to_pos`) +fn fatal_span(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str) + -> ! { + rdr.peek_span = codemap::mk_sp(from_pos, to_pos); + rdr.fatal(m); +} + +// report a lexical error spanning [`from_pos`, `to_pos`), appending an +// escaped character to the error message +fn fatal_span_char(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str, + c: char) + -> ! { + let mut m = m; + m.push_str(": "); + char::escape_default(c, |c| m.push_char(c)); + fatal_span(rdr, from_pos, to_pos, m); +} + +// report a lexical error spanning [`from_pos`, `to_pos`), appending the +// offending string to the error message +fn fatal_span_verbose(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str) + -> ! { + let mut m = m; + m.push_str(": "); + let s = rdr.src.slice( + byte_offset(rdr, from_pos).to_uint(), + byte_offset(rdr, to_pos).to_uint()); + m.push_str(s); + fatal_span(rdr, from_pos, to_pos, m); +} + // EFFECT: advance peek_tok and peek_span to refer to the next token. // EFFECT: update the interner, maybe. 
fn string_advance_token(r: @mut StringReader) { @@ -327,7 +367,8 @@ fn consume_block_comment(rdr: @mut StringReader) bump(rdr); } if is_eof(rdr) { - rdr.fatal(~"unterminated block doc-comment"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block doc-comment"); } else { bump(rdr); bump(rdr); @@ -344,8 +385,12 @@ fn consume_block_comment(rdr: @mut StringReader) } } } else { + let start_bpos = rdr.last_pos - BytePos(2u); loop { - if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); } + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block comment"); + } if rdr.curr == '*' && nextch(rdr) == '/' { bump(rdr); bump(rdr); @@ -361,7 +406,7 @@ fn consume_block_comment(rdr: @mut StringReader) if res.is_some() { res } else { consume_whitespace_and_comments(rdr) } } -fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { +fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> { let mut c = rdr.curr; let mut rslt = ~""; if c == 'e' || c == 'E' { @@ -375,7 +420,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { let exponent = scan_digits(rdr, 10u); if exponent.len() > 0u { return Some(rslt + exponent); - } else { rdr.fatal(~"scan_exponent: bad fp literal"); } + } else { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"scan_exponent: bad fp literal"); + } } else { return None::<~str>; } } @@ -399,6 +447,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { let mut base = 10u; let mut c = c; let mut n = nextch(rdr); + let start_bpos = rdr.last_pos; if c == '0' && n == 'x' { bump(rdr); bump(rdr); @@ -442,11 +491,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { else { either::Right(ast::ty_u64) }; } if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::<u64>(num_str, base as uint) { Some(p) => p, - None => 
rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; match tp { @@ -464,12 +515,14 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { } if is_float { match base { - 16u => rdr.fatal(~"hexadecimal float literal is not supported"), - 2u => rdr.fatal(~"binary float literal is not supported"), + 16u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"hexadecimal float literal is not supported"), + 2u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"binary float literal is not supported"), _ => () } } - match scan_exponent(rdr) { + match scan_exponent(rdr, start_bpos) { Some(ref s) => { is_float = true; num_str.push_str(*s); @@ -507,11 +560,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str)); } else { if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::<u64>(num_str, base as uint) { Some(p) => p, - None => rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; debug!("lexing %s as an unsuffixed integer literal", @@ -523,19 +578,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char { let mut accum_int = 0; let mut i = n_hex_digits; + let start_bpos = rdr.last_pos; while i != 0u { let n = rdr.curr; - bump(rdr); if !is_hex_digit(n) { - rdr.fatal(fmt!("illegal numeric character escape: %d", n as int)); + fatal_span_char(rdr, rdr.last_pos, rdr.pos, + ~"illegal character in numeric character escape", + n); } + bump(rdr); accum_int *= 16; accum_int += hex_digit_val(n); i -= 1u; } match char::from_u32(accum_int as u32) { Some(x) => x, - None => 
fatal_span(rdr, start_bpos, rdr.last_pos, + ~"illegal numeric character escape") } } @@ -691,6 +750,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { if c2 == '\\' { // '\X' for some X must be a character constant: let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => { c2 = '\n'; } @@ -704,25 +764,31 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { 'u' => { c2 = scan_numeric_escape(rdr, 4u); } 'U' => { c2 = scan_numeric_escape(rdr, 8u); } c2 => { - rdr.fatal(fmt!("unknown character escape: %d", c2 as int)); + fatal_span_char(rdr, escaped_pos, rdr.last_pos, + ~"unknown character escape", c2); } } } if rdr.curr != '\'' { - rdr.fatal(~"unterminated character constant"); + fatal_span_verbose(rdr, + // Byte offsetting here is okay because the + // character before position `start` is an + // ascii single quote. + start - BytePos(1u), + rdr.last_pos, + ~"unterminated character constant"); } bump(rdr); // advance curr past token return token::LIT_CHAR(c2 as u32); } '"' => { let mut accum_str = ~""; - let n = rdr.last_pos; + let start_bpos = rdr.last_pos; bump(rdr); while rdr.curr != '"' { if is_eof(rdr) { - do with_str_from(rdr, n) |s| { - rdr.fatal(fmt!("unterminated double quote string: %s", s)); - } + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated double quote string"); } let ch = rdr.curr; @@ -730,6 +796,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { match ch { '\\' => { let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => accum_str.push_char('\n'), @@ -750,7 +817,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { accum_str.push_char(scan_numeric_escape(rdr, 8u)); } c2 => { - rdr.fatal(fmt!("unknown string escape: %d", c2 as int)); + fatal_span_char(rdr, escaped_pos, rdr.last_pos, + ~"unknown string escape", c2); } } } @@ -786,11 +854,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { '^' => { 
return binop(rdr, token::CARET); } '%' => { return binop(rdr, token::PERCENT); } c => { - // So the error span points to the unrecognized character - rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos); - let mut cs = ~""; - char::escape_default(c, |c| cs.push_char(c)); - rdr.fatal(fmt!("unknown start of token: %s", cs)); + fatal_span_char(rdr, rdr.last_pos, rdr.pos, + ~"unknown start of token", c); } } } diff --git a/src/test/compile-fail/lex-bad-fp-lit.rs b/src/test/compile-fail/lex-bad-fp-lit.rs new file mode 100644 index 0000000000000..5a5e9d7d8f238 --- /dev/null +++ b/src/test/compile-fail/lex-bad-fp-lit.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static f: float = + 1e+ //~ ERROR: scan_exponent: bad fp literal +; diff --git a/src/test/compile-fail/lex-hex-float-lit.rs b/src/test/compile-fail/lex-hex-float-lit.rs new file mode 100644 index 0000000000000..457c6126c44a5 --- /dev/null +++ b/src/test/compile-fail/lex-hex-float-lit.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +static f: float = + 0x539.0 //~ ERROR: hexadecimal float literal is not supported +; diff --git a/src/test/compile-fail/lex-illegal-num-char-escape-2.rs b/src/test/compile-fail/lex-illegal-num-char-escape-2.rs new file mode 100644 index 0000000000000..fe46cec776dfa --- /dev/null +++ b/src/test/compile-fail/lex-illegal-num-char-escape-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\Uffffffff' //~ ERROR: illegal numeric character escape +; diff --git a/src/test/compile-fail/lex-illegal-num-char-escape.rs b/src/test/compile-fail/lex-illegal-num-char-escape.rs new file mode 100644 index 0000000000000..858afffb443f3 --- /dev/null +++ b/src/test/compile-fail/lex-illegal-num-char-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\u539_' //~ ERROR: illegal character in numeric character escape +; diff --git a/src/test/compile-fail/lex-int-lit-too-large-2.rs b/src/test/compile-fail/lex-int-lit-too-large-2.rs new file mode 100644 index 0000000000000..39d1cba64b08b --- /dev/null +++ b/src/test/compile-fail/lex-int-lit-too-large-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 99999999999999999999999999999999u32 //~ ERROR: int literal is too large +; diff --git a/src/test/compile-fail/lex-int-lit-too-large.rs b/src/test/compile-fail/lex-int-lit-too-large.rs new file mode 100644 index 0000000000000..6343be651fa59 --- /dev/null +++ b/src/test/compile-fail/lex-int-lit-too-large.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 99999999999999999999999999999999 //~ ERROR: int literal is too large +; diff --git a/src/test/compile-fail/lex-no-valid-digits-2.rs b/src/test/compile-fail/lex-no-valid-digits-2.rs new file mode 100644 index 0000000000000..549dbf5bc8c6c --- /dev/null +++ b/src/test/compile-fail/lex-no-valid-digits-2.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 0xu32 //~ ERROR: no valid digits +; diff --git a/src/test/compile-fail/lex-no-valid-digits.rs b/src/test/compile-fail/lex-no-valid-digits.rs new file mode 100644 index 0000000000000..6a5b8e93f010a --- /dev/null +++ b/src/test/compile-fail/lex-no-valid-digits.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static i: int = + 0x //~ ERROR: no valid digits +; diff --git a/src/test/compile-fail/lex-unknown-char-escape.rs b/src/test/compile-fail/lex-unknown-char-escape.rs new file mode 100644 index 0000000000000..f2445c2b60eba --- /dev/null +++ b/src/test/compile-fail/lex-unknown-char-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '\●' //~ ERROR: unknown character escape +; diff --git a/src/test/compile-fail/lex-unknown-start-tok.rs b/src/test/compile-fail/lex-unknown-start-tok.rs new file mode 100644 index 0000000000000..1bb682303451b --- /dev/null +++ b/src/test/compile-fail/lex-unknown-start-tok.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +fn main() { + ● //~ ERROR: unknown start of token +} diff --git a/src/test/compile-fail/lex-unknown-str-escape.rs b/src/test/compile-fail/lex-unknown-str-escape.rs new file mode 100644 index 0000000000000..f7809b02b0be3 --- /dev/null +++ b/src/test/compile-fail/lex-unknown-str-escape.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static s: &'static str = + "\●" //~ ERROR: unknown string escape +; diff --git a/src/test/compile-fail/lex-unterminated-char-const.rs b/src/test/compile-fail/lex-unterminated-char-const.rs new file mode 100644 index 0000000000000..551360ff9e095 --- /dev/null +++ b/src/test/compile-fail/lex-unterminated-char-const.rs @@ -0,0 +1,13 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +static c: char = + '● //~ ERROR: unterminated character constant +;