Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correct off-by-one errors in lexer spans #2393

Merged
merged 3 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions crates/noirc_errors/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl<T: Hash> Hash for Spanned<T> {

impl<T> Spanned<T> {
pub fn from_position(start: Position, end: Position, contents: T) -> Spanned<T> {
Spanned { span: Span(ByteSpan::new(start, end)), contents }
Spanned { span: Span::inclusive(start, end), contents }
}

pub const fn from(t_span: Span, contents: T) -> Spanned<T> {
Expand All @@ -57,16 +57,8 @@ impl<T> std::borrow::Borrow<T> for Spanned<T> {
pub struct Span(ByteSpan);

impl Span {
pub fn new(range: Range<u32>) -> Span {
Span(ByteSpan::from(range))
}

pub fn exclusive(start: u32, end: u32) -> Span {
Span::new(start..end)
}

pub fn inclusive(start: u32, end: u32) -> Span {
Span::exclusive(start, end + 1)
Span(ByteSpan::from(start..end + 1))
}

pub fn single_char(start: u32) -> Span {
Expand Down Expand Up @@ -103,7 +95,7 @@ impl chumsky::Span for Span {
type Offset = u32;

fn new(_context: Self::Context, range: Range<Self::Offset>) -> Self {
Span::new(range)
Span(ByteSpan::from(range))
}

fn context(&self) -> Self::Context {}
Expand Down
2 changes: 1 addition & 1 deletion crates/noirc_errors/src/reporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ fn convert_diagnostic(
.iter()
.map(|sl| {
let start_span = sl.span.start() as usize;
let end_span = sl.span.end() as usize + 1;
let end_span = sl.span.end() as usize;
Label::secondary(file_id.as_usize(), start_span..end_span).with_message(&sl.message)
})
.collect()
Expand Down
4 changes: 2 additions & 2 deletions crates/noirc_evaluator/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ impl RuntimeError {
match self {
RuntimeError::InternalError(_) => {
Diagnostic::simple_error(
"Internal Consistency Evaluators Errors: \n
"Internal Consistency Evaluators Errors: \n
This is likely a bug. Consider Opening an issue at https://github.com/noir-lang/noir/issues".to_owned(),
"".to_string(),
noirc_errors::Span::new(0..0)
noirc_errors::Span::inclusive(0, 0)
)
}
_ => {
Expand Down
77 changes: 53 additions & 24 deletions crates/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ impl<'a> Lexer<'a> {
if self.peek_char_is('&') {
// When we issue this error the first '&' will already be consumed
// and the next token issued will be the next '&'.
let span = Span::new(self.position..self.position + 1);
let span = Span::inclusive(self.position, self.position + 1);
Err(LexerErrorKind::LogicalAnd { span })
} else {
self.single_char_token(Token::Ampersand)
Expand Down Expand Up @@ -102,7 +102,7 @@ impl<'a> Lexer<'a> {
Some('}') => self.single_char_token(Token::RightBrace),
Some('[') => self.single_char_token(Token::LeftBracket),
Some(']') => self.single_char_token(Token::RightBracket),
Some('"') => Ok(self.eat_string_literal(false)),
Some('"') => self.eat_string_literal(),
Some('f') => self.eat_format_string_or_alpha_numeric(),
Some('#') => self.eat_attribute(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Expand Down Expand Up @@ -195,9 +195,7 @@ impl<'a> Lexer<'a> {
&mut self,
initial_char: Option<char>,
predicate: F,
) -> (String, Position, Position) {
let start = self.position;

) -> String {
// This function is only called when we want to continue consuming a character of the same type.
// For example, we see a digit and we want to consume the whole integer
// Therefore, the current character which triggered this function will need to be appended
Expand All @@ -212,15 +210,15 @@ impl<'a> Lexer<'a> {
// If not, return word. The next character will be analyzed on the next iteration of next_token,
// Which will increment the cursor
if !predicate(peek_char) {
return (word, start, self.position);
return word;
}
word.push(peek_char);

// If we arrive at this point, then the char has been added to the word and we should increment the cursor
self.next_char();
}

(word, start, self.position)
word
}

fn eat_alpha_numeric(&mut self, initial_char: char) -> SpannedTokenResult {
Expand All @@ -236,6 +234,8 @@ impl<'a> Lexer<'a> {
}

fn eat_attribute(&mut self) -> SpannedTokenResult {
let start = self.position;

if !self.peek_char_is('[') {
return Err(LexerErrorKind::UnexpectedCharacter {
span: Span::single_char(self.position),
Expand All @@ -245,7 +245,7 @@ impl<'a> Lexer<'a> {
}
self.next_char();

let (word, start, end) = self.eat_while(None, |ch| ch != ']');
let word = self.eat_while(None, |ch| ch != ']');

if !self.peek_char_is(']') {
return Err(LexerErrorKind::UnexpectedCharacter {
Expand All @@ -256,28 +256,32 @@ impl<'a> Lexer<'a> {
}
self.next_char();

let attribute = Attribute::lookup_attribute(&word, Span::exclusive(start, end))?;
let end = self.position;

// Move start position backwards to cover the left bracket
// Move end position forwards to cover the right bracket
Ok(attribute.into_span(start - 1, end + 1))
let attribute = Attribute::lookup_attribute(&word, Span::inclusive(start, end))?;

Ok(attribute.into_span(start, end))
}

//XXX(low): Can increase performance if we use iterator semantic and utilize some of the methods on String. See below
// https://doc.rust-lang.org/stable/std/primitive.str.html#method.rsplit
fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
let (word, start, end) = self.eat_while(Some(initial_char), |ch| {
let start = self.position;

let word = self.eat_while(Some(initial_char), |ch| {
ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
});

let end = self.position;

// Check if word either an identifier or a keyword
if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
return Ok(keyword_token.into_span(start, end));
}

// Check if word an int type
// if no error occurred, then it is either a valid integer type or it is not an int type
let parsed_token = IntType::lookup_int_type(&word, Span::exclusive(start, end))?;
let parsed_token = IntType::lookup_int_type(&word, Span::inclusive(start, end))?;

// Check if it is an int type
if let Some(int_type_token) = parsed_token {
Expand All @@ -290,14 +294,18 @@ impl<'a> Lexer<'a> {
}

fn eat_digit(&mut self, initial_char: char) -> SpannedTokenResult {
let (integer_str, start, end) = self.eat_while(Some(initial_char), |ch| {
let start = self.position;

let integer_str = self.eat_while(Some(initial_char), |ch| {
ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x')
});

let end = self.position;

let integer = match FieldElement::try_from_str(&integer_str) {
None => {
return Err(LexerErrorKind::InvalidIntegerLiteral {
span: Span::exclusive(start, end),
span: Span::inclusive(start, end),
found: integer_str,
})
}
Expand All @@ -308,18 +316,38 @@ impl<'a> Lexer<'a> {
Ok(integer_token.into_span(start, end))
}

fn eat_string_literal(&mut self, is_format_string: bool) -> SpannedToken {
let (str_literal, start_span, end_span) = self.eat_while(None, |ch| ch != '"');
let str_literal_token =
if is_format_string { Token::FmtStr(str_literal) } else { Token::Str(str_literal) };
fn eat_string_literal(&mut self) -> SpannedTokenResult {
let start = self.position;

let str_literal = self.eat_while(None, |ch| ch != '"');

let str_literal_token = Token::Str(str_literal);

self.next_char(); // Advance past the closing quote

let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

// This differs from `eat_string_literal` in that we want the leading `f` to be captured in the Span
fn eat_fmt_string(&mut self) -> SpannedTokenResult {
let start = self.position;

self.next_char();

let str_literal = self.eat_while(None, |ch| ch != '"');

let str_literal_token = Token::FmtStr(str_literal);

self.next_char(); // Advance past the closing quote
str_literal_token.into_span(start_span, end_span)

let end = self.position;
Ok(str_literal_token.into_span(start, end))
}

fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult {
if self.peek_char_is('"') {
self.next_char();
Ok(self.eat_string_literal(true))
self.eat_fmt_string()
} else {
self.eat_alpha_numeric('f')
}
Expand All @@ -331,7 +359,7 @@ impl<'a> Lexer<'a> {
}

fn parse_block_comment(&mut self) -> SpannedTokenResult {
let span = Span::new(self.position..self.position + 1);
jfecher marked this conversation as resolved.
Show resolved Hide resolved
let start = self.position;
let mut depth = 1usize;

while let Some(ch) = self.next_char() {
Expand All @@ -358,6 +386,7 @@ impl<'a> Lexer<'a> {
if depth == 0 {
self.next_token()
} else {
let span = Span::inclusive(start, self.position);
Err(LexerErrorKind::UnterminatedBlockComment { span })
}
}
Expand Down