diff --git a/Cargo.lock b/Cargo.lock
index e62f966b8fd..dec9cf7ad4f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -268,7 +268,7 @@ dependencies = [
  "ark-std",
  "derivative",
  "hashbrown 0.13.2",
- "itertools",
+ "itertools 0.10.5",
  "num-traits",
  "zeroize",
 ]
@@ -285,7 +285,7 @@ dependencies = [
  "ark-std",
  "derivative",
  "digest",
- "itertools",
+ "itertools 0.10.5",
  "num-bigint",
  "num-traits",
  "paste",
@@ -374,6 +374,15 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 
+[[package]]
+name = "ascii-canvas"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6"
+dependencies = [
+ "term",
+]
+
 [[package]]
 name = "assert_cmd"
 version = "2.0.12"
@@ -1178,7 +1187,7 @@ dependencies = [
  "clap",
  "criterion-plot",
  "is-terminal",
- "itertools",
+ "itertools 0.10.5",
  "num-traits",
  "once_cell",
  "oorandom",
@@ -1199,7 +1208,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
 dependencies = [
  "cast",
- "itertools",
+ "itertools 0.10.5",
 ]
 
@@ -1255,6 +1264,12 @@ dependencies = [
  "cfg-if 1.0.0",
 ]
 
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
 [[package]]
 name = "crypto-bigint"
 version = "0.4.9"
@@ -1526,6 +1541,15 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "ena"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
+dependencies = [
+ "log",
+]
+
 [[package]]
 name = "encode_unicode"
 version = "0.3.6"
@@ -2364,6 +2388,15 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.9"
@@ -2534,6 +2567,37 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "lalrpop"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca"
+dependencies = [
+ "ascii-canvas",
+ "bit-set",
+ "ena",
+ "itertools 0.11.0",
+ "lalrpop-util",
+ "petgraph",
+ "pico-args",
+ "regex",
+ "regex-syntax 0.8.2",
+ "string_cache",
+ "term",
+ "tiny-keccak",
+ "unicode-xid",
+ "walkdir",
+]
+
+[[package]]
+name = "lalrpop-util"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553"
+dependencies = [
+ "regex-automata 0.4.5",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
@@ -2856,6 +2920,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
 [[package]]
 name = "nibble_vec"
 version = "0.1.0"
@@ -3087,6 +3157,8 @@ dependencies = [
  "chumsky",
  "fm",
  "iter-extended",
+ "lalrpop",
+ "lalrpop-util",
  "noirc_errors",
  "noirc_printable_type",
  "petgraph",
@@ -3386,6 +3458,12 @@ dependencies = [
  "siphasher",
 ]
 
+[[package]]
+name = "pico-args"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.13"
@@ -3470,6 +3548,12 @@ version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
+[[package]]
+name = "precomputed-hash"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
+
 [[package]]
 name = "predicates"
 version = "2.1.5"
@@ -3478,7 +3562,7 @@ checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd"
 dependencies = [
  "difflib",
  "float-cmp",
- "itertools",
+ "itertools 0.10.5",
  "normalize-line-endings",
  "predicates-core",
  "regex",
@@ -3492,7 +3576,7 @@ checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9"
 dependencies = [
  "anstyle",
  "difflib",
- "itertools",
+ "itertools 0.10.5",
  "predicates-core",
 ]
 
@@ -4603,6 +4687,19 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
 
+[[package]]
+name = "string_cache"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
+dependencies = [
+ "new_debug_unreachable",
+ "once_cell",
+ "parking_lot 0.12.1",
+ "phf_shared",
+ "precomputed-hash",
+]
+
 [[package]]
 name = "strsim"
 version = "0.10.0"
@@ -4845,6 +4942,15 @@ dependencies = [
  "time-core",
 ]
 
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
 [[package]]
 name = "tinytemplate"
 version = "1.2.1"
@@ -5266,9 +5372,9 @@ dependencies = [
 
 [[package]]
 name = "walkdir"
-version = "2.3.3"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
 dependencies = [
  "same-file",
  "winapi-util",
diff --git a/compiler/noirc_frontend/Cargo.toml b/compiler/noirc_frontend/Cargo.toml
index 03b92e15032..e39ab2fe106 100644
--- a/compiler/noirc_frontend/Cargo.toml
+++ b/compiler/noirc_frontend/Cargo.toml
@@ -24,9 +24,16 @@ small-ord-set = "0.1.3"
 regex = "1.9.1"
 tracing.workspace = true
 petgraph = "0.6"
+lalrpop-util = { version = "0.20.2", features = ["lexer"] }
 
 [dev-dependencies]
 base64.workspace = true
 strum = "0.24"
 strum_macros = "0.24"
 tempfile.workspace = true
+
+[build-dependencies]
+lalrpop = "0.20.2"
+
+[features]
+experimental_parser = []
diff --git a/compiler/noirc_frontend/build.rs b/compiler/noirc_frontend/build.rs
new file mode 100644
index 00000000000..eb896a377ae
--- /dev/null
+++ b/compiler/noirc_frontend/build.rs
@@ -0,0 +1,28 @@
+use std::fs::{read_to_string, File};
+use std::io::Write;
+
+fn main() {
+    lalrpop::Configuration::new()
+        .emit_rerun_directives(true)
+        .use_cargo_dir_conventions()
+        .process()
+        .unwrap();
+
+    // here we get a lint error from "extern crate core", so patch that out
+    // until lalrpop does (adding cfg directives appears to be unsupported by lalrpop)
+    let out_dir = std::env::var("OUT_DIR").unwrap();
+    let parser_path = std::path::Path::new(&out_dir).join("noir_parser.rs");
+    let content_str = read_to_string(parser_path.clone()).unwrap();
+    let mut parser_file = File::create(parser_path).unwrap();
+    for line in content_str.lines() {
+        if line.contains("extern crate core") {
+            parser_file
+                .write_all(
+                    format!("{}\n", line.replace("extern crate core", "use core")).as_bytes(),
+                )
+                .unwrap();
+        } else {
+            parser_file.write_all(format!("{}\n", line).as_bytes()).unwrap();
+        }
+    }
+}
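For orientation: `use_cargo_dir_conventions()` tells LALRPOP to look for grammars under `src/`, so `src/noir_parser.lalrpop` is compiled into `$OUT_DIR/noir_parser.rs` at build time. A minimal sketch of how such a generated module is then pulled into the crate (the same mechanism `parser.rs` uses further down in this diff):

```rust
use lalrpop_util::lalrpop_mod;

// Conceptually expands to a module that include!s the generated
// $OUT_DIR/noir_parser.rs, exposing one generated parser struct per
// pub nonterminal (e.g. TopLevelStatementParser).
lalrpop_mod!(pub noir_parser);
```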
diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs
index 265b9e4b5a3..2d1ebf530e3 100644
--- a/compiler/noirc_frontend/src/lexer/lexer.rs
+++ b/compiler/noirc_frontend/src/lexer/lexer.rs
@@ -2,7 +2,9 @@ use crate::token::{Attribute, DocStyle};
 use super::{
     errors::LexerErrorKind,
-    token::{IntType, Keyword, SpannedToken, Token, Tokens},
+    token::{
+        token_to_borrowed_token, BorrowedToken, IntType, Keyword, SpannedToken, Token, Tokens,
+    },
 };
 use acvm::FieldElement;
 use noirc_errors::{Position, Span};
@@ -21,6 +23,21 @@ pub struct Lexer<'a> {
 
 pub type SpannedTokenResult = Result<SpannedToken, LexerErrorKind>;
 
+pub(crate) fn from_spanned_token_result(
+    token_result: &SpannedTokenResult,
+) -> Result<(usize, BorrowedToken<'_>, usize), LexerErrorKind> {
+    token_result
+        .as_ref()
+        .map(|spanned_token| {
+            (
+                spanned_token.to_span().start() as usize,
+                token_to_borrowed_token(spanned_token.into()),
+                spanned_token.to_span().end() as usize,
+            )
+        })
+        .map_err(Clone::clone)
+}
+
 impl<'a> Lexer<'a> {
     /// Given a source file of noir code, return all the tokens in the file
     /// in order, along with any lexing errors that occurred.
@@ -94,7 +111,7 @@ impl<'a> Lexer<'a> {
 
     fn next_token(&mut self) -> SpannedTokenResult {
         match self.next_char() {
-            Some(x) if x.is_whitespace() => {
+            Some(x) if Self::is_code_whitespace(x) => {
                 let spanned = self.eat_whitespace(x);
                 if self.skip_whitespaces {
                     self.next_token()
@@ -560,16 +577,21 @@ impl<'a> Lexer<'a> {
         }
     }
 
+    fn is_code_whitespace(c: char) -> bool {
+        c == '\t' || c == '\n' || c == '\r' || c == ' '
+    }
+
     /// Skips white space. They are not significant in the source language
     fn eat_whitespace(&mut self, initial_char: char) -> SpannedToken {
         let start = self.position;
-        let whitespace = self.eat_while(initial_char.into(), |ch| ch.is_whitespace());
+        let whitespace = self.eat_while(initial_char.into(), Self::is_code_whitespace);
         SpannedToken::new(Token::Whitespace(whitespace), Span::inclusive(start, self.position))
     }
 }
 
 impl<'a> Iterator for Lexer<'a> {
     type Item = SpannedTokenResult;
+
     fn next(&mut self) -> Option<Self::Item> {
         if self.done {
             None
@@ -578,10 +600,12 @@ impl<'a> Iterator for Lexer<'a> {
         }
     }
 }
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::token::{FunctionAttribute, SecondaryAttribute, TestScope};
+
     #[test]
     fn test_single_double_char() {
         let input = "! != + ( ) { } [ ] | , ; : :: < <= > >= & - -> . .. % / * = == << >>";
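A note on `from_spanned_token_result` above: LALRPOP's external-lexer interface consumes an iterator of `Result<(Loc, Tok, Loc), Error>` triples, where the two `Loc` values are the token's byte offsets. A hedged, crate-internal sketch of the adaptation, using only items introduced in this diff:

```rust
// Sketch (not an excerpt from the diff): turn the Noir lexer's
// SpannedTokenResult stream into the (start, token, end) triples LALRPOP wants.
let lexer = Lexer::new("use foo::bar;").skip_whitespaces(false);
let spanned: Vec<SpannedTokenResult> = lexer.collect();
// Each item is Ok((start_byte, BorrowedToken<'_>, end_byte)) or Err(LexerErrorKind).
let triples = spanned.iter().map(from_spanned_token_result);
```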
diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs
index 357b1ead593..86f26fd1c97 100644
--- a/compiler/noirc_frontend/src/lexer/token.rs
+++ b/compiler/noirc_frontend/src/lexer/token.rs
@@ -9,12 +9,105 @@ use crate::lexer::errors::LexerErrorKind;
 /// smallest unit of grammar. A parser may (will) decide to parse
 /// items differently depending on the Tokens present but will
 /// never parse the same ordering of identical tokens differently.
+#[derive(PartialEq, Eq, Hash, Debug, Clone, PartialOrd, Ord)]
+pub enum BorrowedToken<'input> {
+    Ident(&'input str),
+    Int(FieldElement),
+    Bool(bool),
+    Str(&'input str),
+    /// the u8 is the number of hashes, i.e. r###..
+    RawStr(&'input str, u8),
+    FmtStr(&'input str),
+    Keyword(Keyword),
+    IntType(IntType),
+    Attribute(Attribute),
+    LineComment(&'input str, Option<DocStyle>),
+    BlockComment(&'input str, Option<DocStyle>),
+    /// <
+    Less,
+    /// <=
+    LessEqual,
+    /// >
+    Greater,
+    /// >=
+    GreaterEqual,
+    /// ==
+    Equal,
+    /// !=
+    NotEqual,
+    /// +
+    Plus,
+    /// -
+    Minus,
+    /// *
+    Star,
+    /// /
+    Slash,
+    /// %
+    Percent,
+    /// &
+    Ampersand,
+    /// ^
+    Caret,
+    /// <<
+    ShiftLeft,
+    /// >>
+    ShiftRight,
+    /// .
+    Dot,
+    /// ..
+    DoubleDot,
+    /// (
+    LeftParen,
+    /// )
+    RightParen,
+    /// {
+    LeftBrace,
+    /// }
+    RightBrace,
+    /// [
+    LeftBracket,
+    /// ]
+    RightBracket,
+    /// ->
+    Arrow,
+    /// |
+    Pipe,
+    /// #
+    Pound,
+    /// ,
+    Comma,
+    /// :
+    Colon,
+    /// ::
+    DoubleColon,
+    /// ;
+    Semicolon,
+    /// !
+    Bang,
+    /// =
+    Assign,
+    #[allow(clippy::upper_case_acronyms)]
+    EOF,
+
+    Whitespace(&'input str),
+
+    /// An invalid character is one that is not in noir's language or grammar.
+    ///
+    /// We don't report invalid tokens in the source as errors until parsing to
+    /// avoid reporting the error twice (once while lexing, again when it is encountered
+    /// during parsing). Reporting during lexing then removing these from the token stream
+    /// would not be equivalent as it would change the resulting parse.
+    Invalid(char),
+}
+
 #[derive(PartialEq, Eq, Hash, Debug, Clone, PartialOrd, Ord)]
 pub enum Token {
     Ident(String),
     Int(FieldElement),
     Bool(bool),
     Str(String),
+    /// the u8 is the number of hashes, i.e. r###..
     RawStr(String, u8),
     FmtStr(String),
     Keyword(Keyword),
@@ -100,6 +193,57 @@ pub enum Token {
     Invalid(char),
 }
 
+pub fn token_to_borrowed_token(token: &Token) -> BorrowedToken<'_> {
+    match token {
+        Token::Ident(ref s) => BorrowedToken::Ident(s),
+        Token::Int(n) => BorrowedToken::Int(*n),
+        Token::Bool(b) => BorrowedToken::Bool(*b),
+        Token::Str(ref b) => BorrowedToken::Str(b),
+        Token::FmtStr(ref b) => BorrowedToken::FmtStr(b),
+        Token::RawStr(ref b, hashes) => BorrowedToken::RawStr(b, *hashes),
+        Token::Keyword(k) => BorrowedToken::Keyword(*k),
+        Token::Attribute(ref a) => BorrowedToken::Attribute(a.clone()),
+        Token::LineComment(ref s, _style) => BorrowedToken::LineComment(s, *_style),
+        Token::BlockComment(ref s, _style) => BorrowedToken::BlockComment(s, *_style),
+        Token::IntType(ref i) => BorrowedToken::IntType(i.clone()),
+        Token::Less => BorrowedToken::Less,
+        Token::LessEqual => BorrowedToken::LessEqual,
+        Token::Greater => BorrowedToken::Greater,
+        Token::GreaterEqual => BorrowedToken::GreaterEqual,
+        Token::Equal => BorrowedToken::Equal,
+        Token::NotEqual => BorrowedToken::NotEqual,
+        Token::Plus => BorrowedToken::Plus,
+        Token::Minus => BorrowedToken::Minus,
+        Token::Star => BorrowedToken::Star,
+        Token::Slash => BorrowedToken::Slash,
+        Token::Percent => BorrowedToken::Percent,
+        Token::Ampersand => BorrowedToken::Ampersand,
+        Token::Caret => BorrowedToken::Caret,
+        Token::ShiftLeft => BorrowedToken::ShiftLeft,
+        Token::ShiftRight => BorrowedToken::ShiftRight,
+        Token::Dot => BorrowedToken::Dot,
+        Token::DoubleDot => BorrowedToken::DoubleDot,
+        Token::LeftParen => BorrowedToken::LeftParen,
+        Token::RightParen => BorrowedToken::RightParen,
+        Token::LeftBrace => BorrowedToken::LeftBrace,
+        Token::RightBrace => BorrowedToken::RightBrace,
+        Token::LeftBracket => BorrowedToken::LeftBracket,
+        Token::RightBracket => BorrowedToken::RightBracket,
+        Token::Arrow => BorrowedToken::Arrow,
+        Token::Pipe => BorrowedToken::Pipe,
+        Token::Pound => BorrowedToken::Pound,
+        Token::Comma => BorrowedToken::Comma,
+        Token::Colon => BorrowedToken::Colon,
+        Token::DoubleColon => BorrowedToken::DoubleColon,
+        Token::Semicolon => BorrowedToken::Semicolon,
+        Token::Assign => BorrowedToken::Assign,
+        Token::Bang => BorrowedToken::Bang,
+        Token::EOF => BorrowedToken::EOF,
+        Token::Invalid(c) => BorrowedToken::Invalid(*c),
+        Token::Whitespace(ref s) => BorrowedToken::Whitespace(s),
+    }
+}
+
 #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
 pub enum DocStyle {
     Outer,
@@ -126,6 +270,12 @@ impl From<SpannedToken> for Token {
     }
 }
 
+impl<'a> From<&'a SpannedToken> for &'a Token {
+    fn from(spt: &'a SpannedToken) -> Self {
+        &spt.0.contents
+    }
+}
+
 impl SpannedToken {
     pub fn new(token: Token, span: Span) -> SpannedToken {
         SpannedToken(Spanned::from(span, token))
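The point of shadowing `Token` with `BorrowedToken<'input>` is to keep the LALRPOP grammar allocation-free: every string payload borrows from the already-lexed owned token. A small illustrative sketch (the `noirc_frontend::token` import path is an assumption for the example; inside the crate these items are in scope via `crate::token`):

```rust
use noirc_frontend::token::{token_to_borrowed_token, BorrowedToken, Token};

// The conversion clones only Attribute payloads; everything else is borrowed.
let owned = Token::Ident("main".to_string());
let borrowed: BorrowedToken<'_> = token_to_borrowed_token(&owned);
assert_eq!(borrowed, BorrowedToken::Ident("main"));
```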
diff --git a/compiler/noirc_frontend/src/noir_parser.lalrpop b/compiler/noirc_frontend/src/noir_parser.lalrpop
new file mode 100644
index 00000000000..c8d293fb72f
--- /dev/null
+++ b/compiler/noirc_frontend/src/noir_parser.lalrpop
@@ -0,0 +1,164 @@
+use noirc_errors::Span;
+
+use crate::lexer::token::BorrowedToken;
+use crate::lexer::token as noir_token;
+use crate::lexer::errors::LexerErrorKind;
+use crate::parser::TopLevelStatement;
+use crate::{Ident, Path, PathKind, UseTree, UseTreeKind};
+
+use lalrpop_util::ErrorRecovery;
+
+grammar<'input, 'err>(input: &'input str, errors: &'err mut [ErrorRecovery<usize, BorrowedToken<'input>, &'static str>]);
+
+extern {
+    type Location = usize;
+
+    type Error = LexerErrorKind;
+
+    // NOTE: each token needs a terminal defined
+    enum BorrowedToken<'input> {
+        string => BorrowedToken::Str(<&'input str>),
+        ident => BorrowedToken::Ident(<&'input str>),
+
+        // symbols
+        "<" => BorrowedToken::Less,
+        "<=" => BorrowedToken::LessEqual,
+        ">" => BorrowedToken::Greater,
+        ">=" => BorrowedToken::GreaterEqual,
+        "==" => BorrowedToken::Equal,
+        "!=" => BorrowedToken::NotEqual,
+        "+" => BorrowedToken::Plus,
+        "-" => BorrowedToken::Minus,
+        "*" => BorrowedToken::Star,
+        "/" => BorrowedToken::Slash,
+        "%" => BorrowedToken::Percent,
+        "&" => BorrowedToken::Ampersand,
+        "^" => BorrowedToken::Caret,
+        "<<" => BorrowedToken::ShiftLeft,
+        ">>" => BorrowedToken::ShiftRight,
+        "." => BorrowedToken::Dot,
+        ".." => BorrowedToken::DoubleDot,
+        "(" => BorrowedToken::LeftParen,
+        ")" => BorrowedToken::RightParen,
+        "{" => BorrowedToken::LeftBrace,
+        "}" => BorrowedToken::RightBrace,
+        "[" => BorrowedToken::LeftBracket,
+        "]" => BorrowedToken::RightBracket,
+        "->" => BorrowedToken::Arrow,
+        "|" => BorrowedToken::Pipe,
+        "#" => BorrowedToken::Pound,
+        "," => BorrowedToken::Comma,
+        ":" => BorrowedToken::Colon,
+        "::" => BorrowedToken::DoubleColon,
+        ";" => BorrowedToken::Semicolon,
+        "!" => BorrowedToken::Bang,
+        "=" => BorrowedToken::Assign,
+        // keywords
+        "as" => BorrowedToken::Keyword(noir_token::Keyword::As),
+        "assert" => BorrowedToken::Keyword(noir_token::Keyword::Assert),
+        "assert_eq" => BorrowedToken::Keyword(noir_token::Keyword::AssertEq),
+        "bool" => BorrowedToken::Keyword(noir_token::Keyword::Bool),
+        "break" => BorrowedToken::Keyword(noir_token::Keyword::Break),
+        "call_data" => BorrowedToken::Keyword(noir_token::Keyword::CallData),
+        "char" => BorrowedToken::Keyword(noir_token::Keyword::Char),
+        "comptime" => BorrowedToken::Keyword(noir_token::Keyword::CompTime),
+        "constrain" => BorrowedToken::Keyword(noir_token::Keyword::Constrain),
+        "continue" => BorrowedToken::Keyword(noir_token::Keyword::Continue),
+        "contract" => BorrowedToken::Keyword(noir_token::Keyword::Contract),
+        "crate" => BorrowedToken::Keyword(noir_token::Keyword::Crate),
+        "dep" => BorrowedToken::Keyword(noir_token::Keyword::Dep),
+        "distinct" => BorrowedToken::Keyword(noir_token::Keyword::Distinct),
+        "else" => BorrowedToken::Keyword(noir_token::Keyword::Else),
+        "Field" => BorrowedToken::Keyword(noir_token::Keyword::Field),
+        "fn" => BorrowedToken::Keyword(noir_token::Keyword::Fn),
+        "for" => BorrowedToken::Keyword(noir_token::Keyword::For),
+        "fmtstr" => BorrowedToken::Keyword(noir_token::Keyword::FormatString),
+        "global" => BorrowedToken::Keyword(noir_token::Keyword::Global),
+        "if" => BorrowedToken::Keyword(noir_token::Keyword::If),
+        "impl" => BorrowedToken::Keyword(noir_token::Keyword::Impl),
+        "in" => BorrowedToken::Keyword(noir_token::Keyword::In),
+        "let" => BorrowedToken::Keyword(noir_token::Keyword::Let),
+        "mod" => BorrowedToken::Keyword(noir_token::Keyword::Mod),
+        "mut" => BorrowedToken::Keyword(noir_token::Keyword::Mut),
+        "pub" => BorrowedToken::Keyword(noir_token::Keyword::Pub),
+        "quote" => BorrowedToken::Keyword(noir_token::Keyword::Quote),
+        "return" => BorrowedToken::Keyword(noir_token::Keyword::Return),
+        "return_data" => BorrowedToken::Keyword(noir_token::Keyword::ReturnData),
+        "str" => BorrowedToken::Keyword(noir_token::Keyword::String),
+        "struct" => BorrowedToken::Keyword(noir_token::Keyword::Struct),
+        "trait" => BorrowedToken::Keyword(noir_token::Keyword::Trait),
+        "type" => BorrowedToken::Keyword(noir_token::Keyword::Type),
+        "unchecked" => BorrowedToken::Keyword(noir_token::Keyword::Unchecked),
+        "unconstrained" => BorrowedToken::Keyword(noir_token::Keyword::Unconstrained),
+        "use" => BorrowedToken::Keyword(noir_token::Keyword::Use),
+        "where" => BorrowedToken::Keyword(noir_token::Keyword::Where),
+        "while" => BorrowedToken::Keyword(noir_token::Keyword::While),
+        // bool
+        "true" => BorrowedToken::Bool(true),
+        "false" => BorrowedToken::Bool(false),
+
+        r"[\t\r\n ]+" => BorrowedToken::Whitespace(_),
+
+        EOF => BorrowedToken::EOF,
+    }
+}
+
+pub(crate) TopLevelStatement: TopLevelStatement = {
+    "use" r"[\t\r\n ]+" <use_tree:UseTree> ";" EOF => {
+        TopLevelStatement::Import(use_tree)
+    }
+}
+
+UseTree: UseTree = {
+    // path::to::ident as SomeAlias
+    <mut prefix:Path> <alias:Alias?> => {
+        let ident = prefix.pop();
+        let kind = UseTreeKind::Path(ident, alias);
+        UseTree { prefix, kind }
+    },
+}
+
+pub(crate) Path: Path = {
+    <lo:@L> "crate" "::" <segments:PathSegments> <hi:@R> => {
+        let kind = PathKind::Crate;
+        let span = Span::from(lo as u32..hi as u32);
+        Path { segments, kind, span }
+    },
+
+    <lo:@L> "dep" "::" <segments:PathSegments> <hi:@R> => {
+        let kind = PathKind::Dep;
+        let span = Span::from(lo as u32..hi as u32);
+        Path { segments, kind, span }
+    },
+
+    <lo:@L> <id:Ident> <mut segments:PathSegments> <hi:@R> => {
+        segments.insert(0, id);
+        let kind = PathKind::Plain;
+        let span = Span::from(lo as u32..hi as u32);
+        Path { segments, kind, span }
+    },
+}
+
+PathSegments: Vec<Ident> = {
+    <segments:("::" <Ident>)*> => {
+        segments
+    }
+}
+
+Alias: Ident = {
+    r"[\t\r\n ]+" "as" r"[\t\r\n ]+" <Ident> => <>,
+}
+
+Ident: Ident = {
+    <lo:@L> <i:ident> <hi:@R> => {
+        let token = noir_token::Token::Ident(i.to_string());
+        let span = Span::from(lo as u32..hi as u32);
+        Ident::from_token(token, span)
+    },
+}
+
+Bool: BorrowedToken<'input> = {
+    "true" => BorrowedToken::Bool(true),
+    "false" => BorrowedToken::Bool(false),
+};
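To make the grammar concrete: declaring `pub(crate) TopLevelStatement` makes LALRPOP emit a `TopLevelStatementParser` struct whose `parse` method takes the extra `grammar(...)` parameters (`input`, `errors`) followed by the token stream. A hedged, crate-internal sketch of driving it, mirroring the `prototype_parse_use_tree` function added in `parser.rs` below:

```rust
let input = "use foo::bar as baz;";
let mut errors = Vec::new();

// The grammar matches whitespace terminals explicitly (r"[\t\r\n ]+"),
// so the lexer must not drop them.
let lexer_result: Vec<SpannedTokenResult> =
    Lexer::new(input).skip_whitespaces(false).collect();
let tokens = lexer_result.iter().map(from_spanned_token_result);

let calculated = noir_parser::TopLevelStatementParser::new().parse(input, &mut errors, tokens);
// Expected: Ok(TopLevelStatement::Import(use_tree)) with prefix path `foo`,
// item `bar`, and alias `baz`.
```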
diff --git a/compiler/noirc_frontend/src/parser/errors.rs b/compiler/noirc_frontend/src/parser/errors.rs
index 43a1f96f13f..895d4e07bbd 100644
--- a/compiler/noirc_frontend/src/parser/errors.rs
+++ b/compiler/noirc_frontend/src/parser/errors.rs
@@ -110,25 +110,31 @@ impl ParserError {
 
 impl std::fmt::Display for ParserError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let reason_str: String = if self.reason.is_none() {
+            "".to_string()
+        } else {
+            format!("\nreason: {}", Diagnostic::from(self.clone()))
+        };
         let mut expected = vecmap(&self.expected_tokens, ToString::to_string);
         expected.append(&mut vecmap(&self.expected_labels, |label| format!("{label}")));
 
         if expected.is_empty() {
-            write!(f, "Unexpected {} in input", self.found)
+            write!(f, "Unexpected {} in input{}", self.found, reason_str)
         } else if expected.len() == 1 {
             let first = expected.first().unwrap();
             let vowel = "aeiou".contains(first.chars().next().unwrap());
             write!(
                 f,
-                "Expected a{} {} but found {}",
+                "Expected a{} {} but found {}{}",
                 if vowel { "n" } else { "" },
                 first,
-                self.found
+                self.found,
+                reason_str
             )
         } else {
             let expected = expected.iter().map(ToString::to_string).collect::<Vec<_>>().join(", ");
 
-            write!(f, "Unexpected {}, expected one of {}", self.found, expected)
+            write!(f, "Unexpected {}, expected one of {}{}", self.found, expected, reason_str)
         }
     }
 }
diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs
index 0a21465fe87..1bedf904da2 100644
--- a/compiler/noirc_frontend/src/parser/parser.rs
+++ b/compiler/noirc_frontend/src/parser/parser.rs
@@ -35,7 +35,7 @@ use super::{spanned, Item, ItemKind};
 use crate::ast::{
     Expression, ExpressionKind, LetStatement, StatementKind, UnresolvedType, UnresolvedTypeData,
 };
-use crate::lexer::Lexer;
+use crate::lexer::{lexer::from_spanned_token_result, Lexer};
 use crate::parser::{force, ignore_then_commit, statement_recovery};
 use crate::token::{Keyword, Token, TokenKind};
 use crate::{
@@ -47,6 +47,7 @@ use crate::{
 
 use chumsky::prelude::*;
 use iter_extended::vecmap;
+use lalrpop_util::lalrpop_mod;
 use noirc_errors::{Span, Spanned};
 
 mod assertion;
@@ -59,6 +60,9 @@ mod primitives;
 mod structs;
 mod traits;
 
+// synthesized by LALRPOP
+lalrpop_mod!(pub noir_parser);
+
 #[cfg(test)]
 mod test_helpers;
 
@@ -77,8 +81,79 @@ pub fn parse_program(source_program: &str) -> (ParsedModule, Vec<ParserError>) {
     let (module, mut parsing_errors) = program().parse_recovery_verbose(tokens);
     parsing_errors.extend(lexing_errors.into_iter().map(Into::into));
 
+    let parsed_module = module.unwrap_or(ParsedModule { items: vec![] });
+
+    if cfg!(feature = "experimental_parser") {
+        for parsed_item in &parsed_module.items {
+            if lalrpop_parser_supports_kind(&parsed_item.kind) {
+                match &parsed_item.kind {
+                    ItemKind::Import(parsed_use_tree) => {
+                        prototype_parse_use_tree(Some(parsed_use_tree), source_program);
+                    }
+                    // other kinds prevented by lalrpop_parser_supports_kind
+                    _ => unreachable!(),
+                }
+            }
+        }
+    }
+    (parsed_module, parsing_errors)
+}
+
+fn prototype_parse_use_tree(expected_use_tree_opt: Option<&UseTree>, input: &str) {
+    // TODO(https://github.com/noir-lang/noir/issues/4777): currently skipping
+    // recursive use trees, e.g. "use std::{foo, bar}"
+    if input.contains('{') {
+        return;
+    }
+
+    let mut lexer = Lexer::new(input);
+    lexer = lexer.skip_whitespaces(false);
+    let mut errors = Vec::new();
+
+    // NOTE: this is a hack to get the references working
+    // => this likely means that we'll want to propagate the <'input> lifetime further into Token
+    let lexer_result = lexer.collect::<Vec<SpannedTokenResult>>();
+    let referenced_lexer_result = lexer_result.iter().map(from_spanned_token_result);
+
+    let calculated = noir_parser::TopLevelStatementParser::new().parse(
+        input,
+        &mut errors,
+        referenced_lexer_result,
+    );
+
+    if let Some(expected_use_tree) = expected_use_tree_opt {
+        assert!(
+            calculated.is_ok(),
+            "calculated not Ok(_): {:?}\n\nlexer: {:?}\n\ninput: {:?}",
+            calculated,
+            lexer_result,
+            input
+        );
+
+        match calculated.unwrap() {
+            TopLevelStatement::Import(parsed_use_tree) => {
+                assert_eq!(expected_use_tree, &parsed_use_tree);
+            }
+            unexpected_calculated => {
+                panic!(
+                    "expected a TopLevelStatement::Import, but found: {:?}",
+                    unexpected_calculated
+                )
+            }
+        }
+    } else {
+        assert!(
+            calculated.is_err(),
+            "calculated not Err(_): {:?}\n\nlexer: {:?}\n\ninput: {:?}",
+            calculated,
+            lexer_result,
+            input
+        );
+    }
+}
 
-    (module.unwrap_or(ParsedModule { items: vec![] }), parsing_errors)
+fn lalrpop_parser_supports_kind(kind: &ItemKind) -> bool {
+    matches!(kind, ItemKind::Import(_))
 }
 
 /// program: module EOF
@@ -1512,33 +1587,53 @@ mod test {
 
     #[test]
     fn parse_use() {
-        parse_all(
-            use_statement(),
-            vec![
-                "use std::hash",
-                "use std",
-                "use foo::bar as hello",
-                "use bar as bar",
-                "use foo::{}",
-                "use foo::{bar,}",
-                "use foo::{bar, hello}",
-                "use foo::{bar as bar2, hello}",
-                "use foo::{bar as bar2, hello::{foo}, nested::{foo, bar}}",
-                "use dep::{std::println, bar::baz}",
-            ],
-        );
+        let mut valid_use_statements = vec![
+            "use std::hash",
+            "use std",
+            "use foo::bar as hello",
+            "use bar as bar",
+            "use foo::{}",
+            "use foo::{bar,}",
+            "use foo::{bar, hello}",
+            "use foo::{bar as bar2, hello}",
+            "use foo::{bar as bar2, hello::{foo}, nested::{foo, bar}}",
+            "use dep::{std::println, bar::baz}",
+        ];
 
-        parse_all_failing(
-            use_statement(),
-            vec![
-                "use std as ;",
-                "use foobar as as;",
-                "use hello:: as foo;",
-                "use foo bar::baz",
-                "use foo bar::{baz}",
-                "use foo::{,}",
-            ],
-        );
+        let mut invalid_use_statements = vec![
+            "use std as ;",
+            "use foobar as as;",
+            "use hello:: as foo;",
+            "use foo bar::baz",
+            "use foo bar::{baz}",
+            "use foo::{,}",
+        ];
+
+        let use_statements = valid_use_statements
+            .iter_mut()
+            .map(|valid_str| (valid_str, true))
+            .chain(invalid_use_statements.iter_mut().map(|invalid_str| (invalid_str, false)));
+
+        for (use_statement_str, expect_valid) in use_statements {
+            let mut use_statement_str = use_statement_str.to_string();
+            let expected_use_statement = if expect_valid {
+                let (result_opt, _diagnostics) =
+                    parse_recover(&use_statement(), &use_statement_str);
+                use_statement_str.push(';');
+                match result_opt.unwrap() {
+                    TopLevelStatement::Import(expected_use_statement) => {
+                        Some(expected_use_statement)
+                    }
+                    _ => unreachable!(),
+                }
+            } else {
+                let result = parse_with(&use_statement(), &use_statement_str);
+                assert!(result.is_err());
+                None
+            };
+
+            prototype_parse_use_tree(expected_use_statement.as_ref(), &use_statement_str);
+        }
     }
 
     #[test]
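One detail worth noting in the `parse_program` hunk above: `cfg!(feature = "experimental_parser")` evaluates to a compile-time `bool`, so the LALRPOP cross-check is always type-checked but only executed when the crate is built with the `experimental_parser` feature declared in the `Cargo.toml` hunk earlier — unlike an `#[cfg(...)]` attribute, which would compile the code out entirely:

```rust
// cfg! yields a const bool; the branch is compiled either way and
// optimized out when the feature is disabled.
if cfg!(feature = "experimental_parser") {
    // run the LALRPOP prototype alongside the chumsky parser
}
```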
diff --git a/deny.toml b/deny.toml
index eff233687e8..db7e53cad24 100644
--- a/deny.toml
+++ b/deny.toml
@@ -58,7 +58,7 @@ allow = [
     # bitmaps 2.1.0, im 15.1.0
     "MPL-2.0",
     # Boost Software License
-    "BSL-1.0",
+    "BSL-1.0"
 ]
 
 # Allow 1 or more licenses on a per-crate basis, so that particular licenses
@@ -70,6 +70,7 @@ exceptions = [
     { allow = ["CC0-1.0"], name = "more-asserts" },
     { allow = ["CC0-1.0"], name = "jsonrpc" },
     { allow = ["CC0-1.0"], name = "notify" },
+    { allow = ["CC0-1.0"], name = "tiny-keccak" },
     { allow = ["MPL-2.0"], name = "sized-chunks" },
     { allow = ["MPL-2.0"], name = "webpki-roots" },