feat: handwritten parser (#6180)

# Description Resolves #853 ## Problem There are some issues with the current parser: - it leads to "stack overflow" with small programs that should probably compile (like a program with a chain of 17 `if-else-if`) - it leads to some of us not being able to run the noirc_frontend tests because of linker errors, and sometimes making changes to the parser will lead to linker errors that we have to work around - it's (very) slow ## Summary This PR implements a hand-written parser. It parses any program with just one look-ahead token. It has very good error-recovery. I tested the parser's performance by copying the contents of `noir-contracts/contracts/avm_test_contract/src/main.nr` in Aztec-Packages 100 times to a single file. That ends up with a file of about 57K lines. The times: - chumsky parser: 1.52 **seconds** - handwritten parser: 52.97 **milliseconds** Some other benefits: - The linker errors are gone! - Compiling noirc_frontend is slightly faster - Macro code also becomes faster (`quote { ... }.as_expr()`, etc., invoke the parser). For example `test_programs/noir_test_success/comptime_expr/src/main.nr` takes around one second with chumsky and 140ms with the handwritten parser (to do `nargo compile`) - Even though the parser is handwritten, I think the parsing code is relatively simple. It's just "check if we get this token, then do this" or sometimes "check if we get this token followed by this one (or not followed by this one)". Also, the `impl`s and `traits` that we needed for chumsky (and the lifetimes, and passing parsers around, and cloning them, and calling `boxed()`, etc.) are gone, which I believe makes the code much simpler. That said, chumsky has great helpers to be able to parse things separated by, say, a comma, and this PR at least has that too (`parse_many`). 
- Compiling an empty program is faster (goes from 650ms to 140ms) - Compiling any program is much faster - Tests run faster (it would become feasible to run tests locally before pushing to CI to avoid CI cycles): - Running noirc_frontend tests: - before: 1:03 minutes - after: 6 seconds - Running lsp tests: - before: 55 seconds - after: 6 seconds - Running nargo_cli tests: - before: 2:47 minutes - after: 38 seconds - CI runs faster (for example each of the four partitions takes 1 minute instead of 4 minutes) - Building the compiler is faster: - before: 1:29 minutes - after: 1:19 minutes (so building noirc_frontend is 10 seconds faster because that's the only thing changed in this PR) - Better parsing recovery and more fine-grained control over the errors we report I tested this parser by running `./bootstrap.sh` on the Aztec-Packages contracts and they compile fine (of course they didn't compile right away, I had to fix some bugs to get there). ## Additional Context ## Documentation Check one: - [x] No documentation needed. - [ ] Documentation included in this PR. - [ ] **[For Experimental Features]** Documentation to be submitted in a separate PR. # PR Checklist - [x] I have tested the changes locally. - [x] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings. --------- Co-authored-by: Tom French <15848336+TomAFrench@users.noreply.github.com> Co-authored-by: jfecher <jake@aztecprotocol.com>
noir-lang · Oct 7, 2024 · c4273a0 · c4273a0
1 parent 1a2ca46
commit c4273a0
Show file tree

Hide file tree

Showing 64 changed files with 8,563 additions and 4,249 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -121,10 +121,6 @@ clap = { version = "4.3.19", features = ["derive", "env"] }
 codespan = { version = "0.11.1", features = ["serialization"] }
 codespan-lsp = "0.11.1"
 codespan-reporting = "0.11.1"
-chumsky = { git = "https://github.com/jfecher/chumsky", rev = "ad9d312", default-features = false, features = [
-    "ahash",
-    "std",
-] }
 
 # Benchmarking
 criterion = "0.5.0"

diff --git a/compiler/noirc_errors/Cargo.toml b/compiler/noirc_errors/Cargo.toml
@@ -16,7 +16,6 @@ acvm.workspace = true
 codespan-reporting.workspace = true
 codespan.workspace = true
 fm.workspace = true
-chumsky.workspace = true
 noirc_printable_type.workspace = true
 serde.workspace = true
 serde_with = "3.2.0"

diff --git a/compiler/noirc_errors/src/position.rs b/compiler/noirc_errors/src/position.rs
@@ -8,7 +8,7 @@ use std::{
 
 pub type Position = u32;
 
-#[derive(PartialOrd, Eq, Ord, Debug, Clone)]
+#[derive(PartialOrd, Eq, Ord, Debug, Clone, Default)]
 pub struct Spanned<T> {
     pub contents: T,
     span: Span,
@@ -121,26 +121,6 @@ impl From<Range<u32>> for Span {
     }
 }
 
-impl chumsky::Span for Span {
-    type Context = ();
-
-    type Offset = u32;
-
-    fn new(_context: Self::Context, range: Range<Self::Offset>) -> Self {
-        Span(ByteSpan::from(range))
-    }
-
-    fn context(&self) -> Self::Context {}
-
-    fn start(&self) -> Self::Offset {
-        self.start()
-    }
-
-    fn end(&self) -> Self::Offset {
-        self.end()
-    }
-}
-
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
 pub struct Location {
     pub span: Span,

diff --git a/compiler/noirc_frontend/Cargo.toml b/compiler/noirc_frontend/Cargo.toml
@@ -16,7 +16,6 @@ noirc_errors.workspace = true
 noirc_printable_type.workspace = true
 fm.workspace = true
 iter-extended.workspace = true
-chumsky.workspace = true
 thiserror.workspace = true
 smol_str.workspace = true
 im.workspace = true

diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs
@@ -309,6 +309,7 @@ impl Expression {
 pub type BinaryOp = Spanned<BinaryOpKind>;
 
 #[derive(PartialEq, PartialOrd, Eq, Ord, Hash, Debug, Copy, Clone)]
+#[cfg_attr(test, derive(strum_macros::EnumIter))]
 pub enum BinaryOpKind {
     Add,
     Subtract,
@@ -873,7 +874,7 @@ impl FunctionDefinition {
 impl Display for FunctionDefinition {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         writeln!(f, "{:?}", self.attributes)?;
-        write!(f, "fn {} {}", self.signature(), self.body)
+        write!(f, "{} {}", self.signature(), self.body)
     }
 }
 

diff --git a/compiler/noirc_frontend/src/ast/statement.rs b/compiler/noirc_frontend/src/ast/statement.rs
@@ -180,7 +180,7 @@ impl StatementKind {
     }
 }
 
-#[derive(Eq, Debug, Clone)]
+#[derive(Eq, Debug, Clone, Default)]
 pub struct Ident(pub Spanned<String>);
 
 impl Ident {
@@ -333,12 +333,12 @@ impl Display for UseTree {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.prefix)?;
 
+        if !self.prefix.segments.is_empty() {
+            write!(f, "::")?;
+        }
+
         match &self.kind {
             UseTreeKind::Path(name, alias) => {
-                if !(self.prefix.segments.is_empty() && self.prefix.kind == PathKind::Plain) {
-                    write!(f, "::")?;
-                }
-
                 write!(f, "{name}")?;
 
                 if let Some(alias) = alias {
@@ -348,7 +348,7 @@ impl Display for UseTree {
                 Ok(())
             }
             UseTreeKind::List(trees) => {
-                write!(f, "::{{")?;
+                write!(f, "{{")?;
                 let tree = vecmap(trees, ToString::to_string).join(", ");
                 write!(f, "{tree}}}")
             }
@@ -467,7 +467,9 @@ impl Path {
     }
 
     pub fn is_ident(&self) -> bool {
-        self.segments.len() == 1 && self.kind == PathKind::Plain
+        self.kind == PathKind::Plain
+            && self.segments.len() == 1
+            && self.segments.first().unwrap().generics.is_none()
     }
 
     pub fn as_ident(&self) -> Option<&Ident> {
@@ -484,6 +486,10 @@ impl Path {
         self.segments.first().cloned().map(|segment| segment.ident)
     }
 
+    pub fn is_empty(&self) -> bool {
+        self.segments.is_empty() && self.kind == PathKind::Plain
+    }
+
     pub fn as_string(&self) -> String {
         let mut string = String::new();
 
@@ -650,14 +656,6 @@ impl Pattern {
         }
     }
 
-    pub(crate) fn into_ident(self) -> Ident {
-        match self {
-            Pattern::Identifier(ident) => ident,
-            Pattern::Mutable(pattern, _, _) => pattern.into_ident(),
-            other => panic!("Pattern::into_ident called on {other} pattern with no identifier"),
-        }
-    }
-
     pub(crate) fn try_as_expression(&self, interner: &NodeInterner) -> Option<Expression> {
         match self {
             Pattern::Identifier(ident) => Some(Expression {
@@ -726,37 +724,36 @@ impl LValue {
         Expression::new(kind, span)
     }
 
-    pub fn from_expression(expr: Expression) -> LValue {
+    pub fn from_expression(expr: Expression) -> Option<LValue> {
         LValue::from_expression_kind(expr.kind, expr.span)
     }
 
-    pub fn from_expression_kind(expr: ExpressionKind, span: Span) -> LValue {
+    pub fn from_expression_kind(expr: ExpressionKind, span: Span) -> Option<LValue> {
         match expr {
-            ExpressionKind::Variable(path) => LValue::Ident(path.as_ident().unwrap().clone()),
-            ExpressionKind::MemberAccess(member_access) => LValue::MemberAccess {
-                object: Box::new(LValue::from_expression(member_access.lhs)),
+            ExpressionKind::Variable(path) => Some(LValue::Ident(path.as_ident().unwrap().clone())),
+            ExpressionKind::MemberAccess(member_access) => Some(LValue::MemberAccess {
+                object: Box::new(LValue::from_expression(member_access.lhs)?),
                 field_name: member_access.rhs,
                 span,
-            },
-            ExpressionKind::Index(index) => LValue::Index {
-                array: Box::new(LValue::from_expression(index.collection)),
+            }),
+            ExpressionKind::Index(index) => Some(LValue::Index {
+                array: Box::new(LValue::from_expression(index.collection)?),
                 index: index.index,
                 span,
-            },
+            }),
             ExpressionKind::Prefix(prefix) => {
                 if matches!(
                     prefix.operator,
                     crate::ast::UnaryOp::Dereference { implicitly_added: false }
                 ) {
-                    LValue::Dereference(Box::new(LValue::from_expression(prefix.rhs)), span)
+                    Some(LValue::Dereference(Box::new(LValue::from_expression(prefix.rhs)?), span))
                 } else {
-                    panic!("Called LValue::from_expression with an invalid prefix operator")
+                    None
                 }
             }
-            ExpressionKind::Interned(id) => LValue::Interned(id, span),
-            _ => {
-                panic!("Called LValue::from_expression with an invalid expression")
-            }
+            ExpressionKind::Parenthesized(expr) => LValue::from_expression(*expr),
+            ExpressionKind::Interned(id) => Some(LValue::Interned(id, span)),
+            _ => None,
         }
     }
 

diff --git a/compiler/noirc_frontend/src/ast/traits.rs b/compiler/noirc_frontend/src/ast/traits.rs
@@ -216,7 +216,24 @@ impl Display for TraitBound {
 
 impl Display for NoirTraitImpl {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        writeln!(f, "impl {}{} for {} {{", self.trait_name, self.trait_generics, self.object_type)?;
+        write!(f, "impl")?;
+        if !self.impl_generics.is_empty() {
+            write!(
+                f,
+                "<{}>",
+                self.impl_generics.iter().map(ToString::to_string).collect::<Vec<_>>().join(", ")
+            )?;
+        }
+
+        write!(f, " {}{} for {}", self.trait_name, self.trait_generics, self.object_type)?;
+        if !self.where_clause.is_empty() {
+            write!(
+                f,
+                " where {}",
+                self.where_clause.iter().map(ToString::to_string).collect::<Vec<_>>().join(", ")
+            )?;
+        }
+        writeln!(f, "{{")?;
 
         for item in self.items.iter() {
             let item = item.to_string();

diff --git a/compiler/noirc_frontend/src/debug/mod.rs b/compiler/noirc_frontend/src/debug/mod.rs
@@ -1,5 +1,6 @@
 use crate::ast::PathSegment;
-use crate::parser::{parse_program, ParsedModule};
+use crate::parse_program;
+use crate::parser::ParsedModule;
 use crate::{
     ast,
     ast::{Path, PathKind},