From d8cc4a9f3579ea311e50b8f9461c3f29ade648dc Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 15:09:30 +0100 Subject: [PATCH 01/14] Minor rewording of module documentation --- core/src/transform/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/transform/mod.rs b/core/src/transform/mod.rs index 31a3b01451..426d10f019 100644 --- a/core/src/transform/mod.rs +++ b/core/src/transform/mod.rs @@ -1,4 +1,4 @@ -//! Various post transformations of nickel code. +//! Program transformations. use crate::{ cache::ImportResolver, term::{RichTerm, Traverse, TraverseOrder}, From 86fcab3b561d930f2b918884dd3bd54ecf2457c5 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 18:30:04 +0100 Subject: [PATCH 02/14] Basics of pattern compilation This PR implements core functions to compile patterns and match expressions to simpler Nickel code that doesn't use patterns at all. This is a step toward allowing general pattern in match expressions. For now, this new implementation isn't actually used yet as it lacks a couple new primitive operations to work properly. The implementation also ignores contracts annotation in the pattern. --- core/src/term/mod.rs | 10 +- core/src/term/pattern.rs | 258 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+), 1 deletion(-) diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 3c1614d8d2..c2e03de62b 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -618,6 +618,15 @@ impl fmt::Display for MergePriority { } } +/// Content of a match expression. +#[derive(Debug, PartialEq, Clone)] +pub struct MatchData { + /// Branches of the match expression, where the first component is the pattern on the left hand + /// side of `=>` and the second component is the body of the branch. + pub branches: Vec<(Pattern, RichTerm)>, + pub default: Option, +} + /// A type or a contract together with its corresponding label. #[derive(Debug, PartialEq, Clone)] pub struct LabeledType { @@ -2499,7 +2508,6 @@ pub mod make { Term::LetPattern(pat.into(), t1.into(), t2.into()).into() } - #[cfg(test)] pub fn if_then_else(cond: T1, t1: T2, t2: T3) -> RichTerm where T1: Into, diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index 766f963acc..a93c99515b 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -269,3 +269,261 @@ impl ElaborateContract for RecordPattern { }) } } + +/// Compilation of pattern matching down to pattern-less Nickel code. +pub mod compile { + use super::*; + use crate::{ + mk_app, mk_fun, + term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, + }; + + // Generate a `%record_insert% id value_id bindings_id` primop application. + fn bindings_insert(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + mk_app!( + make::op2( + BinaryOp::DynExtend { + ext_kind: RecordExtKind::WithValue, + metadata: Default::default(), + pending_contracts: Default::default(), + op_kind: RecordOpKind::IgnoreEmptyOpt, + }, + Term::Var(id), + Term::Var(bindings_id) + ), + Term::Var(value_id) + ) + } + + pub trait CompilePart { + /// Compile part of a broader pattern to a Nickel expression with two free variables (which + /// is equivalent to a function of two arguments): + /// + /// 1. The value being matched on (`value_id`) + /// 2. A dictionary of the current assignment of pattern variables to sub-expressions of the + /// matched expression + /// + /// The compiled expression must return either `null` if the pattern doesn't match, or a + /// dictionary mapping pattern variables to the corresponding sub-expressions of the + /// matched value if the pattern matched with success. + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm; + } + + impl CompilePart for Pattern { + // Compilation of the top-level pattern wrapper (code between < and > is Rust code, think + // of it as a kind of templating): + // + // < if let Some(alias) = alias { > + // let bindings = %record_insert% bindings arg in + // < } > + // arg bindings + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + // The last instruction + // arg bindings + let continuation = mk_app!( + self.data.compile_part(value_id, bindings_id), + Term::Var(value_id), + Term::Var(bindings_id) + ); + + // Either + // + // let bindings = %record_insert% bindings arg in + // continuation + // + // if `alias` is set, or just `continuation` otherwise. + if let Some(alias) = self.alias { + make::let_in( + bindings_id, + bindings_insert(alias, value_id, bindings_id), + continuation, + ) + } else { + continuation + } + } + } + + impl CompilePart for PatternData { + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + match self { + PatternData::Any(id) => { + // %record_insert% id value_id bindings_id + bindings_insert(*id, value_id, bindings_id) + } + PatternData::Record(pat) => pat.compile_part(value_id, bindings_id), + PatternData::Enum(pat) => pat.compile_part(value_id, bindings_id), + } + } + } + + impl CompilePart for RecordPattern { + // Compilation of the top-level record pattern wrapper: + // + // if %typeof% value_id == 'Record + // + // if %has_field% field value_id && %is_defined% field value_id then + // let value_id = %static_access(field)% value_id in + // let new_bindings_id = cont in + // + // if new_bindings_id == null then + // null + // else + // new_value_id new_bindings_id + // else + // null + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + // The fold block: + // + // + // if %has_field% field value_id && %is_defined% field value_id then + // let new_value_id = %static_access(field)% value_id in + // let new_bindings_id = cont in + // + // if new_bindings_id == null then + // null + // else + // new_value_id new_bindings_id + // else + // null + let inner: RichTerm = + self.patterns + .iter() + .fold(Term::Var(bindings_id).into(), |cont, field_pat| { + let field = field_pat.matched_id; + let new_bindings_id = LocIdent::fresh(); + let new_value_id = LocIdent::fresh(); + + // %has_field% field value_id && %is_defined% field value_id + let has_field = mk_app!( + make::op1( + UnaryOp::BoolAnd(), + make::op2( + BinaryOp::HasField(RecordOpKind::ConsiderAllFields), + Term::Var(field), + Term::Var(value_id), + ) + ), + make::op2( + // BinaryOp::IsDefined(), + todo!(), + Term::Var(field), + Term::Var(value_id), + ) + ); + + // The innermost if: + // + // if new_bindings_id == null then + // null + // else + // new_value_id new_bindings_id + let inner_if = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(new_bindings_id), Term::Null), + Term::Null, + field_pat + .pattern + .compile_part(new_value_id, new_bindings_id), + ); + + // let new_bindings_id = value_id bindings_id in + let inner_let = make::let_in(new_bindings_id, cont, inner_if); + + // let new_value_id = %static_access(field)% value_id in + let outer_let = make::let_in( + new_value_id, + make::op1(UnaryOp::StaticAccess(field), Term::Var(value_id)), + inner_let, + ); + + // if then else null + make::if_then_else(has_field, outer_let, Term::Null) + }); + + // %typeof% value_id == 'Record + let is_record: RichTerm = make::op2( + BinaryOp::Eq(), + make::op1(UnaryOp::Typeof(), Term::Var(value_id)), + Term::Enum("Record".into()), + ); + + // if then inner else null + make::if_then_else(is_record, inner, Term::Null) + } + } + + impl CompilePart for EnumPattern { + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + todo!() + } + } + + pub trait Compile { + /// Compile a match expression to a Nickel expression with the provided `value_id` as a + /// free variable (representing a placeholder for the matched expression). + fn compile(self, value_id: LocIdent) -> RichTerm; + } + + impl Compile for MatchData { + // Compilation of a full match expression (code between < and > is Rust code, think of it + // as a kind of templating): + // + // + // let init_bindings_id = {} in + // let bindings_id = value_id init_bindings_id in + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %with_env% body bindings_id + fn compile(self, value_id: LocIdent) -> RichTerm { + self.branches.into_iter().rev().fold( + // Term::RuntimeError(NonExhaustiveMatch) + Term::RuntimeError(todo!()).into(), + |cont, (pat, body)| { + let init_bindings_id = LocIdent::fresh(); + let bindings_id = LocIdent::fresh(); + + // inner if block: + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %with_env% bindings_id body + let inner = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), + cont, + mk_app!( + make::op1( + //UnaryOp::WithEnv + todo!(), + Term::Var(bindings_id), + ), + body + ), + ); + + // The two initial chained let-bindings: + // + // let init_bindings_id = {} in + // let bindings_id = value_id init_bindings_id in + // + make::let_in( + init_bindings_id, + Term::Record(RecordData::empty()), + make::let_in( + bindings_id, + pat.compile_part(value_id, init_bindings_id), + inner, + ), + ) + }, + ) + } + } +} From e81474f71908efeac2ee38c2c26ed4ab1dcda954 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 18:55:05 +0100 Subject: [PATCH 03/14] Add %field_is_defined% To safely check if a pattern matches a value (safely meaning without throwing an uncatchable error), we need a primitive operation to check if we can safely access the field of a record. To do so, we need the field to exists _and_ to have a definition. This is what the added `field_is_defined` checks. --- core/src/eval/operation.rs | 16 ++++++++++++++++ core/src/parser/grammar.lalrpop | 4 ++++ core/src/parser/lexer.rs | 4 ++++ core/src/term/mod.rs | 5 +++++ core/src/typecheck/operation.rs | 9 +++++++++ 5 files changed, 38 insertions(+) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index 383a679b50..d4299ec5db 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1755,6 +1755,22 @@ impl VirtualMachine { } _ => Err(mk_type_error!("has_field", "String", 1, t1, pos1)), }), + BinaryOp::FieldIsDefined(op_kind) => match_sharedterm!(match (t1) { + Term::Str(id) => { + if let Term::Record(record) = &*t2 { + Ok(Closure::atomic_closure(RichTerm::new( + Term::Bool(matches!( + record.fields.get(&LocIdent::from(id.into_inner())), + Some(field @ Field { value: None, ..}) if matches!(op_kind, RecordOpKind::ConsiderAllFields) || !field.is_empty_optional() + )), + pos_op_inh, + ))) + } else { + Err(mk_type_error!("field_is_defined", "Record", 2, t2, pos2)) + } + } + _ => Err(mk_type_error!("field_is_defined", "String", 1, t1, pos1)), + }), BinaryOp::ArrayConcat() => match_sharedterm!(match (t1) { Term::Array(ts1, attrs1) => match_sharedterm!(match (t2) { Term::Array(ts2, attrs2) => { diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 5eb01748e7..827780fe17 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -1000,6 +1000,8 @@ BOpPre: BinaryOp = { "go_field" => BinaryOp::GoField(), "has_field" => BinaryOp::HasField(RecordOpKind::IgnoreEmptyOpt), "has_field_all" => BinaryOp::HasField(RecordOpKind::ConsiderAllFields), + "field_is_defined" => BinaryOp::FieldIsDefined(RecordOpKind::IgnoreEmptyOpt), + "field_is_defined_all" => BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), "elem_at" => BinaryOp::ArrayElemAt(), "hash" => BinaryOp::Hash(), "serialize" => BinaryOp::Serialize(), @@ -1239,6 +1241,8 @@ extern { "has_field" => Token::Normal(NormalToken::HasField), "has_field_all" => Token::Normal(NormalToken::HasFieldAll), + "field_is_defined" => Token::Normal(NormalToken::FieldIsDefined), + "field_is_defined_all" => Token::Normal(NormalToken::FieldIsDefinedAll), "map" => Token::Normal(NormalToken::Map), "generate" => Token::Normal(NormalToken::ArrayGen), "elem_at" => Token::Normal(NormalToken::ElemAt), diff --git a/core/src/parser/lexer.rs b/core/src/parser/lexer.rs index 13580f5a4c..5bdc230edb 100644 --- a/core/src/parser/lexer.rs +++ b/core/src/parser/lexer.rs @@ -270,6 +270,10 @@ pub enum NormalToken<'input> { RecForceOp, #[token("%rec_default%")] RecDefaultOp, + #[token("%field_is_defined%")] + FieldIsDefined, + #[token("%field_is_defined_all%")] + FieldIsDefinedAll, #[token("merge")] Merge, diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index c2e03de62b..36b0ba405c 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1590,6 +1590,9 @@ pub enum BinaryOp { /// Test if a record has a specific field. HasField(RecordOpKind), + /// Test if the field of a record exists and has a definition. + FieldIsDefined(RecordOpKind), + /// Concatenate two arrays. ArrayConcat(), @@ -1684,6 +1687,8 @@ impl fmt::Display for BinaryOp { DynAccess() => write!(f, "dyn_access"), HasField(RecordOpKind::IgnoreEmptyOpt) => write!(f, "has_field"), HasField(RecordOpKind::ConsiderAllFields) => write!(f, "has_field_all"), + FieldIsDefined(RecordOpKind::IgnoreEmptyOpt) => write!(f, "field_is_defined"), + FieldIsDefined(RecordOpKind::ConsiderAllFields) => write!(f, "field_is_defined_all"), ArrayConcat() => write!(f, "array_concat"), ArrayElemAt() => write!(f, "elem_at"), Merge(_) => write!(f, "merge"), diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index efd002e115..3009b30033 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -322,6 +322,15 @@ pub fn get_bop_type( mk_uniftype::bool(), ) } + // forall a. Str -> {_: a} -> Bool + BinaryOp::FieldIsDefined(_) => { + let ty_elt = state.table.fresh_type_uvar(var_level); + ( + mk_uniftype::str(), + mk_uniftype::dict(ty_elt), + mk_uniftype::bool(), + ) + } // forall a. Array a -> Array a -> Array a BinaryOp::ArrayConcat() => { let ty_elt = state.table.fresh_type_uvar(var_level); From ad32fec4ae7028581b46ef23ff737347c1e55b91 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 19:01:45 +0100 Subject: [PATCH 04/14] Update pattern compilation to use field_is_defined --- core/src/term/pattern.rs | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index a93c99515b..16a860b9f4 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -279,7 +279,7 @@ pub mod compile { }; // Generate a `%record_insert% id value_id bindings_id` primop application. - fn bindings_insert(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { mk_app!( make::op2( BinaryOp::DynExtend { @@ -306,6 +306,11 @@ pub mod compile { /// The compiled expression must return either `null` if the pattern doesn't match, or a /// dictionary mapping pattern variables to the corresponding sub-expressions of the /// matched value if the pattern matched with success. + /// + /// Although the `value` and `bindings` could be passed as [crate::term::RichTerm] in all + /// generality, forcing them to be variable makes it less likely that the compilation + /// duplicates sub-expressions: because the value and the bindings must always be passed in + /// a variable, they are free to share without risk of duplicating work. fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm; } @@ -335,7 +340,7 @@ pub mod compile { if let Some(alias) = self.alias { make::let_in( bindings_id, - bindings_insert(alias, value_id, bindings_id), + insert_binding(alias, value_id, bindings_id), continuation, ) } else { @@ -349,7 +354,7 @@ pub mod compile { match self { PatternData::Any(id) => { // %record_insert% id value_id bindings_id - bindings_insert(*id, value_id, bindings_id) + insert_binding(*id, value_id, bindings_id) } PatternData::Record(pat) => pat.compile_part(value_id, bindings_id), PatternData::Enum(pat) => pat.compile_part(value_id, bindings_id), @@ -376,7 +381,7 @@ pub mod compile { // The fold block: // // - // if %has_field% field value_id && %is_defined% field value_id then + // if %field_is_defined% field value_id then // let new_value_id = %static_access(field)% value_id in // let new_bindings_id = cont in // @@ -394,22 +399,11 @@ pub mod compile { let new_bindings_id = LocIdent::fresh(); let new_value_id = LocIdent::fresh(); - // %has_field% field value_id && %is_defined% field value_id - let has_field = mk_app!( - make::op1( - UnaryOp::BoolAnd(), - make::op2( - BinaryOp::HasField(RecordOpKind::ConsiderAllFields), - Term::Var(field), - Term::Var(value_id), - ) - ), - make::op2( - // BinaryOp::IsDefined(), - todo!(), - Term::Var(field), - Term::Var(value_id), - ) + // %field_is_defined% field value_id + let has_field = make::op2( + BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), + Term::Var(field), + Term::Var(value_id), ); // The innermost if: From a35e0747c839509b3c067e8e6815582acaeb9035 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 19:32:12 +0100 Subject: [PATCH 05/14] Use new with_env primop for pattern compilation --- core/src/eval/operation.rs | 41 +++++++++++++++++++++++++++++++++ core/src/parser/grammar.lalrpop | 1 + core/src/parser/lexer.rs | 3 +++ core/src/term/mod.rs | 11 +++++++++ core/src/term/pattern.rs | 9 +------- core/src/typecheck/operation.rs | 11 +++++++++ 6 files changed, 68 insertions(+), 8 deletions(-) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index d4299ec5db..10f4591b51 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1159,6 +1159,47 @@ impl VirtualMachine { pos_op_inh, ))) } + UnaryOp::WithEnv() => { + // The continuation, that we must evaluate in the augmented environment. + let (mut cont, _) = self + .stack + .pop_arg(&self.cache) + .ok_or_else(|| EvalError::NotEnoughArgs(2, String::from("with_env"), pos_op))?; + + match_sharedterm!(match (t) { + Term::Record(data) => { + for (id, field) in data.fields { + if let Some(value) = field.value { + match_sharedterm!(match (value.term) { + Term::Closure(idx) => { + cont.env.insert(id.ident(), idx); + } + _ => { + cont.env.insert( + id.ident(), + self.cache.add( + Closure { + body: value, + env: env.clone(), + }, + BindingType::Normal, + ), + ); + } + }); + } else { + // This should not really happen, as `with_env` is intended to be + // used with very simple records: no metadata, no recursive fields, + // no field without definition, etc. + debug_assert!(false); + } + } + + Ok(cont) + } + _ => Err(mk_type_error!("with_env", "Record")), + }) + } } } diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 827780fe17..dd0239b5d1 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -1282,6 +1282,7 @@ extern { "enum_unwrap_variant" => Token::Normal(NormalToken::EnumUnwrapVariant), "enum_is_variant" => Token::Normal(NormalToken::EnumIsVariant), "enum_get_tag" => Token::Normal(NormalToken::EnumGetTag), + "with_env" => Token::Normal(NormalToken::WithEnv), "{" => Token::Normal(NormalToken::LBrace), "}" => Token::Normal(NormalToken::RBrace), diff --git a/core/src/parser/lexer.rs b/core/src/parser/lexer.rs index 5bdc230edb..7c78bc295d 100644 --- a/core/src/parser/lexer.rs +++ b/core/src/parser/lexer.rs @@ -346,6 +346,9 @@ pub enum NormalToken<'input> { #[token("%eval_nix%")] EvalNix, + #[token("%with_env%")] + WithEnv, + #[token("{")] LBrace, #[token("}")] diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 36b0ba405c..42bdc669ac 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1379,6 +1379,15 @@ pub enum UnaryOp { EnumIsVariant(), /// Extract the tag from an enum tag or an enum variant. EnumGetTag(), + + /// Take a record representing bindings to be added to the local environment, and proceed to + /// run the second argument (which isn't a proper primop argument but is stored on the stack) + /// in the environment augmented with the bindings. + /// + /// [Self::WithEnv] is currently used to implement pattern matching, where patterns are + /// compiled to a code that produces the bindings, and then the body of a matching branch need + /// to be evaluated with those new bindings available. + WithEnv(), } impl fmt::Display for UnaryOp { @@ -1434,6 +1443,8 @@ impl fmt::Display for UnaryOp { EnumUnwrapVariant() => write!(f, "enum_unwrap_variant"), EnumIsVariant() => write!(f, "enum_is_variant"), EnumGetTag() => write!(f, "enum_get_tag"), + + WithEnv() => write!(f, "with_env"), } } } diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index 16a860b9f4..3073396bb6 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -492,14 +492,7 @@ pub mod compile { let inner = make::if_then_else( make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), cont, - mk_app!( - make::op1( - //UnaryOp::WithEnv - todo!(), - Term::Var(bindings_id), - ), - body - ), + mk_app!(make::op1(UnaryOp::WithEnv(), Term::Var(bindings_id),), body), ); // The two initial chained let-bindings: diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index 3009b30033..b5199db614 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -220,6 +220,17 @@ pub fn get_uop_type( // Note that is_variant breaks parametricity, so it can't get a polymorphic type. // Dyn -> Bool UnaryOp::EnumIsVariant() => (mk_uniftype::dynamic(), mk_uniftype::bool()), + // [crate::term::UnaryOp::WithEnv] shouldn't appear anywhere in actual code, because its + // second argument can't be properly typechecked: it has unbound variables. However, it's + // not hard to come up with a vague working type for it, so we do. + // forall a. {_ : a} -> Dyn -> Dyn + UnaryOp::WithEnv() => { + let ty_elt = state.table.fresh_type_uvar(var_level); + ( + mk_uniftype::dict(ty_elt), + mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), + ) + } }) } From fc03093778a717f327453be824b174e67db654e2 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 12 Feb 2024 19:52:06 +0100 Subject: [PATCH 06/14] Add non exhaustive match error As match expressions are going to handle more general patterns, the existing NonExhaustiveMatch error - which was in fact specialized for enums - is renamed to NonExhaustiveMatchEnum, and the previous name is reused for a more general errors which doesn't assume that the pattern or the matched value are enums. --- core/src/error/mod.rs | 34 ++++++++++-- core/src/eval/operation.rs | 2 +- core/src/term/pattern.rs | 107 +++++++++++++++++++++++-------------- 3 files changed, 100 insertions(+), 43 deletions(-) diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs index 30c0412c02..f7708547ab 100644 --- a/core/src/error/mod.rs +++ b/core/src/error/mod.rs @@ -147,8 +147,10 @@ pub enum EvalError { }, /// A non-equatable term was compared for equality. EqError { eq_pos: TermPos, term: RichTerm }, - /// A value didn't match any branch of a `match` expression at runtime. - NonExhaustiveMatch { + /// A value didn't match any branch of a `match` expression at runtime. This is a specialized + /// version of [Self::NonExhaustiveMatch] when all branches are enum patterns. In this case, + /// the error message is more informative than the generic one. + NonExhaustiveEnumMatch { /// The list of expected patterns. Currently, those are just enum tags. expected: Vec, /// The original term matched @@ -156,6 +158,12 @@ pub enum EvalError { /// The position of the `match` expression pos: TermPos, }, + NonExhaustiveMatch { + /// The original term matched. + value: RichTerm, + /// The position of the `match` expression + pos: TermPos, + }, /// Tried to query a field of something that wasn't a record. QueryNonRecord { /// Position of the original unevaluated expression. @@ -1270,7 +1278,7 @@ impl IntoDiagnostics for EvalError { .with_message("cannot compare values for equality") .with_labels(labels)] } - EvalError::NonExhaustiveMatch { + EvalError::NonExhaustiveEnumMatch { expected, found, pos, @@ -1303,6 +1311,26 @@ impl IntoDiagnostics for EvalError { "But it has been applied to an argument which doesn't match any of those patterns".to_owned(), ])] } + EvalError::NonExhaustiveMatch { value, pos } => { + let mut labels = Vec::new(); + + if let Some(span) = pos.into_opt() { + labels.push(primary(&span).with_message("in this match expression")); + } + + labels.push( + secondary_term(&value, files) + .with_message("this value doesn't match any branch"), + ); + + vec![Diagnostic::error() + .with_message("non-exhaustive pattern matching") + .with_labels(labels) + .with_notes(vec![ + format!("This match expression isn't exhaustive"), + "It has been applied to an argument which doesn't match any of the branches".to_owned(), + ])] + } EvalError::IllegalPolymorphicTailAccess { action, label: contract_label, diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index 10f4591b51..4a48d55de9 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -352,7 +352,7 @@ impl VirtualMachine { env: cases_env, }) .or(default) - .ok_or_else(|| EvalError::NonExhaustiveMatch { + .ok_or_else(|| EvalError::NonExhaustiveEnumMatch { expected: cases.keys().copied().collect(), found: RichTerm::new(Term::Enum(*en), pos), pos: pos_op_inh, diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index 3073396bb6..195821c00e 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -2,6 +2,7 @@ use std::collections::{hash_map::Entry, HashMap}; use crate::{ + error::EvalError, identifier::LocIdent, label::Label, mk_app, @@ -274,7 +275,7 @@ impl ElaborateContract for RecordPattern { pub mod compile { use super::*; use crate::{ - mk_app, mk_fun, + mk_app, term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, }; @@ -447,7 +448,7 @@ pub mod compile { } impl CompilePart for EnumPattern { - fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + fn compile_part(&self, _value_id: LocIdent, _bindings_id: LocIdent) -> RichTerm { todo!() } } @@ -455,13 +456,15 @@ pub mod compile { pub trait Compile { /// Compile a match expression to a Nickel expression with the provided `value_id` as a /// free variable (representing a placeholder for the matched expression). - fn compile(self, value_id: LocIdent) -> RichTerm; + fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm; } impl Compile for MatchData { // Compilation of a full match expression (code between < and > is Rust code, think of it // as a kind of templating): // + // let value_id = value in + // // RichTerm { - self.branches.into_iter().rev().fold( - // Term::RuntimeError(NonExhaustiveMatch) - Term::RuntimeError(todo!()).into(), - |cont, (pat, body)| { - let init_bindings_id = LocIdent::fresh(); - let bindings_id = LocIdent::fresh(); - - // inner if block: - // - // if bindings_id == null then - // cont - // else - // # this primop evaluates body with an environment extended with bindings_id - // %with_env% bindings_id body - let inner = make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), - cont, - mk_app!(make::op1(UnaryOp::WithEnv(), Term::Var(bindings_id),), body), - ); - - // The two initial chained let-bindings: - // - // let init_bindings_id = {} in - // let bindings_id = value_id init_bindings_id in - // - make::let_in( - init_bindings_id, - Term::Record(RecordData::empty()), + fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { + let default_branch = self.default.unwrap_or_else(|| { + Term::RuntimeError(EvalError::NonExhaustiveMatch { + value: value.clone(), + pos, + }) + .into() + }); + let value_id = LocIdent::fresh(); + + // The fold block: + // + // + // let init_bindings_id = {} in + // let bindings_id = value_id init_bindings_id in + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %with_env% body bindings_id + let fold_block = + self.branches + .into_iter() + .rev() + .fold(default_branch, |cont, (pat, body)| { + let init_bindings_id = LocIdent::fresh(); + let bindings_id = LocIdent::fresh(); + + // inner if block: + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %with_env% bindings_id body + let inner = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), + cont, + mk_app!(make::op1(UnaryOp::WithEnv(), Term::Var(bindings_id),), body), + ); + + // The two initial chained let-bindings: + // + // let init_bindings_id = {} in + // let bindings_id = value_id init_bindings_id in + // make::let_in( - bindings_id, - pat.compile_part(value_id, init_bindings_id), - inner, - ), - ) - }, - ) + init_bindings_id, + Term::Record(RecordData::empty()), + make::let_in( + bindings_id, + pat.compile_part(value_id, init_bindings_id), + inner, + ), + ) + }); + + // let value_id = value in + make::let_in(value_id, value, fold_block) } } } From 44fc414153dff72d4985be5eabfa6b3a67ecffe6 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 00:04:45 +0100 Subject: [PATCH 07/14] Fix bugs in pattern compil. and associated primops --- core/src/eval/operation.rs | 13 ++++++++++++- core/src/term/pattern.rs | 28 ++++++++++++---------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index 4a48d55de9..c146c4975e 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1166,6 +1166,17 @@ impl VirtualMachine { .pop_arg(&self.cache) .ok_or_else(|| EvalError::NotEnoughArgs(2, String::from("with_env"), pos_op))?; + // This is fragile...unwrapping all the closures to reach an actual term (and, more + // importantly, and environment) + loop { + match_sharedterm!(match (cont.body.term) { + Term::Closure(idx) => { + cont = self.cache.get(idx); + } + _ => break, + }); + } + match_sharedterm!(match (t) { Term::Record(data) => { for (id, field) in data.fields { @@ -1802,7 +1813,7 @@ impl VirtualMachine { Ok(Closure::atomic_closure(RichTerm::new( Term::Bool(matches!( record.fields.get(&LocIdent::from(id.into_inner())), - Some(field @ Field { value: None, ..}) if matches!(op_kind, RecordOpKind::ConsiderAllFields) || !field.is_empty_optional() + Some(field @ Field { value: Some(_), ..}) if matches!(op_kind, RecordOpKind::ConsiderAllFields) || !field.is_empty_optional() )), pos_op_inh, ))) diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index 195821c00e..fb0967a004 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -279,7 +279,7 @@ pub mod compile { term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, }; - // Generate a `%record_insert% id value_id bindings_id` primop application. + // Generate a `%record_insert% "" value_id bindings_id` primop application. fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { mk_app!( make::op2( @@ -289,7 +289,7 @@ pub mod compile { pending_contracts: Default::default(), op_kind: RecordOpKind::IgnoreEmptyOpt, }, - Term::Var(id), + Term::Str(id.label().into()), Term::Var(bindings_id) ), Term::Var(value_id) @@ -325,12 +325,8 @@ pub mod compile { // arg bindings fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { // The last instruction - // arg bindings - let continuation = mk_app!( - self.data.compile_part(value_id, bindings_id), - Term::Var(value_id), - Term::Var(bindings_id) - ); + // + let continuation = self.data.compile_part(value_id, bindings_id); // Either // @@ -354,7 +350,7 @@ pub mod compile { fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { match self { PatternData::Any(id) => { - // %record_insert% id value_id bindings_id + // %record_insert% "" value_id bindings_id insert_binding(*id, value_id, bindings_id) } PatternData::Record(pat) => pat.compile_part(value_id, bindings_id), @@ -375,7 +371,7 @@ pub mod compile { // if new_bindings_id == null then // null // else - // new_value_id new_bindings_id + // // else // null fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { @@ -389,7 +385,7 @@ pub mod compile { // if new_bindings_id == null then // null // else - // new_value_id new_bindings_id + // // else // null let inner: RichTerm = @@ -403,7 +399,7 @@ pub mod compile { // %field_is_defined% field value_id let has_field = make::op2( BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), - Term::Var(field), + Term::Str(field.label().into()), Term::Var(value_id), ); @@ -412,7 +408,7 @@ pub mod compile { // if new_bindings_id == null then // null // else - // new_value_id new_bindings_id + // let inner_if = make::if_then_else( make::op2(BinaryOp::Eq(), Term::Var(new_bindings_id), Term::Null), Term::Null, @@ -421,7 +417,7 @@ pub mod compile { .compile_part(new_value_id, new_bindings_id), ); - // let new_bindings_id = value_id bindings_id in + // let new_bindings_id = cont in let inner_let = make::let_in(new_bindings_id, cont, inner_if); // let new_value_id = %static_access(field)% value_id in @@ -494,7 +490,7 @@ pub mod compile { // - initial accumulator is the default branch (or error if not default branch) // > // let init_bindings_id = {} in - // let bindings_id = value_id init_bindings_id in + // let bindings_id = in // // if bindings_id == null then // cont @@ -525,7 +521,7 @@ pub mod compile { // The two initial chained let-bindings: // // let init_bindings_id = {} in - // let bindings_id = value_id init_bindings_id in + // let bindings_id = in // make::let_in( init_bindings_id, From 78656c6790da5eb4834ea0aea519fb4f22649dd9 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 12:54:04 +0100 Subject: [PATCH 08/14] Correctly handle record pattern tail The partial implementation of record pattern compilation didn't handle the record pattern tail yet: for example, a closed pattern `{foo, bar}` would match a larger record `{foo=1, bar=2, baz=3}`. This commit additionally keeps the current rest of the record being matched, that is the original record minus all the matched fields, to either check that it is empty at the end (closed pattern), or to bind this rest to an identifier if the pattern captures the rest with `{foo, bar, ..rest}`. --- core/src/term/pattern.rs | 245 ++++++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 41 deletions(-) diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index fb0967a004..1d6e9ac20d 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -279,16 +279,29 @@ pub mod compile { term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, }; - // Generate a `%record_insert% "" value_id bindings_id` primop application. + /// The special field used to capture the rest of a record pattern. Although it is stored in + /// the bindings, it's not an actual bindings and is filtered out at the end of the + /// corresponding record pattern. + const REST_FIELD: &str = "$rest"; + + /// Generate a standard `%record_insert` primop as generated by the parser. + fn record_insert() -> BinaryOp { + BinaryOp::DynExtend { + ext_kind: RecordExtKind::WithValue, + metadata: Default::default(), + pending_contracts: Default::default(), + // We don't really care for optional fields here and we don't need to filter them out + op_kind: RecordOpKind::ConsiderAllFields, + } + } + + /// Generate a Nickel expression which inserts a new binding in the working dictionary. + /// + /// `%record_insert% "" bindings_id value_id` fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { mk_app!( make::op2( - BinaryOp::DynExtend { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - op_kind: RecordOpKind::IgnoreEmptyOpt, - }, + record_insert(), Term::Str(id.label().into()), Term::Var(bindings_id) ), @@ -296,6 +309,37 @@ pub mod compile { ) } + /// Generate a Nickel expression which update the `REST_FIELD` field of the working dictionary, + /// putting `value_id` minus the current field. + /// + /// ```nickel + /// %record_insert% "" + /// (%record_remove% "" bindings_id) + /// (%record_remove% "" value_id) + /// ``` + fn update_rest(field: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + let value_shrinked = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(field.label().into()), + Term::Var(value_id), + ); + + let bindings_shrinked = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(REST_FIELD.into()), + Term::Var(bindings_id), + ); + + mk_app!( + make::op2( + record_insert(), + Term::Str(REST_FIELD.into()), + bindings_shrinked, + ), + value_shrinked + ) + } + pub trait CompilePart { /// Compile part of a broader pattern to a Nickel expression with two free variables (which /// is equivalent to a function of two arguments): @@ -317,7 +361,7 @@ pub mod compile { impl CompilePart for Pattern { // Compilation of the top-level pattern wrapper (code between < and > is Rust code, think - // of it as a kind of templating): + // a template of some sort): // // < if let Some(alias) = alias { > // let bindings = %record_insert% bindings arg in @@ -360,13 +404,25 @@ pub mod compile { } impl CompilePart for RecordPattern { - // Compilation of the top-level record pattern wrapper: + // Compilation of the top-level record pattern wrapper. + // + // To check that the value doesn't contain extra fields, or to capture the rest of the + // record when using the `..rest` construct, we need to remove matched fields from the + // original value at each stage and thread this working value in addition to the bindings. + // + // We don't have tuples, and to avoid adding an indirection (by storing the current state + // as `{rest, bindings}` where bindings itself is a record), we store this rest alongside + // the bindings in a special field that can't conflict with a valid identifier (at least as + // long as we don't allow raw identifiers in patterns): `$rest`. Note that this is an + // implementation detail which isn't very hard to change, should we have to. // // if %typeof% value_id == 'Record + // let final_bindings_id = // - // if %has_field% field value_id && %is_defined% field value_id then - // let value_id = %static_access(field)% value_id in - // let new_bindings_id = cont in + // if %field_is_defined% field value_id then + // let prev_bindings_id = cont in + // let value_id = %static_access(field)% (%static_access(REST_FIELD)% prev_bindings_id) + // let new_bindings_id = in // // if new_bindings_id == null then // null @@ -374,27 +430,50 @@ pub mod compile { // // else // null + // in + // + // # if tail is empty, check that the value doesn't contain extra fields + // if final_bindings_id == null || + // (%static_access% final_bindings_id) != {} then + // null + // else + // %record_remove% "" final_bindings_id + // + // # move the rest from REST_FIELD to rest, and remove REST_FIELD + // if final_bindings_id == null then + // null + // else + // %record_remove% "" + // (%record_insert% + // final_bindings_id + // (%static_access% final_bindings_id) + // ) + // + // %record_remove% "" final_bindings_id + // + // else + // null fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { // The fold block: // // - // if %field_is_defined% field value_id then - // let new_value_id = %static_access(field)% value_id in - // let new_bindings_id = cont in + // if %field_is_defined% field value_id then + // let bindings_id = cont in + // let value_id = %static_access(field)% (%static_access(REST_FIELD)% bindings_id) + // let bindings_id = in // - // if new_bindings_id == null then - // null - // else - // - // else - // null - let inner: RichTerm = + // if new_bindings_id == null then + // null + // else + // + // + let fold_block: RichTerm = self.patterns .iter() .fold(Term::Var(bindings_id).into(), |cont, field_pat| { let field = field_pat.matched_id; - let new_bindings_id = LocIdent::fresh(); - let new_value_id = LocIdent::fresh(); + let bindings_id = LocIdent::fresh(); + let value_id = LocIdent::fresh(); // %field_is_defined% field value_id let has_field = make::op2( @@ -405,30 +484,42 @@ pub mod compile { // The innermost if: // - // if new_bindings_id == null then + // if bindings_id == null then // null // else - // + // let inner_if = make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(new_bindings_id), Term::Null), + make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), Term::Null, - field_pat - .pattern - .compile_part(new_value_id, new_bindings_id), + field_pat.pattern.compile_part(value_id, bindings_id), ); - // let new_bindings_id = cont in - let inner_let = make::let_in(new_bindings_id, cont, inner_if); + // let bindings_id = in + // + let updated_bindings_let = make::let_in( + bindings_id, + update_rest(field, value_id, bindings_id), + inner_if, + ); - // let new_value_id = %static_access(field)% value_id in - let outer_let = make::let_in( - new_value_id, - make::op1(UnaryOp::StaticAccess(field), Term::Var(value_id)), - inner_let, + // let value_id = %static_access(field)% (%static_access(REST_FIELD)% bindings_id) + let value_let = make::let_in( + value_id, + make::op1( + UnaryOp::StaticAccess(field), + make::op1( + UnaryOp::StaticAccess(REST_FIELD.into()), + Term::Var(value_id), + ), + ), + updated_bindings_let, ); - // if then else null - make::if_then_else(has_field, outer_let, Term::Null) + // let bindings_id = cont in + let outer_bindings_let = make::let_in(bindings_id, cont, value_let); + + // if then else null + make::if_then_else(has_field, outer_bindings_let, Term::Null) }); // %typeof% value_id == 'Record @@ -438,8 +529,80 @@ pub mod compile { Term::Enum("Record".into()), ); - // if then inner else null - make::if_then_else(is_record, inner, Term::Null) + let final_bindings_id = LocIdent::fresh(); + + // %record_remove% "" final_bindings_id + let bindings_without_rest = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(REST_FIELD.into()), + Term::Var(final_bindings_id), + ); + + // the last block of the outer if, which depends on the tail of the record pattern + let tail_block = match self.tail { + // if final_bindings_id == null || + // (%static_access% final_bindings_id) != {} then + // null + // else + // %record_remove% "" final_bindings_id + RecordPatternTail::Empty => make::if_then_else( + mk_app!( + make::op1( + UnaryOp::BoolOr(), + make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null) + ), + make::op1( + UnaryOp::BoolNot(), + make::op2( + BinaryOp::Eq(), + make::op1( + UnaryOp::StaticAccess(REST_FIELD.into()), + Term::Var(final_bindings_id) + ), + Term::Record(RecordData::empty()) + ) + ) + ), + Term::Null, + bindings_without_rest, + ), + // %record_remove% "" final_bindings_id + RecordPatternTail::Open => bindings_without_rest, + // if final_bindings_id == null then + // null + // else + // %record_remove% "" + // (%record_insert% + // final_bindings_id + // (%static_access% final_bindings_id) + // ) + RecordPatternTail::Capture(rest) => make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null), + Term::Null, + make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(REST_FIELD.into()), + mk_app!( + make::op2( + record_insert(), + Term::Str(rest.label().into()), + Term::Var(final_bindings_id), + ), + make::op1( + UnaryOp::StaticAccess(REST_FIELD.into()), + Term::Var(final_bindings_id) + ) + ), + ), + ), + }; + + // The let enclosing the fold block and the final block: + // let final_bindings_id = in + let outer_let = make::let_in(final_bindings_id, fold_block, tail_block); + + // if then else null + make::if_then_else(is_record, outer_let, Term::Null) } } From 05a3225a8789c39041d4436036c44d6c018c8b7a Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 16:04:32 +0100 Subject: [PATCH 09/14] Fix record pattern compilation After doing some experimentation, fix a number of bugs observed in the compilation of record patterns: wrong order of arguments, wrong identifiers used, clash of the special rest field between patterns and sub-patterns, and so on. --- core/src/term/pattern.rs | 204 ++++++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 90 deletions(-) diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index 1d6e9ac20d..c8db8c1049 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -279,11 +279,6 @@ pub mod compile { term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, }; - /// The special field used to capture the rest of a record pattern. Although it is stored in - /// the bindings, it's not an actual bindings and is filtered out at the end of the - /// corresponding record pattern. - const REST_FIELD: &str = "$rest"; - /// Generate a standard `%record_insert` primop as generated by the parser. fn record_insert() -> BinaryOp { BinaryOp::DynExtend { @@ -309,34 +304,41 @@ pub mod compile { ) } - /// Generate a Nickel expression which update the `REST_FIELD` field of the working dictionary, - /// putting `value_id` minus the current field. + /// Generate a Nickel expression which update the `REST_FIELD` field of the working bindings by + /// remove the `field` from it. /// /// ```nickel /// %record_insert% "" /// (%record_remove% "" bindings_id) - /// (%record_remove% "" value_id) + /// (%record_remove% "" + /// (%static_access(REST_FIELD) bindings_id) + /// ) /// ``` - fn update_rest(field: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { - let value_shrinked = make::op2( + fn remove_from_rest(rest_field: LocIdent, field: LocIdent, bindings_id: LocIdent) -> RichTerm { + let rest = make::op1( + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(bindings_id), + ); + + let rest_shrinked = make::op2( BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), Term::Str(field.label().into()), - Term::Var(value_id), + rest, ); let bindings_shrinked = make::op2( BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(REST_FIELD.into()), + Term::Str(rest_field.into()), Term::Var(bindings_id), ); mk_app!( make::op2( record_insert(), - Term::Str(REST_FIELD.into()), + Term::Str(rest_field.into()), bindings_shrinked, ), - value_shrinked + rest_shrinked ) } @@ -412,32 +414,38 @@ pub mod compile { // // We don't have tuples, and to avoid adding an indirection (by storing the current state // as `{rest, bindings}` where bindings itself is a record), we store this rest alongside - // the bindings in a special field that can't conflict with a valid identifier (at least as - // long as we don't allow raw identifiers in patterns): `$rest`. Note that this is an + // the bindings in a special field which is a freshly generated indentifier. This is an // implementation detail which isn't very hard to change, should we have to. // // if %typeof% value_id == 'Record // let final_bindings_id = - // + // + // " bindings_id value_id` + // > // if %field_is_defined% field value_id then - // let prev_bindings_id = cont in - // let value_id = %static_access(field)% (%static_access(REST_FIELD)% prev_bindings_id) - // let new_bindings_id = in + // let local_bindings_id = cont in // - // if new_bindings_id == null then + // if local_bindings_id == null then // null // else - // + // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // let local_bindings_id = in + // // else // null + // + // // in + // // - // # if tail is empty, check that the value doesn't contain extra fields - // if final_bindings_id == null || - // (%static_access% final_bindings_id) != {} then - // null - // else - // %record_remove% "" final_bindings_id + // # if tail is empty, check that the value doesn't contain extra fields + // if final_bindings_id == null || + // (%static_access% final_bindings_id) != {} then + // null + // else + // %record_remove% "" final_bindings_id // // # move the rest from REST_FIELD to rest, and remove REST_FIELD // if final_bindings_id == null then @@ -454,73 +462,89 @@ pub mod compile { // else // null fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + let rest_field = LocIdent::fresh(); + + // `%record_insert% "" bindings_id value_id` + let init_bindings = mk_app!( + make::op2( + record_insert(), + Term::Str(rest_field.into()), + Term::Var(bindings_id) + ), + Term::Var(value_id) + ); + // The fold block: // - // + // " bindings_id value_id` + // > + // + // // if %field_is_defined% field value_id then - // let bindings_id = cont in - // let value_id = %static_access(field)% (%static_access(REST_FIELD)% bindings_id) - // let bindings_id = in + // let local_bindings_id = cont in // - // if new_bindings_id == null then + // if local_bindings_id == null then // null // else - // - // + // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // let local_bindings_id = in + // let fold_block: RichTerm = - self.patterns - .iter() - .fold(Term::Var(bindings_id).into(), |cont, field_pat| { - let field = field_pat.matched_id; - let bindings_id = LocIdent::fresh(); - let value_id = LocIdent::fresh(); - - // %field_is_defined% field value_id - let has_field = make::op2( - BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), - Term::Str(field.label().into()), - Term::Var(value_id), - ); - - // The innermost if: - // - // if bindings_id == null then - // null - // else - // - let inner_if = make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), - Term::Null, - field_pat.pattern.compile_part(value_id, bindings_id), - ); - - // let bindings_id = in - // - let updated_bindings_let = make::let_in( - bindings_id, - update_rest(field, value_id, bindings_id), - inner_if, - ); - - // let value_id = %static_access(field)% (%static_access(REST_FIELD)% bindings_id) - let value_let = make::let_in( - value_id, + self.patterns.iter().fold(init_bindings, |cont, field_pat| { + let field = field_pat.matched_id; + let local_bindings_id = LocIdent::fresh(); + let local_value_id = LocIdent::fresh(); + + // let bindings_id = in + // + let updated_bindings_let = make::let_in( + local_bindings_id, + remove_from_rest(rest_field, field, local_bindings_id), + field_pat + .pattern + .compile_part(local_value_id, local_bindings_id), + ); + + // let value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // in + let inner_else_block = make::let_in( + local_value_id, + make::op1( + UnaryOp::StaticAccess(field), make::op1( - UnaryOp::StaticAccess(field), - make::op1( - UnaryOp::StaticAccess(REST_FIELD.into()), - Term::Var(value_id), - ), + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(local_bindings_id), ), - updated_bindings_let, - ); - - // let bindings_id = cont in - let outer_bindings_let = make::let_in(bindings_id, cont, value_let); - - // if then else null - make::if_then_else(has_field, outer_bindings_let, Term::Null) - }); + ), + updated_bindings_let, + ); + + // The innermost if: + // + // if local_bindings_id == null then + // null + // else + // + let inner_if = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(local_bindings_id), Term::Null), + Term::Null, + inner_else_block, + ); + + // let local_bindings_id = cont in + let binding_cont_let = make::let_in(local_bindings_id, cont, inner_if); + + // %field_is_defined% field value_id + let has_field = make::op2( + BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), + Term::Str(field.label().into()), + Term::Var(value_id), + ); + + make::if_then_else(has_field, binding_cont_let, Term::Null) + }); // %typeof% value_id == 'Record let is_record: RichTerm = make::op2( @@ -534,7 +558,7 @@ pub mod compile { // %record_remove% "" final_bindings_id let bindings_without_rest = make::op2( BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(REST_FIELD.into()), + Term::Str(rest_field.into()), Term::Var(final_bindings_id), ); @@ -556,7 +580,7 @@ pub mod compile { make::op2( BinaryOp::Eq(), make::op1( - UnaryOp::StaticAccess(REST_FIELD.into()), + UnaryOp::StaticAccess(rest_field.into()), Term::Var(final_bindings_id) ), Term::Record(RecordData::empty()) @@ -581,7 +605,7 @@ pub mod compile { Term::Null, make::op2( BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(REST_FIELD.into()), + Term::Str(rest_field.into()), mk_app!( make::op2( record_insert(), @@ -589,7 +613,7 @@ pub mod compile { Term::Var(final_bindings_id), ), make::op1( - UnaryOp::StaticAccess(REST_FIELD.into()), + UnaryOp::StaticAccess(rest_field.into()), Term::Var(final_bindings_id) ) ), From 5a93029309859bd6abb3482c73693bd21ceb87b1 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 16:48:36 +0100 Subject: [PATCH 10/14] with_env -> pattern_branch Rename the `with_env` primop to `pattern_branch` in order to avoid confusion about its scope and usage: this shouldn't be used as a general environment augmentation operation, although it kinda is, because of existing footguns. --- core/src/eval/operation.rs | 2 +- core/src/parser/grammar.lalrpop | 2 +- core/src/parser/lexer.rs | 4 ++-- core/src/term/mod.rs | 21 +++++++++++++-------- core/src/term/pattern.rs | 11 +++++++---- core/src/typecheck/operation.rs | 2 +- 6 files changed, 25 insertions(+), 17 deletions(-) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index c146c4975e..56279c4bfd 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1159,7 +1159,7 @@ impl VirtualMachine { pos_op_inh, ))) } - UnaryOp::WithEnv() => { + UnaryOp::PatternBranch() => { // The continuation, that we must evaluate in the augmented environment. let (mut cont, _) = self .stack diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index dd0239b5d1..8f793c7f6c 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -1282,7 +1282,7 @@ extern { "enum_unwrap_variant" => Token::Normal(NormalToken::EnumUnwrapVariant), "enum_is_variant" => Token::Normal(NormalToken::EnumIsVariant), "enum_get_tag" => Token::Normal(NormalToken::EnumGetTag), - "with_env" => Token::Normal(NormalToken::WithEnv), + "pattern_branch" => Token::Normal(NormalToken::PatternBranch), "{" => Token::Normal(NormalToken::LBrace), "}" => Token::Normal(NormalToken::RBrace), diff --git a/core/src/parser/lexer.rs b/core/src/parser/lexer.rs index 7c78bc295d..af11fa1c2a 100644 --- a/core/src/parser/lexer.rs +++ b/core/src/parser/lexer.rs @@ -346,8 +346,8 @@ pub enum NormalToken<'input> { #[token("%eval_nix%")] EvalNix, - #[token("%with_env%")] - WithEnv, + #[token("%pattern_branch%")] + PatternBranch, #[token("{")] LBrace, diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 42bdc669ac..a2ea75011e 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1380,14 +1380,19 @@ pub enum UnaryOp { /// Extract the tag from an enum tag or an enum variant. EnumGetTag(), - /// Take a record representing bindings to be added to the local environment, and proceed to - /// run the second argument (which isn't a proper primop argument but is stored on the stack) - /// in the environment augmented with the bindings. + /// Take a record representing bindings to be added to the local environment and proceed to + /// evaluate a pattern branch given as a second argument (which isn't a proper primop argument + /// but is stored on the stack) in its environment augmented with the bindings. /// - /// [Self::WithEnv] is currently used to implement pattern matching, where patterns are - /// compiled to a code that produces the bindings, and then the body of a matching branch need - /// to be evaluated with those new bindings available. - WithEnv(), + /// [Self::PatternBranch()] isn't specific to pattern branches: what it does is to take a set + /// of extra bindings and a term, and run the term in the augmented environment. While it could + /// useful to implement other operations, it would be fragile as a generic `with_env` operator, + /// because the term to be run must not be burried into a closure, or the environment + /// augmentation would be shallow and have no effect on the actual content of the term (we have + /// the same kind of constraints when updating record fields with the recursive environment of + /// a record, for example). This is why the name tries to make it clear that it shouldn't be + /// used blindly for something else. + PatternBranch(), } impl fmt::Display for UnaryOp { @@ -1444,7 +1449,7 @@ impl fmt::Display for UnaryOp { EnumIsVariant() => write!(f, "enum_is_variant"), EnumGetTag() => write!(f, "enum_get_tag"), - WithEnv() => write!(f, "with_env"), + PatternBranch() => write!(f, "with_env"), } } } diff --git a/core/src/term/pattern.rs b/core/src/term/pattern.rs index c8db8c1049..02da17ef18 100644 --- a/core/src/term/pattern.rs +++ b/core/src/term/pattern.rs @@ -659,7 +659,7 @@ pub mod compile { // cont // else // # this primop evaluates body with an environment extended with bindings_id - // %with_env% body bindings_id + // %pattern_branch% body bindings_id fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { let default_branch = self.default.unwrap_or_else(|| { Term::RuntimeError(EvalError::NonExhaustiveMatch { @@ -683,7 +683,7 @@ pub mod compile { // cont // else // # this primop evaluates body with an environment extended with bindings_id - // %with_env% body bindings_id + // %pattern_branch% body bindings_id let fold_block = self.branches .into_iter() @@ -698,11 +698,14 @@ pub mod compile { // cont // else // # this primop evaluates body with an environment extended with bindings_id - // %with_env% bindings_id body + // %pattern_branch% bindings_id body let inner = make::if_then_else( make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), cont, - mk_app!(make::op1(UnaryOp::WithEnv(), Term::Var(bindings_id),), body), + mk_app!( + make::op1(UnaryOp::PatternBranch(), Term::Var(bindings_id),), + body + ), ); // The two initial chained let-bindings: diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index b5199db614..a953f22f00 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -224,7 +224,7 @@ pub fn get_uop_type( // second argument can't be properly typechecked: it has unbound variables. However, it's // not hard to come up with a vague working type for it, so we do. // forall a. {_ : a} -> Dyn -> Dyn - UnaryOp::WithEnv() => { + UnaryOp::PatternBranch() => { let ty_elt = state.table.fresh_type_uvar(var_level); ( mk_uniftype::dict(ty_elt), From f9f17a4d40b92a2203833ce901bc5a9f9c286e78 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 16:52:13 +0100 Subject: [PATCH 11/14] pattern.rs -> pattern/mod.rs --- core/src/term/{pattern.rs => pattern/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/term/{pattern.rs => pattern/mod.rs} (100%) diff --git a/core/src/term/pattern.rs b/core/src/term/pattern/mod.rs similarity index 100% rename from core/src/term/pattern.rs rename to core/src/term/pattern/mod.rs From 8900d09e7a3baede675c7434bd4c0cc83159c716 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 17:07:07 +0100 Subject: [PATCH 12/14] split compile mod from pattern mod in its own file --- core/src/term/pattern/compile.rs | 478 +++++++++++++++++++++++++++++++ core/src/term/pattern/mod.rs | 461 +---------------------------- 2 files changed, 480 insertions(+), 459 deletions(-) create mode 100644 core/src/term/pattern/compile.rs diff --git a/core/src/term/pattern/compile.rs b/core/src/term/pattern/compile.rs new file mode 100644 index 0000000000..1f0b80eaee --- /dev/null +++ b/core/src/term/pattern/compile.rs @@ -0,0 +1,478 @@ +//! Compilation of pattern matching down to pattern-less Nickel code. +//! +//! # Algorithm +//! +//! Compiling patterns amounts to generate a decision tree - concretely, a term composed mostly of +//! nested if-then-else - which either succeeds to match a value and returns the bindings of +//! pattern variables, or fails and returns `null`. +//! +//! Compilation of pattern matching is a well-studied problem in the literature, where efficient +//! algorithms try to avoid the duplication of checks by "grouping" them in a smart way. A standard +//! resource on this topic is the paper [_Compiling Pattern Matching to Good Decision +//! Trees_](https://dl.acm.org/doi/10.1145/1411304.1411311) by Luc Maranget. +//! +//! The current version of pattern compilation in Nickel is naive: it simply compiles each pattern +//! to a checking expression and tries them all until one works. We don't expect pattern matching +//! to be relevant for performance anytime soon (allegedly, there are much more impacting aspects +//! to handle before that). We might revisit this in the future if pattern matching turns out to be +//! a bottleneck. +//! +//! Most build blocks are generated programmatically rather than written out as e.g. members of the +//! [internals][crate::stdlib::internals] stdlib module. While clunkier, this lets more easily +//! change the compilation strategy in the future and is already a more efficient in the current +//! setting (combining building blocks from the standard library would require much more function +//! applications, while we can generate inlined versions on-the-fly here). +use super::*; +use crate::{ + mk_app, + term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, +}; + +/// Generate a standard `%record_insert` primop as generated by the parser. +fn record_insert() -> BinaryOp { + BinaryOp::DynExtend { + ext_kind: RecordExtKind::WithValue, + metadata: Default::default(), + pending_contracts: Default::default(), + // We don't really care for optional fields here and we don't need to filter them out + op_kind: RecordOpKind::ConsiderAllFields, + } +} + +/// Generate a Nickel expression which inserts a new binding in the working dictionary. +/// +/// `%record_insert% "" bindings_id value_id` +fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + mk_app!( + make::op2( + record_insert(), + Term::Str(id.label().into()), + Term::Var(bindings_id) + ), + Term::Var(value_id) + ) +} + +/// Generate a Nickel expression which update the `REST_FIELD` field of the working bindings by +/// remove the `field` from it. +/// +/// ```nickel +/// %record_insert% "" +/// (%record_remove% "" bindings_id) +/// (%record_remove% "" +/// (%static_access(REST_FIELD) bindings_id) +/// ) +/// ``` +fn remove_from_rest(rest_field: LocIdent, field: LocIdent, bindings_id: LocIdent) -> RichTerm { + let rest = make::op1( + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(bindings_id), + ); + + let rest_shrinked = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(field.label().into()), + rest, + ); + + let bindings_shrinked = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(rest_field.into()), + Term::Var(bindings_id), + ); + + mk_app!( + make::op2( + record_insert(), + Term::Str(rest_field.into()), + bindings_shrinked, + ), + rest_shrinked + ) +} + +pub trait CompilePart { + /// Compile part of a broader pattern to a Nickel expression with two free variables (which + /// is equivalent to a function of two arguments): + /// + /// 1. The value being matched on (`value_id`) + /// 2. A dictionary of the current assignment of pattern variables to sub-expressions of the + /// matched expression + /// + /// The compiled expression must return either `null` if the pattern doesn't match, or a + /// dictionary mapping pattern variables to the corresponding sub-expressions of the + /// matched value if the pattern matched with success. + /// + /// Although the `value` and `bindings` could be passed as [crate::term::RichTerm] in all + /// generality, forcing them to be variable makes it less likely that the compilation + /// duplicates sub-expressions: because the value and the bindings must always be passed in + /// a variable, they are free to share without risk of duplicating work. + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm; +} + +impl CompilePart for Pattern { + // Compilation of the top-level pattern wrapper (code between < and > is Rust code, think + // a template of some sort): + // + // < if let Some(alias) = alias { > + // let bindings = %record_insert% bindings arg in + // < } > + // arg bindings + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + // The last instruction + // + let continuation = self.data.compile_part(value_id, bindings_id); + + // Either + // + // let bindings = %record_insert% bindings arg in + // continuation + // + // if `alias` is set, or just `continuation` otherwise. + if let Some(alias) = self.alias { + make::let_in( + bindings_id, + insert_binding(alias, value_id, bindings_id), + continuation, + ) + } else { + continuation + } + } +} + +impl CompilePart for PatternData { + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + match self { + PatternData::Any(id) => { + // %record_insert% "" value_id bindings_id + insert_binding(*id, value_id, bindings_id) + } + PatternData::Record(pat) => pat.compile_part(value_id, bindings_id), + PatternData::Enum(pat) => pat.compile_part(value_id, bindings_id), + } + } +} + +impl CompilePart for RecordPattern { + // Compilation of the top-level record pattern wrapper. + // + // To check that the value doesn't contain extra fields, or to capture the rest of the + // record when using the `..rest` construct, we need to remove matched fields from the + // original value at each stage and thread this working value in addition to the bindings. + // + // We don't have tuples, and to avoid adding an indirection (by storing the current state + // as `{rest, bindings}` where bindings itself is a record), we store this rest alongside + // the bindings in a special field which is a freshly generated indentifier. This is an + // implementation detail which isn't very hard to change, should we have to. + // + // if %typeof% value_id == 'Record + // let final_bindings_id = + // + // " bindings_id value_id` + // > + // if %field_is_defined% field value_id then + // let local_bindings_id = cont in + // + // if local_bindings_id == null then + // null + // else + // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // let local_bindings_id = in + // + // else + // null + // + // + // in + // + // + // # if tail is empty, check that the value doesn't contain extra fields + // if final_bindings_id == null || + // (%static_access% final_bindings_id) != {} then + // null + // else + // %record_remove% "" final_bindings_id + // + // # move the rest from REST_FIELD to rest, and remove REST_FIELD + // if final_bindings_id == null then + // null + // else + // %record_remove% "" + // (%record_insert% + // final_bindings_id + // (%static_access% final_bindings_id) + // ) + // + // %record_remove% "" final_bindings_id + // + // else + // null + fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { + let rest_field = LocIdent::fresh(); + + // `%record_insert% "" bindings_id value_id` + let init_bindings = mk_app!( + make::op2( + record_insert(), + Term::Str(rest_field.into()), + Term::Var(bindings_id) + ), + Term::Var(value_id) + ); + + // The fold block: + // + // " bindings_id value_id` + // > + // + // + // if %field_is_defined% field value_id then + // let local_bindings_id = cont in + // + // if local_bindings_id == null then + // null + // else + // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // let local_bindings_id = in + // + let fold_block: RichTerm = self.patterns.iter().fold(init_bindings, |cont, field_pat| { + let field = field_pat.matched_id; + let local_bindings_id = LocIdent::fresh(); + let local_value_id = LocIdent::fresh(); + + // let bindings_id = in + // + let updated_bindings_let = make::let_in( + local_bindings_id, + remove_from_rest(rest_field, field, local_bindings_id), + field_pat + .pattern + .compile_part(local_value_id, local_bindings_id), + ); + + // let value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) + // in + let inner_else_block = make::let_in( + local_value_id, + make::op1( + UnaryOp::StaticAccess(field), + make::op1( + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(local_bindings_id), + ), + ), + updated_bindings_let, + ); + + // The innermost if: + // + // if local_bindings_id == null then + // null + // else + // + let inner_if = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(local_bindings_id), Term::Null), + Term::Null, + inner_else_block, + ); + + // let local_bindings_id = cont in + let binding_cont_let = make::let_in(local_bindings_id, cont, inner_if); + + // %field_is_defined% field value_id + let has_field = make::op2( + BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), + Term::Str(field.label().into()), + Term::Var(value_id), + ); + + make::if_then_else(has_field, binding_cont_let, Term::Null) + }); + + // %typeof% value_id == 'Record + let is_record: RichTerm = make::op2( + BinaryOp::Eq(), + make::op1(UnaryOp::Typeof(), Term::Var(value_id)), + Term::Enum("Record".into()), + ); + + let final_bindings_id = LocIdent::fresh(); + + // %record_remove% "" final_bindings_id + let bindings_without_rest = make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(rest_field.into()), + Term::Var(final_bindings_id), + ); + + // the last block of the outer if, which depends on the tail of the record pattern + let tail_block = match self.tail { + // if final_bindings_id == null || + // (%static_access% final_bindings_id) != {} then + // null + // else + // %record_remove% "" final_bindings_id + RecordPatternTail::Empty => make::if_then_else( + mk_app!( + make::op1( + UnaryOp::BoolOr(), + make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null) + ), + make::op1( + UnaryOp::BoolNot(), + make::op2( + BinaryOp::Eq(), + make::op1( + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(final_bindings_id) + ), + Term::Record(RecordData::empty()) + ) + ) + ), + Term::Null, + bindings_without_rest, + ), + // %record_remove% "" final_bindings_id + RecordPatternTail::Open => bindings_without_rest, + // if final_bindings_id == null then + // null + // else + // %record_remove% "" + // (%record_insert% + // final_bindings_id + // (%static_access% final_bindings_id) + // ) + RecordPatternTail::Capture(rest) => make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null), + Term::Null, + make::op2( + BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), + Term::Str(rest_field.into()), + mk_app!( + make::op2( + record_insert(), + Term::Str(rest.label().into()), + Term::Var(final_bindings_id), + ), + make::op1( + UnaryOp::StaticAccess(rest_field.into()), + Term::Var(final_bindings_id) + ) + ), + ), + ), + }; + + // The let enclosing the fold block and the final block: + // let final_bindings_id = in + let outer_let = make::let_in(final_bindings_id, fold_block, tail_block); + + // if then else null + make::if_then_else(is_record, outer_let, Term::Null) + } +} + +impl CompilePart for EnumPattern { + fn compile_part(&self, _value_id: LocIdent, _bindings_id: LocIdent) -> RichTerm { + todo!() + } +} + +pub trait Compile { + /// Compile a match expression to a Nickel expression with the provided `value_id` as a + /// free variable (representing a placeholder for the matched expression). + fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm; +} + +impl Compile for MatchData { + // Compilation of a full match expression (code between < and > is Rust code, think of it + // as a kind of templating): + // + // let value_id = value in + // + // + // let init_bindings_id = {} in + // let bindings_id = value_id init_bindings_id in + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %pattern_branch% body bindings_id + fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { + let default_branch = self.default.unwrap_or_else(|| { + Term::RuntimeError(EvalError::NonExhaustiveMatch { + value: value.clone(), + pos, + }) + .into() + }); + let value_id = LocIdent::fresh(); + + // The fold block: + // + // + // let init_bindings_id = {} in + // let bindings_id = in + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %pattern_branch% body bindings_id + let fold_block = + self.branches + .into_iter() + .rev() + .fold(default_branch, |cont, (pat, body)| { + let init_bindings_id = LocIdent::fresh(); + let bindings_id = LocIdent::fresh(); + + // inner if block: + // + // if bindings_id == null then + // cont + // else + // # this primop evaluates body with an environment extended with bindings_id + // %pattern_branch% bindings_id body + let inner = make::if_then_else( + make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), + cont, + mk_app!( + make::op1(UnaryOp::PatternBranch(), Term::Var(bindings_id),), + body + ), + ); + + // The two initial chained let-bindings: + // + // let init_bindings_id = {} in + // let bindings_id = in + // + make::let_in( + init_bindings_id, + Term::Record(RecordData::empty()), + make::let_in( + bindings_id, + pat.compile_part(value_id, init_bindings_id), + inner, + ), + ) + }); + + // let value_id = value in + make::let_in(value_id, value, fold_block) + } +} diff --git a/core/src/term/pattern/mod.rs b/core/src/term/pattern/mod.rs index 02da17ef18..b67f37f724 100644 --- a/core/src/term/pattern/mod.rs +++ b/core/src/term/pattern/mod.rs @@ -16,6 +16,8 @@ use crate::{ typ::{Type, TypeF}, }; +pub mod compile; + #[derive(Debug, PartialEq, Clone)] pub enum PatternData { /// A simple pattern consisting of an identifier. Match anything and bind the result to the @@ -270,462 +272,3 @@ impl ElaborateContract for RecordPattern { }) } } - -/// Compilation of pattern matching down to pattern-less Nickel code. -pub mod compile { - use super::*; - use crate::{ - mk_app, - term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, - }; - - /// Generate a standard `%record_insert` primop as generated by the parser. - fn record_insert() -> BinaryOp { - BinaryOp::DynExtend { - ext_kind: RecordExtKind::WithValue, - metadata: Default::default(), - pending_contracts: Default::default(), - // We don't really care for optional fields here and we don't need to filter them out - op_kind: RecordOpKind::ConsiderAllFields, - } - } - - /// Generate a Nickel expression which inserts a new binding in the working dictionary. - /// - /// `%record_insert% "" bindings_id value_id` - fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { - mk_app!( - make::op2( - record_insert(), - Term::Str(id.label().into()), - Term::Var(bindings_id) - ), - Term::Var(value_id) - ) - } - - /// Generate a Nickel expression which update the `REST_FIELD` field of the working bindings by - /// remove the `field` from it. - /// - /// ```nickel - /// %record_insert% "" - /// (%record_remove% "" bindings_id) - /// (%record_remove% "" - /// (%static_access(REST_FIELD) bindings_id) - /// ) - /// ``` - fn remove_from_rest(rest_field: LocIdent, field: LocIdent, bindings_id: LocIdent) -> RichTerm { - let rest = make::op1( - UnaryOp::StaticAccess(rest_field.into()), - Term::Var(bindings_id), - ); - - let rest_shrinked = make::op2( - BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(field.label().into()), - rest, - ); - - let bindings_shrinked = make::op2( - BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(rest_field.into()), - Term::Var(bindings_id), - ); - - mk_app!( - make::op2( - record_insert(), - Term::Str(rest_field.into()), - bindings_shrinked, - ), - rest_shrinked - ) - } - - pub trait CompilePart { - /// Compile part of a broader pattern to a Nickel expression with two free variables (which - /// is equivalent to a function of two arguments): - /// - /// 1. The value being matched on (`value_id`) - /// 2. A dictionary of the current assignment of pattern variables to sub-expressions of the - /// matched expression - /// - /// The compiled expression must return either `null` if the pattern doesn't match, or a - /// dictionary mapping pattern variables to the corresponding sub-expressions of the - /// matched value if the pattern matched with success. - /// - /// Although the `value` and `bindings` could be passed as [crate::term::RichTerm] in all - /// generality, forcing them to be variable makes it less likely that the compilation - /// duplicates sub-expressions: because the value and the bindings must always be passed in - /// a variable, they are free to share without risk of duplicating work. - fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm; - } - - impl CompilePart for Pattern { - // Compilation of the top-level pattern wrapper (code between < and > is Rust code, think - // a template of some sort): - // - // < if let Some(alias) = alias { > - // let bindings = %record_insert% bindings arg in - // < } > - // arg bindings - fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { - // The last instruction - // - let continuation = self.data.compile_part(value_id, bindings_id); - - // Either - // - // let bindings = %record_insert% bindings arg in - // continuation - // - // if `alias` is set, or just `continuation` otherwise. - if let Some(alias) = self.alias { - make::let_in( - bindings_id, - insert_binding(alias, value_id, bindings_id), - continuation, - ) - } else { - continuation - } - } - } - - impl CompilePart for PatternData { - fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { - match self { - PatternData::Any(id) => { - // %record_insert% "" value_id bindings_id - insert_binding(*id, value_id, bindings_id) - } - PatternData::Record(pat) => pat.compile_part(value_id, bindings_id), - PatternData::Enum(pat) => pat.compile_part(value_id, bindings_id), - } - } - } - - impl CompilePart for RecordPattern { - // Compilation of the top-level record pattern wrapper. - // - // To check that the value doesn't contain extra fields, or to capture the rest of the - // record when using the `..rest` construct, we need to remove matched fields from the - // original value at each stage and thread this working value in addition to the bindings. - // - // We don't have tuples, and to avoid adding an indirection (by storing the current state - // as `{rest, bindings}` where bindings itself is a record), we store this rest alongside - // the bindings in a special field which is a freshly generated indentifier. This is an - // implementation detail which isn't very hard to change, should we have to. - // - // if %typeof% value_id == 'Record - // let final_bindings_id = - // - // " bindings_id value_id` - // > - // if %field_is_defined% field value_id then - // let local_bindings_id = cont in - // - // if local_bindings_id == null then - // null - // else - // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) - // let local_bindings_id = in - // - // else - // null - // - // - // in - // - // - // # if tail is empty, check that the value doesn't contain extra fields - // if final_bindings_id == null || - // (%static_access% final_bindings_id) != {} then - // null - // else - // %record_remove% "" final_bindings_id - // - // # move the rest from REST_FIELD to rest, and remove REST_FIELD - // if final_bindings_id == null then - // null - // else - // %record_remove% "" - // (%record_insert% - // final_bindings_id - // (%static_access% final_bindings_id) - // ) - // - // %record_remove% "" final_bindings_id - // - // else - // null - fn compile_part(&self, value_id: LocIdent, bindings_id: LocIdent) -> RichTerm { - let rest_field = LocIdent::fresh(); - - // `%record_insert% "" bindings_id value_id` - let init_bindings = mk_app!( - make::op2( - record_insert(), - Term::Str(rest_field.into()), - Term::Var(bindings_id) - ), - Term::Var(value_id) - ); - - // The fold block: - // - // " bindings_id value_id` - // > - // - // - // if %field_is_defined% field value_id then - // let local_bindings_id = cont in - // - // if local_bindings_id == null then - // null - // else - // let local_value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) - // let local_bindings_id = in - // - let fold_block: RichTerm = - self.patterns.iter().fold(init_bindings, |cont, field_pat| { - let field = field_pat.matched_id; - let local_bindings_id = LocIdent::fresh(); - let local_value_id = LocIdent::fresh(); - - // let bindings_id = in - // - let updated_bindings_let = make::let_in( - local_bindings_id, - remove_from_rest(rest_field, field, local_bindings_id), - field_pat - .pattern - .compile_part(local_value_id, local_bindings_id), - ); - - // let value_id = %static_access(field)% (%static_access(REST_FIELD)% local_bindings_id) - // in - let inner_else_block = make::let_in( - local_value_id, - make::op1( - UnaryOp::StaticAccess(field), - make::op1( - UnaryOp::StaticAccess(rest_field.into()), - Term::Var(local_bindings_id), - ), - ), - updated_bindings_let, - ); - - // The innermost if: - // - // if local_bindings_id == null then - // null - // else - // - let inner_if = make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(local_bindings_id), Term::Null), - Term::Null, - inner_else_block, - ); - - // let local_bindings_id = cont in - let binding_cont_let = make::let_in(local_bindings_id, cont, inner_if); - - // %field_is_defined% field value_id - let has_field = make::op2( - BinaryOp::FieldIsDefined(RecordOpKind::ConsiderAllFields), - Term::Str(field.label().into()), - Term::Var(value_id), - ); - - make::if_then_else(has_field, binding_cont_let, Term::Null) - }); - - // %typeof% value_id == 'Record - let is_record: RichTerm = make::op2( - BinaryOp::Eq(), - make::op1(UnaryOp::Typeof(), Term::Var(value_id)), - Term::Enum("Record".into()), - ); - - let final_bindings_id = LocIdent::fresh(); - - // %record_remove% "" final_bindings_id - let bindings_without_rest = make::op2( - BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(rest_field.into()), - Term::Var(final_bindings_id), - ); - - // the last block of the outer if, which depends on the tail of the record pattern - let tail_block = match self.tail { - // if final_bindings_id == null || - // (%static_access% final_bindings_id) != {} then - // null - // else - // %record_remove% "" final_bindings_id - RecordPatternTail::Empty => make::if_then_else( - mk_app!( - make::op1( - UnaryOp::BoolOr(), - make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null) - ), - make::op1( - UnaryOp::BoolNot(), - make::op2( - BinaryOp::Eq(), - make::op1( - UnaryOp::StaticAccess(rest_field.into()), - Term::Var(final_bindings_id) - ), - Term::Record(RecordData::empty()) - ) - ) - ), - Term::Null, - bindings_without_rest, - ), - // %record_remove% "" final_bindings_id - RecordPatternTail::Open => bindings_without_rest, - // if final_bindings_id == null then - // null - // else - // %record_remove% "" - // (%record_insert% - // final_bindings_id - // (%static_access% final_bindings_id) - // ) - RecordPatternTail::Capture(rest) => make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(final_bindings_id), Term::Null), - Term::Null, - make::op2( - BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), - Term::Str(rest_field.into()), - mk_app!( - make::op2( - record_insert(), - Term::Str(rest.label().into()), - Term::Var(final_bindings_id), - ), - make::op1( - UnaryOp::StaticAccess(rest_field.into()), - Term::Var(final_bindings_id) - ) - ), - ), - ), - }; - - // The let enclosing the fold block and the final block: - // let final_bindings_id = in - let outer_let = make::let_in(final_bindings_id, fold_block, tail_block); - - // if then else null - make::if_then_else(is_record, outer_let, Term::Null) - } - } - - impl CompilePart for EnumPattern { - fn compile_part(&self, _value_id: LocIdent, _bindings_id: LocIdent) -> RichTerm { - todo!() - } - } - - pub trait Compile { - /// Compile a match expression to a Nickel expression with the provided `value_id` as a - /// free variable (representing a placeholder for the matched expression). - fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm; - } - - impl Compile for MatchData { - // Compilation of a full match expression (code between < and > is Rust code, think of it - // as a kind of templating): - // - // let value_id = value in - // - // - // let init_bindings_id = {} in - // let bindings_id = value_id init_bindings_id in - // - // if bindings_id == null then - // cont - // else - // # this primop evaluates body with an environment extended with bindings_id - // %pattern_branch% body bindings_id - fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { - let default_branch = self.default.unwrap_or_else(|| { - Term::RuntimeError(EvalError::NonExhaustiveMatch { - value: value.clone(), - pos, - }) - .into() - }); - let value_id = LocIdent::fresh(); - - // The fold block: - // - // - // let init_bindings_id = {} in - // let bindings_id = in - // - // if bindings_id == null then - // cont - // else - // # this primop evaluates body with an environment extended with bindings_id - // %pattern_branch% body bindings_id - let fold_block = - self.branches - .into_iter() - .rev() - .fold(default_branch, |cont, (pat, body)| { - let init_bindings_id = LocIdent::fresh(); - let bindings_id = LocIdent::fresh(); - - // inner if block: - // - // if bindings_id == null then - // cont - // else - // # this primop evaluates body with an environment extended with bindings_id - // %pattern_branch% bindings_id body - let inner = make::if_then_else( - make::op2(BinaryOp::Eq(), Term::Var(bindings_id), Term::Null), - cont, - mk_app!( - make::op1(UnaryOp::PatternBranch(), Term::Var(bindings_id),), - body - ), - ); - - // The two initial chained let-bindings: - // - // let init_bindings_id = {} in - // let bindings_id = in - // - make::let_in( - init_bindings_id, - Term::Record(RecordData::empty()), - make::let_in( - bindings_id, - pat.compile_part(value_id, init_bindings_id), - inner, - ), - ) - }); - - // let value_id = value in - make::let_in(value_id, value, fold_block) - } - } -} From 81018e432156f98acb2e8ef8633ea57a7c6a420e Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Tue, 13 Feb 2024 18:37:41 +0100 Subject: [PATCH 13/14] Fix clippy and rustdoc warnings, remove unneeded code --- core/src/eval/operation.rs | 11 ----------- core/src/term/mod.rs | 4 ++-- core/src/term/pattern/compile.rs | 19 ++++++++----------- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index 56279c4bfd..cfb07ec177 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -1166,17 +1166,6 @@ impl VirtualMachine { .pop_arg(&self.cache) .ok_or_else(|| EvalError::NotEnoughArgs(2, String::from("with_env"), pos_op))?; - // This is fragile...unwrapping all the closures to reach an actual term (and, more - // importantly, and environment) - loop { - match_sharedterm!(match (cont.body.term) { - Term::Closure(idx) => { - cont = self.cache.get(idx); - } - _ => break, - }); - } - match_sharedterm!(match (t) { Term::Record(data) => { for (id, field) in data.fields { diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index a2ea75011e..3ac7558123 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1384,8 +1384,8 @@ pub enum UnaryOp { /// evaluate a pattern branch given as a second argument (which isn't a proper primop argument /// but is stored on the stack) in its environment augmented with the bindings. /// - /// [Self::PatternBranch()] isn't specific to pattern branches: what it does is to take a set - /// of extra bindings and a term, and run the term in the augmented environment. While it could + /// [Self::PatternBranch] isn't specific to pattern branches: what it does is to take a set of + /// extra bindings and a term, and run the term in the augmented environment. While it could /// useful to implement other operations, it would be fragile as a generic `with_env` operator, /// because the term to be run must not be burried into a closure, or the environment /// augmentation would be shallow and have no effect on the actual content of the term (we have diff --git a/core/src/term/pattern/compile.rs b/core/src/term/pattern/compile.rs index 1f0b80eaee..e4dc0bf55d 100644 --- a/core/src/term/pattern/compile.rs +++ b/core/src/term/pattern/compile.rs @@ -18,10 +18,10 @@ //! a bottleneck. //! //! Most build blocks are generated programmatically rather than written out as e.g. members of the -//! [internals][crate::stdlib::internals] stdlib module. While clunkier, this lets more easily -//! change the compilation strategy in the future and is already a more efficient in the current -//! setting (combining building blocks from the standard library would require much more function -//! applications, while we can generate inlined versions on-the-fly here). +//! [internals] stdlib module. While clunkier, this lets more easily change the compilation +//! strategy in the future and is already a more efficient in the current setting (combining +//! building blocks from the standard library would require much more function applications, while +//! we can generate inlined versions on-the-fly here). use super::*; use crate::{ mk_app, @@ -64,10 +64,7 @@ fn insert_binding(id: LocIdent, value_id: LocIdent, bindings_id: LocIdent) -> Ri /// ) /// ``` fn remove_from_rest(rest_field: LocIdent, field: LocIdent, bindings_id: LocIdent) -> RichTerm { - let rest = make::op1( - UnaryOp::StaticAccess(rest_field.into()), - Term::Var(bindings_id), - ); + let rest = make::op1(UnaryOp::StaticAccess(rest_field), Term::Var(bindings_id)); let rest_shrinked = make::op2( BinaryOp::DynRemove(RecordOpKind::ConsiderAllFields), @@ -262,7 +259,7 @@ impl CompilePart for RecordPattern { make::op1( UnaryOp::StaticAccess(field), make::op1( - UnaryOp::StaticAccess(rest_field.into()), + UnaryOp::StaticAccess(rest_field), Term::Var(local_bindings_id), ), ), @@ -328,7 +325,7 @@ impl CompilePart for RecordPattern { make::op2( BinaryOp::Eq(), make::op1( - UnaryOp::StaticAccess(rest_field.into()), + UnaryOp::StaticAccess(rest_field), Term::Var(final_bindings_id) ), Term::Record(RecordData::empty()) @@ -361,7 +358,7 @@ impl CompilePart for RecordPattern { Term::Var(final_bindings_id), ), make::op1( - UnaryOp::StaticAccess(rest_field.into()), + UnaryOp::StaticAccess(rest_field), Term::Var(final_bindings_id) ) ), From 2c206c6677b87be2a108f6ac8c98520b6075c626 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 15 Feb 2024 19:39:39 +0100 Subject: [PATCH 14/14] Implement reviewer suggestions Co-authored-by: jneem --- core/src/error/mod.rs | 8 ++------ core/src/term/pattern/compile.rs | 4 ++-- core/src/typecheck/operation.rs | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs index f7708547ab..2e72e8162e 100644 --- a/core/src/error/mod.rs +++ b/core/src/error/mod.rs @@ -1324,12 +1324,8 @@ impl IntoDiagnostics for EvalError { ); vec![Diagnostic::error() - .with_message("non-exhaustive pattern matching") - .with_labels(labels) - .with_notes(vec![ - format!("This match expression isn't exhaustive"), - "It has been applied to an argument which doesn't match any of the branches".to_owned(), - ])] + .with_message("unmatched pattern") + .with_labels(labels)] } EvalError::IllegalPolymorphicTailAccess { action, diff --git a/core/src/term/pattern/compile.rs b/core/src/term/pattern/compile.rs index e4dc0bf55d..db3a696de4 100644 --- a/core/src/term/pattern/compile.rs +++ b/core/src/term/pattern/compile.rs @@ -28,7 +28,7 @@ use crate::{ term::{make, BinaryOp, MatchData, RecordExtKind, RecordOpKind, RichTerm, Term, UnaryOp}, }; -/// Generate a standard `%record_insert` primop as generated by the parser. +/// Generate a standard `%record_insert%` primop as generated by the parser. fn record_insert() -> BinaryOp { BinaryOp::DynExtend { ext_kind: RecordExtKind::WithValue, @@ -393,7 +393,7 @@ impl Compile for MatchData { // // let value_id = value in // - // diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index a953f22f00..abde19999d 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -220,7 +220,7 @@ pub fn get_uop_type( // Note that is_variant breaks parametricity, so it can't get a polymorphic type. // Dyn -> Bool UnaryOp::EnumIsVariant() => (mk_uniftype::dynamic(), mk_uniftype::bool()), - // [crate::term::UnaryOp::WithEnv] shouldn't appear anywhere in actual code, because its + // [crate::term::UnaryOp::PatternBranch] shouldn't appear anywhere in actual code, because its // second argument can't be properly typechecked: it has unbound variables. However, it's // not hard to come up with a vague working type for it, so we do. // forall a. {_ : a} -> Dyn -> Dyn