From c3e180f077ac83ff0d9c744d1009f31ff386702c Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 11 Mar 2024 15:06:16 +0000 Subject: [PATCH] Specialized pattern compilation for enum tags (#1846) The recent introduction of full pattern matching incurred a performance regression for very large enum contracts. Indeed, previously enum-only pattern matching would directly index into a hashmap of the branches of the pattern - where the key is the enum tag which is the pattern associated with the branch -, giving amortized constant time. The more powerful and generic pattern matching introduced later instead compiles each pattern to a (potentially complex) booleanc check. This is necessary in general, but for match statements that are just composed of bare enum tags, this trade a constant time operation for a linear (in the number of branches). This shown important regressions on some code bases making heavy usage of very large enum types/contracts. This commit specializes the compilation of match expressions when all the patterns are enum tags to recover the old behavior. We just reuse the unary operator `%match%`, which was precisely used to implement the old pattern matching, and was just standing there unused since the introduction of the new match expression compilation. --- ..._eval_stderr_non_exhaustive_match.ncl.snap | 10 ++-- core/src/error/mod.rs | 2 +- core/src/eval/operation.rs | 6 +- core/src/term/mod.rs | 9 ++- core/src/term/pattern/compile.rs | 57 ++++++++++++++++++- core/src/term/record.rs | 10 +--- core/src/typecheck/operation.rs | 2 +- 7 files changed, 72 insertions(+), 24 deletions(-) diff --git a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_non_exhaustive_match.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_non_exhaustive_match.ncl.snap index d561986938..5e190a7d2a 100644 --- a/cli/tests/snapshot/snapshots/snapshot__eval_stderr_non_exhaustive_match.ncl.snap +++ b/cli/tests/snapshot/snapshots/snapshot__eval_stderr_non_exhaustive_match.ncl.snap @@ -5,14 +5,14 @@ expression: err error: unmatched pattern ┌─ [INPUTS_PATH]/errors/non_exhaustive_match.ncl:7:9 │ + 6 │ let x = if true then 'a else 'b in + │ -- this value doesn't match any branch 7 │ let f = match { │ ╭─────────^ 8 │ │ 'c => "hello", 9 │ │ 'd => "adios", 10 │ │ } │ ╰─^ in this match expression - · │ -13 │ f x - │ - this value doesn't match any branch - - + │ + = This match expression isn't exhaustive, matching only the following pattern(s): `'c, 'd` + = But it has been applied to an argument which doesn't match any of those patterns diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs index f3929156f2..d5dab55990 100644 --- a/core/src/error/mod.rs +++ b/core/src/error/mod.rs @@ -1311,7 +1311,7 @@ impl IntoDiagnostics for EvalError { ); vec![Diagnostic::error() - .with_message("non-exhaustive pattern matching") + .with_message("unmatched pattern") .with_labels(labels) .with_notes(vec![ format!("This match expression isn't exhaustive, matching only the following pattern(s): `{tag_list}`"), diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index c10de00cdc..1ffdfe18b6 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -310,7 +310,7 @@ impl VirtualMachine { Err(mk_type_error!("embed", "Enum")) } } - UnaryOp::Match { has_default } => { + UnaryOp::TagsOnlyMatch { has_default } => { let (cases_closure, ..) = self .stack .pop_arg(&self.cache) @@ -338,7 +338,7 @@ impl VirtualMachine { let mut cases = match cases_term.into_owned() { Term::Record(r) => r.fields, - _ => panic!("invalid argument for match"), + _ => panic!("invalid argument for %match%"), }; cases @@ -348,7 +348,7 @@ impl VirtualMachine { // itself, aren't accessible in the surface language. They are // generated by the interpreter, and should never contain field without // definition. - body: field.value.expect("match cases must have a definition"), + body: field.value.expect("%match% cases must have a definition"), env: cases_env, }) .or(default) diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index b575b94017..4758b66fcb 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1150,8 +1150,11 @@ pub enum UnaryOp { /// `embed someId` act like the identity. Embed(LocIdent), - /// Evaluate a match block applied to an argument. - Match { has_default: bool }, + /// A specialized primop for match when all patterns are enum tags. In that case, instead of + /// compiling to a generic sequence of if-then-else, we can be much more efficient by indexing + /// into a hashmap. [Self::TagsOnlyMatch] takes additional lazy arguments: a record mapping + /// tags to the corresponding branches, and the default case when `has_default` is `true`. + TagsOnlyMatch { has_default: bool }, /// Static access to a record field. /// @@ -1399,7 +1402,7 @@ impl fmt::Display for UnaryOp { BoolNot() => write!(f, "bool_not"), Blame() => write!(f, "blame"), Embed(_) => write!(f, "embed"), - Match { .. } => write!(f, "match"), + TagsOnlyMatch { .. } => write!(f, "match"), StaticAccess(_) => write!(f, "static_access"), ArrayMap() => write!(f, "map"), RecordMap() => write!(f, "record_map"), diff --git a/core/src/term/pattern/compile.rs b/core/src/term/pattern/compile.rs index e46be17e4a..7187a2d5da 100644 --- a/core/src/term/pattern/compile.rs +++ b/core/src/term/pattern/compile.rs @@ -609,8 +609,8 @@ pub trait Compile { } impl Compile for MatchData { - // Compilation of a full match expression (code between < and > is Rust code, think of it - // as a kind of templating): + // Compilation of a full match expression (code between < and > is Rust code, think of it as a + // kind of templating). Note that some special cases compile differently as optimizations. // // let value_id = value in // @@ -627,6 +627,27 @@ impl Compile for MatchData { // # this primop evaluates body with an environment extended with bindings_id // %pattern_branch% body bindings_id fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { + if self.branches.iter().all(|(pat, _)| { + matches!( + pat.data, + PatternData::Enum(EnumPattern { pattern: None, .. }) + ) + }) { + let tags_only = self.branches.into_iter().map(|(pat, body)| { + let PatternData::Enum(EnumPattern {tag, ..}) = pat.data else { + panic!("match compilation: just tested that all cases are enum tags, but found a non enum tag pattern"); + }; + + (tag, body) + }).collect(); + + return TagsOnlyMatch { + branches: tags_only, + default: self.default, + } + .compile(value, pos); + } + let default_branch = self.default.unwrap_or_else(|| { Term::RuntimeError(EvalError::NonExhaustiveMatch { value: value.clone(), @@ -694,3 +715,35 @@ impl Compile for MatchData { make::let_in(value_id, value, fold_block) } } + +/// Simple wrapper used to implement specialization of match statements when all of the patterns +/// are enum tags. Instead of a sequence of conditionals (which has linear time complexity), we use +/// a special primops based on a hashmap, which has amortized constant time complexity. +struct TagsOnlyMatch { + branches: Vec<(LocIdent, RichTerm)>, + default: Option, +} + +impl Compile for TagsOnlyMatch { + fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm { + // We simply use the corresponding specialized primop in that case. + let match_op = mk_app!( + make::op1( + UnaryOp::TagsOnlyMatch { + has_default: self.default.is_some() + }, + value + ) + .with_pos(pos), + Term::Record(RecordData::with_field_values(self.branches.into_iter())) + ); + + let match_op = if let Some(default) = self.default { + mk_app!(match_op, default) + } else { + match_op + }; + + match_op.with_pos(pos) + } +} diff --git a/core/src/term/record.rs b/core/src/term/record.rs index 6f769f1354..c938ca3c08 100644 --- a/core/src/term/record.rs +++ b/core/src/term/record.rs @@ -318,15 +318,7 @@ impl RecordData { pub fn with_field_values(field_values: impl IntoIterator) -> Self { let fields = field_values .into_iter() - .map(|(id, value)| { - ( - id, - Field { - value: Some(value), - ..Default::default() - }, - ) - }) + .map(|(id, value)| (id, Field::from(value))) .collect(); RecordData { diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index abde19999d..cd519499ea 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -59,7 +59,7 @@ pub fn get_uop_type( (domain, codomain) } // This should not happen, as a match primop is only produced during evaluation. - UnaryOp::Match { .. } => panic!("cannot typecheck match primop"), + UnaryOp::TagsOnlyMatch { .. } => panic!("cannot typecheck match primop"), // Morally, Label -> Label // Dyn -> Dyn UnaryOp::ChangePolarity()