Skip to content

Commit

Permalink
Specialized pattern compilation for enum tags (#1846)
Browse files Browse the repository at this point in the history
The recent introduction of full pattern matching incurred a performance
regression for very large enum contracts.

Indeed, previously enum-only pattern matching would directly index into
a hashmap of the branches of the pattern - where the key is the enum tag
which is the pattern associated with the branch -, giving amortized
constant time.

The more powerful and generic pattern matching introduced later instead
compiles each pattern to a (potentially complex) booleanc check. This is
necessary in general, but for match statements that are just composed of
bare enum tags, this trade a constant time operation for a linear (in
the number of branches). This shown important regressions on some code
bases making heavy usage of very large enum types/contracts.

This commit specializes the compilation of match expressions when all
the patterns are enum tags to recover the old behavior. We just reuse
the unary operator `%match%`, which was precisely used to implement the
old pattern matching, and was just standing there unused since the
introduction of the new match expression compilation.
  • Loading branch information
yannham authored Mar 11, 2024
1 parent 01f640b commit c3e180f
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ expression: err
error: unmatched pattern
┌─ [INPUTS_PATH]/errors/non_exhaustive_match.ncl:7:9
6let x = if true then 'a else 'b in
-- this value doesn't match any branch
7let f = match {
│ ╭─────────^
8 │ │ 'c => "hello",
9 │ │ 'd => "adios",
10 │ │ }
│ ╰─^ in this match expression
· │
13f x
- this value doesn't match any branch


= This match expression isn't exhaustive, matching only the following pattern(s): `'c, 'd`
= But it has been applied to an argument which doesn't match any of those patterns
2 changes: 1 addition & 1 deletion core/src/error/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,7 @@ impl IntoDiagnostics<FileId> for EvalError {
);

vec![Diagnostic::error()
.with_message("non-exhaustive pattern matching")
.with_message("unmatched pattern")
.with_labels(labels)
.with_notes(vec![
format!("This match expression isn't exhaustive, matching only the following pattern(s): `{tag_list}`"),
Expand Down
6 changes: 3 additions & 3 deletions core/src/eval/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
Err(mk_type_error!("embed", "Enum"))
}
}
UnaryOp::Match { has_default } => {
UnaryOp::TagsOnlyMatch { has_default } => {
let (cases_closure, ..) = self
.stack
.pop_arg(&self.cache)
Expand Down Expand Up @@ -338,7 +338,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {

let mut cases = match cases_term.into_owned() {
Term::Record(r) => r.fields,
_ => panic!("invalid argument for match"),
_ => panic!("invalid argument for %match%"),
};

cases
Expand All @@ -348,7 +348,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
// itself, aren't accessible in the surface language. They are
// generated by the interpreter, and should never contain field without
// definition.
body: field.value.expect("match cases must have a definition"),
body: field.value.expect("%match% cases must have a definition"),
env: cases_env,
})
.or(default)
Expand Down
9 changes: 6 additions & 3 deletions core/src/term/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1150,8 +1150,11 @@ pub enum UnaryOp {
/// `embed someId` act like the identity.
Embed(LocIdent),

/// Evaluate a match block applied to an argument.
Match { has_default: bool },
/// A specialized primop for match when all patterns are enum tags. In that case, instead of
/// compiling to a generic sequence of if-then-else, we can be much more efficient by indexing
/// into a hashmap. [Self::TagsOnlyMatch] takes additional lazy arguments: a record mapping
/// tags to the corresponding branches, and the default case when `has_default` is `true`.
TagsOnlyMatch { has_default: bool },

/// Static access to a record field.
///
Expand Down Expand Up @@ -1399,7 +1402,7 @@ impl fmt::Display for UnaryOp {
BoolNot() => write!(f, "bool_not"),
Blame() => write!(f, "blame"),
Embed(_) => write!(f, "embed"),
Match { .. } => write!(f, "match"),
TagsOnlyMatch { .. } => write!(f, "match"),
StaticAccess(_) => write!(f, "static_access"),
ArrayMap() => write!(f, "map"),
RecordMap() => write!(f, "record_map"),
Expand Down
57 changes: 55 additions & 2 deletions core/src/term/pattern/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,8 @@ pub trait Compile {
}

impl Compile for MatchData {
// Compilation of a full match expression (code between < and > is Rust code, think of it
// as a kind of templating):
// Compilation of a full match expression (code between < and > is Rust code, think of it as a
// kind of templating). Note that some special cases compile differently as optimizations.
//
// let value_id = value in
//
Expand All @@ -627,6 +627,27 @@ impl Compile for MatchData {
// # this primop evaluates body with an environment extended with bindings_id
// %pattern_branch% body bindings_id
fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm {
if self.branches.iter().all(|(pat, _)| {
matches!(
pat.data,
PatternData::Enum(EnumPattern { pattern: None, .. })
)
}) {
let tags_only = self.branches.into_iter().map(|(pat, body)| {
let PatternData::Enum(EnumPattern {tag, ..}) = pat.data else {
panic!("match compilation: just tested that all cases are enum tags, but found a non enum tag pattern");
};

(tag, body)
}).collect();

return TagsOnlyMatch {
branches: tags_only,
default: self.default,
}
.compile(value, pos);
}

let default_branch = self.default.unwrap_or_else(|| {
Term::RuntimeError(EvalError::NonExhaustiveMatch {
value: value.clone(),
Expand Down Expand Up @@ -694,3 +715,35 @@ impl Compile for MatchData {
make::let_in(value_id, value, fold_block)
}
}

/// Simple wrapper used to implement specialization of match statements when all of the patterns
/// are enum tags. Instead of a sequence of conditionals (which has linear time complexity), we use
/// a special primops based on a hashmap, which has amortized constant time complexity.
struct TagsOnlyMatch {
branches: Vec<(LocIdent, RichTerm)>,
default: Option<RichTerm>,
}

impl Compile for TagsOnlyMatch {
fn compile(self, value: RichTerm, pos: TermPos) -> RichTerm {
// We simply use the corresponding specialized primop in that case.
let match_op = mk_app!(
make::op1(
UnaryOp::TagsOnlyMatch {
has_default: self.default.is_some()
},
value
)
.with_pos(pos),
Term::Record(RecordData::with_field_values(self.branches.into_iter()))
);

let match_op = if let Some(default) = self.default {
mk_app!(match_op, default)
} else {
match_op
};

match_op.with_pos(pos)
}
}
10 changes: 1 addition & 9 deletions core/src/term/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,15 +318,7 @@ impl RecordData {
pub fn with_field_values(field_values: impl IntoIterator<Item = (LocIdent, RichTerm)>) -> Self {
let fields = field_values
.into_iter()
.map(|(id, value)| {
(
id,
Field {
value: Some(value),
..Default::default()
},
)
})
.map(|(id, value)| (id, Field::from(value)))
.collect();

RecordData {
Expand Down
2 changes: 1 addition & 1 deletion core/src/typecheck/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ pub fn get_uop_type(
(domain, codomain)
}
// This should not happen, as a match primop is only produced during evaluation.
UnaryOp::Match { .. } => panic!("cannot typecheck match primop"),
UnaryOp::TagsOnlyMatch { .. } => panic!("cannot typecheck match primop"),
// Morally, Label -> Label
// Dyn -> Dyn
UnaryOp::ChangePolarity()
Expand Down

0 comments on commit c3e180f

Please sign in to comment.