Fix expressions evaluated multiple times
This commit fixes an issue introduced by the removal of generated
variables combined with the `should_update` optimization, which tries to
avoid thunk updates that are unnecessary.

However, as detailed in the added documentation for `should_update`, this
optimization didn't interact well with the removal of generated variables:
inner expressions were no longer shared for variables whose value was
already a data structure in weak head normal form (arrays, records or now
enum variants), so those inner expressions would be recomputed.

This commit fixes the issue by adding the condition that data structures
that aren't yet closurized must not skip the thunk update.

In doing so, this commit also adds the missing closurization of ADTs
(enum variants), whose arguments could otherwise be re-evaluated many
times as well.
yannham committed Jan 29, 2024
1 parent ab63d88 commit 10ddd1b
Showing 8 changed files with 116 additions and 26 deletions.
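
To make the interaction concrete, here is a minimal, self-contained Rust sketch of the problem and of the fix. The `Term`, `Thunk`, `force` and `work_done` names are invented for this illustration and are not the crate's API; `closurized` stands in for the attribute flag carried by arrays, records and enum variants, and "closurizing" stands in for the expensive, sharing-creating first evaluation step.

use std::cell::RefCell;

#[derive(Clone)]
enum Term {
    // A data structure (array, record or enum variant in the real AST).
    Data { closurized: bool },
    // A plain constant.
    Num(i64),
}

impl Term {
    // Syntactic WHNF: data structures and constants both qualify.
    fn is_whnf(&self) -> bool {
        matches!(self, Term::Data { .. } | Term::Num(_))
    }

    // Effective WHNF: an unclosurized data structure still has work left to do.
    fn is_eff_whnf(&self) -> bool {
        match self {
            Term::Data { closurized } => *closurized,
            _ => self.is_whnf(),
        }
    }
}

struct Thunk {
    body: RefCell<Term>,
    work_done: RefCell<u32>, // counts how many times the closurization work ran
}

impl Thunk {
    fn new(t: Term) -> Self {
        Thunk {
            body: RefCell::new(t),
            work_done: RefCell::new(0),
        }
    }

    // `use_eff_whnf = false` models the old `should_update`, which skipped the
    // update for anything in syntactic WHNF; `true` models the fixed check.
    fn force(&self, use_eff_whnf: bool) -> Term {
        let should_update = if use_eff_whnf {
            !self.body.borrow().is_eff_whnf()
        } else {
            !self.body.borrow().is_whnf()
        };

        // "Evaluating" an unclosurized data structure is the expensive,
        // sharing-creating step we would like to do at most once.
        let evaluated = match self.body.borrow().clone() {
            Term::Data { closurized: false } => {
                *self.work_done.borrow_mut() += 1;
                Term::Data { closurized: true }
            }
            t => t,
        };

        if should_update {
            *self.body.borrow_mut() = evaluated.clone();
        }

        evaluated
    }
}

fn main() {
    let old = Thunk::new(Term::Data { closurized: false });
    let fixed = Thunk::new(Term::Data { closurized: false });

    for _ in 0..3 {
        old.force(false);
        fixed.force(true);
    }

    // Old check: the data structure is already a syntactic WHNF, the update is
    // skipped, and the closurization work is redone on every access.
    assert_eq!(*old.work_done.borrow(), 3);
    // Fixed check: the first access updates the thunk, later accesses share it.
    assert_eq!(*fixed.work_done.borrow(), 1);

    // Constants are effective WHNFs already, so they never need an update.
    assert!(Term::Num(1).is_eff_whnf());
}

With the old, purely syntactic WHNF check the thunk never records the closurized result, so the work is redone on every access; with the effective-WHNF check it runs exactly once.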
8 changes: 4 additions & 4 deletions core/src/eval/cache/lazy.rs
@@ -572,11 +572,11 @@ impl Thunk {

/// Determine if a thunk is worth being put on the stack for future update.
///
/// Typically, WHNFs and annotated values will not be evaluated to a simpler expression and are
/// not worth updating.
/// Typically, expressions in weak head normal form (more precisely, in [effective weak head
/// normal form][crate::term::Term::is_eff_whnf]) won't evaluate further and their update can
/// be skipped.
pub fn should_update(&self) -> bool {
let term = &self.borrow().body.term;
!term.is_whnf() && !term.is_annotated()
!self.borrow().body.term.is_eff_whnf()
}

/// Return a clone of the potential field dependencies stored in a revertible thunk. See
21 changes: 18 additions & 3 deletions core/src/eval/mod.rs
@@ -666,6 +666,21 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
}
}
}
// Closurize the argument of an enum variant if it's not already done. Usually this is done at the first
// time this enum variant is evaluated.
Term::EnumVariant { tag, arg, attrs } if !attrs.closurized => {
Closure {
body: RichTerm::new(
Term::EnumVariant {
tag,
arg: arg.closurize(&mut self.cache, env),
attrs: attrs.closurized(),
},
pos,
),
env: Environment::new(),
}
}
// Closurize the record if it's not already done. Usually this is done at the first
// time this record is evaluated.
Term::Record(data) if !data.attrs.closurized => {
@@ -1111,10 +1126,10 @@ pub fn subst<C: Cache>(
// substitution. Not recursing should be fine, though, because a type in term position
// turns into a contract, and we don't substitute inside contracts either currently.
| v @ Term::Type(_) => RichTerm::new(v, pos),
Term::EnumVariant(tag, t) => {
let t = subst(cache, t, initial_env, env);
Term::EnumVariant { tag, arg, attrs } => {
let arg = subst(cache, arg, initial_env, env);

RichTerm::new(Term::EnumVariant(tag, t), pos)
RichTerm::new(Term::EnumVariant { tag, arg, attrs }, pos)
}
Term::Let(id, t1, t2, attrs) => {
let t1 = subst(cache, t1, initial_env, env);
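
As a complement to the new closurization arm for `Term::EnumVariant` above, here is a hedged sketch of what closurizing the argument buys at run time; the `CacheNode`, `closurize` and `force` names are illustrative only and not the evaluator's real API. The argument ends up behind a single shared cache node, so however many times the variant's argument is consumed afterwards, the underlying computation runs at most once.

use std::cell::RefCell;
use std::rc::Rc;

// One shared cache node, standing in for the thunk allocated by closurization.
type CacheNode = Rc<RefCell<Option<i64>>>;

// "Closurize" the argument: allocate a single cache node for it, still unevaluated.
fn closurize() -> CacheNode {
    Rc::new(RefCell::new(None))
}

// Force the argument through its cache node; the expensive work runs only once.
fn force(node: &CacheNode, evals: &mut u32) -> i64 {
    if let Some(v) = *node.borrow() {
        return v;
    }
    *evals += 1; // the expensive computation actually runs here
    let v = 21 * 2;
    *node.borrow_mut() = Some(v);
    v
}

fn main() {
    let mut evals = 0;

    // A variant like 'Wrap..(expensive) now stores a handle to one shared node...
    let arg = closurize();
    // ...and duplicating the variant duplicates the handle, not the work.
    let other_use = Rc::clone(&arg);

    assert_eq!(force(&arg, &mut evals), 42);
    assert_eq!(force(&other_use, &mut evals), 42);
    assert_eq!(evals, 1); // the argument was evaluated a single time
}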
15 changes: 12 additions & 3 deletions core/src/eval/operation.rs
@@ -3127,9 +3127,18 @@ fn eq<C: Cache>(
(Term::Lbl(l1), Term::Lbl(l2)) => Ok(EqResult::Bool(l1 == l2)),
(Term::SealingKey(s1), Term::SealingKey(s2)) => Ok(EqResult::Bool(s1 == s2)),
(Term::Enum(id1), Term::Enum(id2)) => Ok(EqResult::Bool(id1.ident() == id2.ident())),
(Term::EnumVariant(id1, arg1), Term::EnumVariant(id2, arg2))
if id1.ident() == id2.ident() =>
{
(
Term::EnumVariant {
tag: tag1,
arg: arg1,
..
},
Term::EnumVariant {
tag: tag2,
arg: arg2,
..
},
) if tag1.ident() == tag2.ident() => {
Ok(gen_eqs(cache, std::iter::once((arg1, arg2)), env1, env2))
}
(Term::Record(r1), Term::Record(r2)) => {
2 changes: 1 addition & 1 deletion core/src/parser/grammar.lalrpop
@@ -702,7 +702,7 @@ EnumTag: LocIdent = {

EnumVariant: RichTerm =
<tag: EnumTag> ".." "(" <arg: Term> ")" =>
RichTerm::from(Term::EnumVariant(tag, arg));
RichTerm::from(Term::EnumVariant { tag, arg, attrs: Default::default() });

ChunkLiteralPart: ChunkLiteralPart = {
"str literal" => ChunkLiteralPart::Str(<>),
6 changes: 3 additions & 3 deletions core/src/pretty.rs
@@ -770,11 +770,11 @@
},
Var(id) => allocator.as_string(id),
Enum(id) => allocator.text("'").append(allocator.text(ident_quoted(id))),
EnumVariant(id, data) => allocator
EnumVariant { tag, arg, attrs: _ } => allocator
.text("'")
.append(allocator.text(ident_quoted(id)))
.append(allocator.text(ident_quoted(tag)))
.append("..(")
.append(data.pretty(allocator))
.append(arg.pretty(allocator))
.append(")"),
Record(record_data) => allocator.record(record_data, &[]),
RecRecord(record_data, dyn_fields, _) => allocator.record(record_data, dyn_fields),
80 changes: 73 additions & 7 deletions core/src/term/mod.rs
@@ -123,7 +123,11 @@ pub enum Term {
/// version with one argument. Note that one can just use a record to store multiple named
/// values in the argument.
#[serde(skip)]
EnumVariant(LocIdent, RichTerm),
EnumVariant {
tag: LocIdent,
arg: RichTerm,
attrs: EnumVariantAttrs,
},

/// A record, mapping identifiers to terms.
#[serde(serialize_with = "crate::serialize::serialize_record")]
@@ -474,6 +478,36 @@ impl std::convert::TryFrom<LabeledType> for RuntimeContract {
}
}

/// The attributes of an enum variant.
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct EnumVariantAttrs {
/// An enum variant is closurized if its argument is a [crate::term::Term::Closure] or a
/// constant.
///
/// When initially produced by the parser, data structures such as enum variants or arrays
/// aren't closurized. At the first evaluation, they will be turned into closurized versions,
/// by allocating cache nodes (think thunks) for non-constant elements. Once done, this flag is
/// set to `true`.
///
/// Ideally, we would have a different AST representation for evaluation, where enum variants
/// would always be closurized. In the meantime, while we need to cope with a unique AST across
/// the whole pipeline, we use this flag to remember closurization.
pub closurized: bool,
}

impl EnumVariantAttrs {
/// Create new enum variant attributes. By default, the `closurized` flag is set to `false`.
pub fn new() -> Self {
Default::default()
}

/// Set the `closurized` flag to `true`.
pub fn closurized(mut self) -> Self {
self.closurized = true;
self
}
}

/// The attributes of a let binding.
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct LetAttrs {
@@ -817,7 +851,7 @@ impl Term {
Term::Match { .. } => Some("MatchExpression".to_owned()),
Term::Lbl(_) => Some("Label".to_owned()),
Term::Enum(_) => Some("Enum".to_owned()),
Term::EnumVariant(_, _) => Some("Enum".to_owned()),
Term::EnumVariant { .. } => Some("Enum".to_owned()),
Term::Record(..) | Term::RecRecord(..) => Some("Record".to_owned()),
Term::Array(..) => Some("Array".to_owned()),
Term::SealingKey(_) => Some("SealingKey".to_owned()),
@@ -840,7 +874,21 @@
}
}

/// Determine if a term is in evaluated form, called weak head normal form (WHNF).
/// Determine if a term is in evaluated form, called weak head normal form (WHNF). This test is
/// purely syntactic, which has the non-obvious consequence that some terms might be in WHNF
/// according to [Self::is_whnf] but might still be evaluated further.
///
/// This is due to implementation details of the evaluation around closurization. The first
/// time an array or a record is evaluated, it will be closurized - thunks will be allocated to
/// store its elements and make them shareable. Thus, if `self` is `Term::Array(data, attrs)`
/// with `attrs.closurized` set to `false`, evaluation will rewrite it to a different array,
/// although in the surface language of Nickel, arrays are weak head normal forms.
///
/// For everything happening pre-evaluation, you probably shouldn't care about this subtlety
/// and you can use `is_whnf` directly.
///
/// However, at run-time, in particular if the property you care about is "is this term going
/// to be evaluated further", then you should use [Self::is_eff_whnf] instead.
pub fn is_whnf(&self) -> bool {
match self {
Term::Null
@@ -852,7 +900,7 @@
| Term::Match {..}
| Term::Lbl(_)
| Term::Enum(_)
| Term::EnumVariant(_, _)
| Term::EnumVariant {..}
| Term::Record(..)
| Term::Array(..)
| Term::SealingKey(_) => true,
@@ -877,6 +925,24 @@
}
}

/// Helper used by [Self::is_eff_whnf] to determine if a term is a data structure that hasn't
/// been closurized yet.
fn is_unclosurized_datastructure(&self) -> bool {
match self {
Term::Array(_, attrs) => !attrs.closurized,
Term::Record(data) | Term::RecRecord(data, ..) => !data.attrs.closurized,
Term::EnumVariant { attrs, .. } => !attrs.closurized,
_ => false,
}
}

/// Determine if an expression is an effective weak head normal form, that is a value that
/// won't be evaluated further by the virtual machine. Being an effective WHNF implies being a
/// WHNF, but the converse isn't true. See [Self::is_whnf] for more details.
pub fn is_eff_whnf(&self) -> bool {
self.is_whnf() && !self.is_unclosurized_datastructure()
}

/// Determine if a term is annotated.
pub fn is_annotated(&self) -> bool {
matches!(self, Term::Annotated(..))
@@ -916,7 +982,7 @@
| Term::RecRecord(..)
| Term::Type(_)
| Term::ParseError(_)
| Term::EnumVariant(_, _)
| Term::EnumVariant { .. }
| Term::RuntimeError(_) => false,
}
}
@@ -931,7 +997,7 @@
| Term::StrChunks(..)
| Term::Lbl(..)
| Term::Enum(..)
| Term::EnumVariant(_, _)
| Term::EnumVariant {..}
| Term::Record(..)
| Term::RecRecord(..)
| Term::Array(..)
@@ -2051,7 +2117,7 @@ impl Traverse<RichTerm> for RichTerm {
}),
Term::Fun(_, t)
| Term::FunPattern(_, _, t)
| Term::EnumVariant(_, t)
| Term::EnumVariant { arg: t, .. }
| Term::Op1(_, t)
| Term::Sealed(_, t, _) => t.traverse_ref(f, state),
Term::Let(_, t1, t2, _)
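
A short usage sketch of the new `EnumVariantAttrs`, based only on the code added in this file; the import path assumes the core crate is consumed under the name `nickel_lang_core`, which may differ in your setup.

use nickel_lang_core::term::EnumVariantAttrs;

fn main() {
    // What the parser produces: default attributes, not yet closurized.
    let parsed = EnumVariantAttrs::new();
    assert!(!parsed.closurized);
    assert_eq!(parsed, EnumVariantAttrs::default());

    // What the evaluator stores back once the variant's argument has been
    // closurized; `is_eff_whnf` then treats the variant as fully evaluated.
    let evaluated = parsed.closurized();
    assert!(evaluated.closurized);
}

The relation implemented above is simply `is_eff_whnf(t) == is_whnf(t) && !is_unclosurized_datastructure(t)`: an unclosurized array, record or enum variant is a syntactic WHNF but not an effective one, which is exactly the case `should_update` now has to catch.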
2 changes: 1 addition & 1 deletion core/src/transform/free_vars.rs
Expand Up @@ -97,7 +97,7 @@ impl CollectFreeVars for RichTerm {
t.collect_free_vars(free_vars);
}
}
Term::Op1(_, t) | Term::Sealed(_, t, _) | Term::EnumVariant(_, t) => {
Term::Op1(_, t) | Term::Sealed(_, t, _) | Term::EnumVariant { arg: t, .. } => {
t.collect_free_vars(free_vars)
}
Term::OpN(_, ts) => {
8 changes: 4 additions & 4 deletions core/src/typecheck/mod.rs
@@ -1569,7 +1569,7 @@ fn walk<V: TypecheckVisitor>(
walk(state, ctxt.clone(), visitor, t)
})
}
Term::EnumVariant(_, t) => walk(state, ctxt, visitor, t),
Term::EnumVariant { arg: t, ..} => walk(state, ctxt, visitor, t),
Term::Op1(_, t) => walk(state, ctxt.clone(), visitor, t),
Term::Op2(_, t1, t2) => {
walk(state, ctxt.clone(), visitor, t1)?;
@@ -1967,18 +1967,18 @@ fn check<V: TypecheckVisitor>(
ty.unify(mk_uty_enum!(*id; row), state, &ctxt)
.map_err(|err| err.into_typecheck_err(state, rt.pos))
}
Term::EnumVariant(id, t) => {
Term::EnumVariant { tag, arg, .. } => {
let row_tail = state.table.fresh_erows_uvar(ctxt.var_level);
let ty_arg = state.table.fresh_type_uvar(ctxt.var_level);

// We match the expected type against `[| 'id ty_arg; row_tail |]`, where `row_tail` is
// a free unification variable, to ensure it has the right shape and extract the
// components.
ty.unify(mk_uty_enum!((*id, ty_arg.clone()); row_tail), state, &ctxt)
ty.unify(mk_uty_enum!((*tag, ty_arg.clone()); row_tail), state, &ctxt)
.map_err(|err| err.into_typecheck_err(state, rt.pos))?;

// Once we have a type for the argument, we check the variant's data against it.
check(state, ctxt, visitor, t, ty_arg)
check(state, ctxt, visitor, arg, ty_arg)
}
// If some fields are defined dynamically, the only potential type that works is `{_ : a}`
// for some `a`. In other words, the checking rule is not the same depending on the target
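
Read informally, the updated checking rule for enum variants says: to check `'tag arg` against an expected type `ty`, pick fresh unification variables `ty_arg` and `row_tail`, unify `ty` with `[| 'tag ty_arg; row_tail |]`, and then check `arg` against `ty_arg`. The free row tail is what lets the variant inhabit any enum type that contains at least the row `'tag ty_arg`.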
