From 30e385ee41f7213f82a33d023f054bbe084cd62f Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 29 Jan 2024 13:55:02 +0100 Subject: [PATCH] Fix expressions evaluated multiple times This commit fixes an issue that has been introduced by the removal of generated variables, together with the `should_update` optimization, which tries to avoid thunk update that are unnecessary. However, as detailed in the added documentation for should_update, this didn't work well with the removal of generated variables, which would materialize by not sharing inner expressions for variables whose value was already a data structure in weak head normal form (arrays, records or now enum variants). Inner expressions would be recomputed. This commit fixes the issue by adding a condition that datastructure that aren't yet closurized shouldn't eschew thunk update. Doing so, this commit also adds the missing closurization of ADTs, which caused enum variant's arguments to also be potentially re-evaluated many time. --- core/src/eval/cache/lazy.rs | 8 ++++---- core/src/eval/mod.rs | 11 +++++++++++ core/src/term/mod.rs | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/core/src/eval/cache/lazy.rs b/core/src/eval/cache/lazy.rs index 55e97ed0a9..d70a8f3128 100644 --- a/core/src/eval/cache/lazy.rs +++ b/core/src/eval/cache/lazy.rs @@ -572,11 +572,11 @@ impl Thunk { /// Determine if a thunk is worth being put on the stack for future update. /// - /// Typically, WHNFs and annotated values will not be evaluated to a simpler expression and are - /// not worth updating. + /// Typically, expressions in weak head normal form (more precisely, in [effective weak head + /// normal form][crate::term::Term::is_eff_whnf]) won't evaluate further and their update can + /// be skipped. pub fn should_update(&self) -> bool { - let term = &self.borrow().body.term; - !term.is_whnf() && !term.is_annotated() + !self.borrow().body.term.is_eff_whnf() } /// Return a clone of the potential field dependencies stored in a revertible thunk. See diff --git a/core/src/eval/mod.rs b/core/src/eval/mod.rs index 7985ee81d8..b01159e1b9 100644 --- a/core/src/eval/mod.rs +++ b/core/src/eval/mod.rs @@ -666,6 +666,17 @@ impl VirtualMachine { } } } + // Closurize the argument of a enum variant if it's not already done. Usually this is done at the first + // time this record is evaluated. + Term::EnumVariant(id, arg) if !matches!(arg.as_ref(), Term::Closure(_)) => { + Closure { + body: RichTerm::new( + Term::EnumVariant(id, arg.closurize(&mut self.cache, env)), + pos, + ), + env: Environment::new(), + } + } // Closurize the record if it's not already done. Usually this is done at the first // time this record is evaluated. Term::Record(data) if !data.attrs.closurized => { diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 8b9961f7a6..25b8f43220 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -840,7 +840,21 @@ impl Term { } } - /// Determine if a term is in evaluated form, called weak head normal form (WHNF). + /// Determine if a term is in evaluated form, called weak head normal form (WHNF). This test is + /// purely syntactic, which has the non-obvious consequence that some terms might be in WHNF + /// according to [Self::is_whnf] but might still be evaluated further. + /// + /// This is due to implementation details of the evaluation around closurization. The first + /// time an array or a record is evaluated, it will be closurized - thunks will be allocated to + /// store its elements and make them shareable. Thus, if `self` is `Term::Array(data, attrs)` + /// with `attrs.closurized` set to `false`, evaluation will rewrite it to a different array, + /// although in the surface language of Nickel, arrays are weak head normal forms. + /// + /// For everything happening pre-evaluation, you probably shouldn't care about this subtlety + /// and you can use `is_whnf` directly. + /// + /// However, at run-time, in particular if the property you care about is "is this term going + /// to be evaluate further", then you should use [Self::is_eff_whnf] instead. pub fn is_whnf(&self) -> bool { match self { Term::Null @@ -877,6 +891,24 @@ impl Term { } } + /// Helper used by [Self::is_eff_whnf] to determine if a term is a data structure that hasn't + /// been closurized yet. + fn is_unclosurized_datastructure(&self) -> bool { + match self { + Term::Array(_, attrs) => !attrs.closurized, + Term::Record(data) | Term::RecRecord(data, ..) => !data.attrs.closurized, + Term::EnumVariant(_, arg) => matches!(arg.as_ref(), Term::Closure(_)), + _ => false, + } + } + + /// Determine if an expression is an effective weak head normal form, that is a value that + /// won't be evaluated further by the virtual machine. Being an effective WHNF implies being a + /// WHNF, but the converse isn't true. See [Self::is_whnf] for more details. + pub fn is_eff_whnf(&self) -> bool { + self.is_whnf() && !self.is_unclosurized_datastructure() + } + /// Determine if a term is annotated. pub fn is_annotated(&self) -> bool { matches!(self, Term::Annotated(..))