Use the most recent evaluation context for most of the benchmarks (In…
Kenneth MacKenzie authored and v0d1ch committed Dec 6, 2024
1 parent 72ced37 commit 9861cbc
Showing 10 changed files with 66 additions and 48 deletions.
4 changes: 2 additions & 2 deletions plutus-benchmark/bls12-381-costs/bench/Bench.hs
@@ -7,7 +7,7 @@ module Main where
import Criterion.Main

import PlutusBenchmark.BLS12_381.Scripts
import PlutusBenchmark.Common (benchProgramCek, mkEvalCtx)
import PlutusBenchmark.Common (benchProgramCek, mkMostRecentEvalCtx)
import PlutusLedgerApi.Common (EvaluationContext)
import PlutusTx.Prelude qualified as Tx

@@ -77,7 +77,7 @@ schnorrG2Verify ctx = bench "schnorrG2Verify" $ benchProgramCek ctx mkSchnorrG2V

main :: IO ()
main = do
evalCtx <- evaluate mkEvalCtx
evalCtx <- evaluate mkMostRecentEvalCtx
defaultMain [
bgroup "hashAndAddG1" $ fmap (benchHashAndAddG1 evalCtx) [0, 10..150]
, bgroup "hashAndAddG2" $ fmap (benchHashAndAddG2 evalCtx) [0, 10..150]
4 changes: 2 additions & 2 deletions plutus-benchmark/cek-calibration/Main.hs
@@ -17,7 +17,7 @@ module Main (main) where

import Prelude qualified as Haskell

import PlutusBenchmark.Common (benchTermCek, mkEvalCtx)
import PlutusBenchmark.Common (benchTermCek, mkMostRecentEvalCtx)
import PlutusCore
import PlutusCore.Pretty qualified as PP
import PlutusLedgerApi.Common (EvaluationContext)
@@ -87,7 +87,7 @@ writePlc p =

main1 :: Haskell.IO ()
main1 = do
evalCtx <- evaluate mkEvalCtx
evalCtx <- evaluate mkMostRecentEvalCtx
defaultMainWith
(defaultConfig { C.csvFile = Just "cek-lists.csv" })
[mkListBMs evalCtx [0,10..1000]]
32 changes: 21 additions & 11 deletions plutus-benchmark/common/PlutusBenchmark/Common.hs
@@ -17,6 +17,7 @@ module PlutusBenchmark.Common
, runTermCek
, cekResultMatchesHaskellValue
, mkEvalCtx
, mkMostRecentEvalCtx
, evaluateCekLikeInProd
, benchTermCek
, TestSize (..)
@@ -84,24 +85,33 @@ getCostsCek (UPLC.Program _ _ prog) =
let ExBudget (ExCPU cpu)(ExMemory mem) = budget
in (fromSatInt cpu, fromSatInt mem)

-- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
-- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
-- deliberately not including it in the benchmarks.
mkEvalCtx :: LedgerApi.EvaluationContext
mkEvalCtx =
case PLC.defaultCostModelParamsForTesting of
-- | Create the evaluation context for the benchmarks. This doesn't exactly
-- match how it's done on-chain, but that's okay because the evaluation context
-- is cached by the ledger, so we're deliberately not including it in the
-- benchmarks. Different benchmarks may depend on different language versions
-- and semantic variants, so we have to specify those here.
mkEvalCtx
:: LedgerApi.PlutusLedgerLanguage
-> BuiltinSemanticsVariant DefaultFun
-> LedgerApi.EvaluationContext
mkEvalCtx ll semvar =
case PLC.defaultCostModelParamsForVariant semvar of
Just p ->
let errOrCtx =
-- The validation benchmarks were all created from PlutusV1 scripts
LedgerApi.mkDynEvaluationContext
LedgerApi.PlutusV1
[DefaultFunSemanticsVariantB]
(const DefaultFunSemanticsVariantB)
ll
[semvar]
(const semvar)
p
in case errOrCtx of
Right ec -> ec
Left err -> error $ show err
Nothing -> error "Couldn't get cost model params"
Nothing -> error $ "Couldn't get cost model params for " ++ (show semvar)

-- Many of our benchmarks should use an evaluation context for the most recent
-- Plutus language version and the most recent semantic variant.
mkMostRecentEvalCtx :: LedgerApi.EvaluationContext
mkMostRecentEvalCtx = mkEvalCtx maxBound maxBound

-- | Evaluate a term as it would be evaluated using the on-chain evaluator.
evaluateCekLikeInProd
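For context, a minimal sketch of how a benchmark's main could use the revised API from PlutusBenchmark.Common shown above. The imports follow the diff in this commit; the benchmark groups themselves are omitted, and nothing below is part of the commit.

module Main (main) where

import Control.Exception (evaluate)
import Criterion.Main (defaultMain)
import PlutusBenchmark.Common (mkEvalCtx, mkMostRecentEvalCtx)
import PlutusCore.Default (BuiltinSemanticsVariant (DefaultFunSemanticsVariantA))
import PlutusLedgerApi.Common (PlutusLedgerLanguage (PlutusV1))

main :: IO ()
main = do
  -- Most benchmarks: take the newest ledger language and builtin semantics
  -- variant, i.e. mkEvalCtx maxBound maxBound.
  latestCtx <- evaluate mkMostRecentEvalCtx
  -- Benchmarks whose scripts target a fixed version (e.g. the validation
  -- benchmarks further down) pin the context explicitly instead.
  v1Ctx <- evaluate $ mkEvalCtx PlutusV1 DefaultFunSemanticsVariantA
  defaultMain [] -- benchmark groups built with latestCtx / v1Ctx go here

Forcing the contexts with evaluate up front keeps their construction cost out of the measured benchmark bodies, matching what the existing benchmarks already do.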
4 changes: 2 additions & 2 deletions plutus-benchmark/lists/bench/Bench.hs
@@ -6,7 +6,7 @@ module Main (main) where

import Criterion.Main

import PlutusBenchmark.Common (benchTermCek, getConfig, mkEvalCtx)
import PlutusBenchmark.Common (benchTermCek, getConfig, mkMostRecentEvalCtx)
import PlutusBenchmark.Lists.Sort qualified as Sort
import PlutusBenchmark.Lists.Sum.Compiled qualified as Sum.Compiled
import PlutusBenchmark.Lists.Sum.HandWritten qualified as Sum.HandWritten
@@ -54,5 +54,5 @@ main :: IO ()
main = do
-- Run each benchmark for at least 15 seconds. Change this with -L or --timeout.
config <- getConfig 15.0
evalCtx <- evaluate mkEvalCtx
evalCtx <- evaluate mkMostRecentEvalCtx
defaultMainWith config $ benchmarks evalCtx
4 changes: 2 additions & 2 deletions plutus-benchmark/marlowe/bench/BenchCek.hs
@@ -2,12 +2,12 @@

module Main where

import PlutusBenchmark.Common (benchProgramCek, mkEvalCtx)
import PlutusBenchmark.Common (benchProgramCek, mkMostRecentEvalCtx)
import Shared (runBenchmarks)

import Control.Exception (evaluate)

main :: IO ()
main = do
evalCtx <- evaluate mkEvalCtx
evalCtx <- evaluate mkMostRecentEvalCtx
runBenchmarks (benchProgramCek evalCtx)
5 changes: 3 additions & 2 deletions plutus-benchmark/nofib/bench/BenchCek.hs
@@ -3,11 +3,12 @@
{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
module Main where

import Shared (benchTermCek, benchWith, mkEvalCtx)
import PlutusBenchmark.Common (mkMostRecentEvalCtx)
import Shared (benchTermCek, benchWith)

import Control.Exception (evaluate)

main :: IO ()
main = do
evalCtx <- evaluate mkEvalCtx
evalCtx <- evaluate mkMostRecentEvalCtx
benchWith $ benchTermCek evalCtx
3 changes: 1 addition & 2 deletions plutus-benchmark/nofib/bench/Shared.hs
@@ -2,11 +2,10 @@
module Shared (
benchWith
, mkBenchMarks
, mkEvalCtx
, benchTermCek
) where

import PlutusBenchmark.Common (Term, benchTermCek, getConfig, mkEvalCtx)
import PlutusBenchmark.Common (Term, benchTermCek, getConfig)

import PlutusBenchmark.NoFib.Clausify qualified as Clausify
import PlutusBenchmark.NoFib.Knights qualified as Knights
1 change: 1 addition & 0 deletions plutus-benchmark/plutus-benchmark.cabal
@@ -265,6 +265,7 @@ benchmark validation
, optparse-applicative
, plutus-benchmark-common
, plutus-core ^>=1.28
, plutus-ledger-api ^>=1.28

---------------- validation-decode ----------------

6 changes: 5 additions & 1 deletion plutus-benchmark/validation/bench/BenchCek.hs
@@ -4,6 +4,8 @@ module Main where
import Common (benchTermCek, benchWith, mkEvalCtx, unsafeUnflat)
import Control.Exception (evaluate)
import PlutusBenchmark.Common (toNamedDeBruijnTerm)
import PlutusCore.Default (BuiltinSemanticsVariant (DefaultFunSemanticsVariantA))
import PlutusLedgerApi.Common (PlutusLedgerLanguage (PlutusV1))
import UntypedPlutusCore as UPLC

{-|
@@ -16,7 +18,9 @@ import UntypedPlutusCore as UPLC
-}
main :: IO ()
main = do
evalCtx <- evaluate mkEvalCtx
-- The validation benchmarks were all created with PlutusV1, so let's make
-- sure that the evaluation context matches.
evalCtx <- evaluate $ mkEvalCtx PlutusV1 DefaultFunSemanticsVariantA
let mkCekBM file program =
benchTermCek evalCtx . toNamedDeBruijnTerm . UPLC._progTerm $ unsafeUnflat file program
benchWith mkCekBM
51 changes: 27 additions & 24 deletions plutus-benchmark/validation/bench/BenchFull.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE BangPatterns #-}
module Main where

import PlutusCore.Default (BuiltinSemanticsVariant (DefaultFunSemanticsVariantA))
import PlutusCore.Evaluation.Machine.ExBudget
import PlutusLedgerApi.Common.Versions
import PlutusLedgerApi.V1
@@ -23,28 +24,30 @@ the whole time taken from script deserialization to script execution result.
-}
main :: IO ()
main = do
evalCtx <- evaluate mkEvalCtx
let mkFullBM :: FilePath -> BS.ByteString -> Benchmarkable
mkFullBM file bsFlat =
let UPLC.Program () ver body = unsafeUnflat file bsFlat
-- We make some effort to mimic what happens on-chain, including the provision of
-- the script arguments. However, the inputs we have are *fully applied*. So we try
-- and reverse that by stripping off the arguments here. Conveniently, we know that
-- they will be Data constants. Annoyingly we can't just assume it's the first 3
-- arguments, since some of them are policy scripts with only 2.
(term, args) = peelDataArguments body
-- The validation benchmarks were all created with PlutusV1, so let's make
-- sure that the evaluation context matches.
evalCtx <- evaluate $ mkEvalCtx PlutusV1 DefaultFunSemanticsVariantA
let mkFullBM :: FilePath -> BS.ByteString -> Benchmarkable
mkFullBM file bsFlat =
let UPLC.Program () ver body = unsafeUnflat file bsFlat
-- We make some effort to mimic what happens on-chain, including the provision of
-- the script arguments. However, the inputs we have are *fully applied*. So we try
-- and reverse that by stripping off the arguments here. Conveniently, we know that
-- they will be Data constants. Annoyingly we can't just assume it's the first 3
-- arguments, since some of them are policy scripts with only 2.
(term, args) = peelDataArguments body
-- strictify and "short" the result cbor to create a real `SerialisedScript`
!benchScript = force . serialiseUPLC $ UPLC.Program () ver term
eval script =
either (error . show) (\_ -> ()) . snd $ evaluateScriptRestricting
futurePV
-- no logs
Quiet
evalCtx
-- uses restricting(enormous) instead of counting to include the periodic
-- budget-overspent check
(unExRestrictingBudget enormousBudget)
(either (error . show) id $ deserialiseScript futurePV script)
args
in whnf eval benchScript
benchWith mkFullBM
!benchScript = force . serialiseUPLC $ UPLC.Program () ver term
eval script =
either (error . show) (\_ -> ()) . snd $ evaluateScriptRestricting
futurePV
-- no logs
Quiet
evalCtx
-- uses restricting(enormous) instead of counting to include the periodic
-- budget-overspent check
(unExRestrictingBudget enormousBudget)
(either (error . show) id $ deserialiseScript futurePV script)
args
in whnf eval benchScript
benchWith mkFullBM
