From 46b4ad74064902027a2a068b4d3aad71979e8a48 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Thu, 11 Nov 2021 05:10:50 -0500 Subject: [PATCH 1/5] Make BuiltinByteString and BuiltInString more opaque Hopefully fixes #4193. --- plutus-tx/src/PlutusTx/Builtins/Internal.hs | 92 ++++++++++++++++----- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index e544bdde661..11982779ca7 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -1,11 +1,12 @@ -{-# LANGUAGE DeriveAnyClass #-} -{-# LANGUAGE DerivingStrategies #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeApplications #-} --- This ensures that we don't put *anything* about these functions into the interface --- file, otherwise GHC can be clever about the ones that are always error, even though --- they're NOINLINE! -{-# OPTIONS_GHC -O0 #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} + +-- We don't want builtins getting torn apart. +{-# OPTIONS_GHC -fno-cpr-anal #-} + -- | This module contains the special Haskell names that are used to map to builtin types or functions -- in Plutus Core. -- @@ -13,16 +14,18 @@ module PlutusTx.Builtins.Internal where import Codec.Serialise -import Control.DeepSeq (NFData) +import Control.DeepSeq (NFData (..)) import Crypto qualified import Data.ByteArray qualified as BA import Data.ByteString as BS import Data.ByteString.Hash qualified as Hash import Data.Coerce (coerce) -import Data.Hashable (Hashable) +import Data.Hashable (Hashable (..)) import Data.Maybe (fromMaybe) import Data.Text as Text (Text, empty) import Data.Text.Encoding as Text (decodeUtf8, encodeUtf8) +import Data.Void (Void) +import GHC.Exts (lazy) import GHC.Generics (Generic) import PlutusCore.Data qualified as PLC import PlutusTx.Utils (mustBeReplaced) @@ -61,9 +64,11 @@ for most of our functions it's not a *semantic* problem. Here, however, it is a problem. So we just expose the delayed version as the builtin. -} -{-# NOINLINE error #-} +-- We use GHC.Exts.lazy here so GHC can't see that the result is always bottom. +-- I (David Feuer) don't really understand why this is needed, but it seems to +-- be desired. error :: BuiltinUnit -> a -error = mustBeReplaced "error" +error (BuiltinUnit ()) = lazy (mustBeReplaced "error") {- BOOL @@ -95,7 +100,9 @@ unitval = BuiltinUnit () {-# NOINLINE chooseUnit #-} chooseUnit :: BuiltinUnit -> a -> a -chooseUnit (BuiltinUnit ()) a = a +-- For tracing purposes, this is defined like pseq; it's strict in the unit +-- value but (analyzed as) lazy in the interesting value. +chooseUnit (BuiltinUnit ()) a = lazy a {- INTEGER @@ -147,15 +154,57 @@ equalsInteger = coerce ((==) @Integer) BYTESTRING -} +{- Note [Silly sum types] + +GHC's worker/wrapper transformation and CPR analysis really like to tear +product types apart. Usually, that's a good thing. But for our purposes, it's +not. We want to see the actual `BuiltInByteString` or `BuiltInString`, not its +constituent parts. So we turn these product types into sum types, by adding +constructors that can never actually be applied. Turning off CPR analysis in +this module and using `NOINLINE` where appropriate then seems to be sufficient +to prevent that. Anyone changing this stuff should be careful to inspect the +generated workers to make sure we're preserving everything we should. +-} + -- | An opaque type representing Plutus Core ByteStrings. -newtype BuiltinByteString = BuiltinByteString ByteString - deriving stock (Generic) - deriving newtype (Haskell.Show, Haskell.Eq, Haskell.Ord, Haskell.Semigroup, Haskell.Monoid) - deriving newtype (Hashable, Serialise, NFData, BA.ByteArrayAccess, BA.ByteArray) +data BuiltinByteString + = BuiltinByteString {-# UNPACK #-} !ByteString + | NeverBS_ !Void + deriving stock (Generic, Haskell.Eq, Haskell.Ord) + +instance Haskell.Show BuiltinByteString where + showsPrec d (BuiltinByteString bs) = Haskell.showsPrec d bs + +instance Haskell.Semigroup BuiltinByteString where + BuiltinByteString bs1 <> BuiltinByteString bs2 + = BuiltinByteString $ bs1 Haskell.<> bs2 + +instance Haskell.Monoid BuiltinByteString where + mempty = BuiltinByteString Haskell.mempty instance Pretty BuiltinByteString where pretty = viaShow +instance NFData BuiltinByteString where + rnf (BuiltinByteString bs) = rnf bs + +instance Hashable BuiltinByteString where + hashWithSalt s (BuiltinByteString bs) = hashWithSalt s bs + +instance BA.ByteArrayAccess BuiltinByteString where + length (BuiltinByteString bs) = BA.length bs + withByteArray (BuiltinByteString bs) = BA.withByteArray bs + copyByteArrayToPtr (BuiltinByteString bs) = BA.copyByteArrayToPtr bs + +instance Serialise BuiltinByteString where + encode (BuiltinByteString bs) = encode bs + decode = BuiltinByteString <$> decode + +instance BA.ByteArray BuiltinByteString where + allocRet n f = do + (a, bs) <- BA.allocRet n f + pure (a, BuiltinByteString bs) + {-# NOINLINE appendByteString #-} appendByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString appendByteString (BuiltinByteString b1) (BuiltinByteString b2) = BuiltinByteString $ BS.append b1 b2 @@ -217,8 +266,13 @@ decodeUtf8 (BuiltinByteString b) = BuiltinString $ Text.decodeUtf8 b STRING -} -newtype BuiltinString = BuiltinString Text - deriving newtype (Haskell.Show, Haskell.Eq, Haskell.Ord) +data BuiltinString + = BuiltinString {-# UNPACK #-} !Text + | NeverS_ !Void + deriving stock (Haskell.Eq, Haskell.Ord) + +instance Haskell.Show BuiltinString where + showsPrec d (BuiltinString bs) = Haskell.showsPrec d bs {-# NOINLINE appendString #-} appendString :: BuiltinString -> BuiltinString -> BuiltinString From 3b09a7a5e10cc1ed2b867a36e0ea1214c1862d27 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Thu, 11 Nov 2021 19:21:15 -0500 Subject: [PATCH 2/5] Fix up Generic instance --- plutus-tx/src/PlutusTx/Builtins/Internal.hs | 55 ++++++++++++++------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 11982779ca7..7530a06fc09 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -1,8 +1,11 @@ -{-# LANGUAGE DeriveAnyClass #-} -{-# LANGUAGE DerivingStrategies #-} -{-# LANGUAGE ImportQualifiedPost #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UndecidableInstances #-} -- We don't want builtins getting torn apart. {-# OPTIONS_GHC -fno-cpr-anal #-} @@ -14,7 +17,7 @@ module PlutusTx.Builtins.Internal where import Codec.Serialise -import Control.DeepSeq (NFData (..)) +import Control.DeepSeq (NFData) import Crypto qualified import Data.ByteArray qualified as BA import Data.ByteString as BS @@ -26,7 +29,7 @@ import Data.Text as Text (Text, empty) import Data.Text.Encoding as Text (decodeUtf8, encodeUtf8) import Data.Void (Void) import GHC.Exts (lazy) -import GHC.Generics (Generic) +import GHC.Generics import PlutusCore.Data qualified as PLC import PlutusTx.Utils (mustBeReplaced) import Prettyprinter (Pretty (..), viaShow) @@ -158,7 +161,7 @@ BYTESTRING GHC's worker/wrapper transformation and CPR analysis really like to tear product types apart. Usually, that's a good thing. But for our purposes, it's -not. We want to see the actual `BuiltInByteString` or `BuiltInString`, not its +not. We want to see the actual `BuiltinByteString` or `BuiltinString`, not its constituent parts. So we turn these product types into sum types, by adding constructors that can never actually be applied. Turning off CPR analysis in this module and using `NOINLINE` where appropriate then seems to be sufficient @@ -170,7 +173,31 @@ generated workers to make sure we're preserving everything we should. data BuiltinByteString = BuiltinByteString {-# UNPACK #-} !ByteString | NeverBS_ !Void - deriving stock (Generic, Haskell.Eq, Haskell.Ord) + deriving stock (Haskell.Eq, Haskell.Ord) + deriving anyclass (NFData, Hashable, Serialise) + +-- This is used for a dirty hack to produce the `Generic` instance +-- we want. +data FakeBuiltinByteString = FakeBuiltinByteString {-# UNPACK #-} !ByteString + deriving stock Generic + +type family TwiddleBS fake where + TwiddleBS (D1 ('MetaData _type_name mod_name pkg_name newtypeness) + (C1 ('MetaCons _con_name x y) + z)) = + D1 ('MetaData "BuiltinByteString" mod_name pkg_name newtypeness) + (C1 ('MetaCons "BuiltinByteString" x y) + z) + +-- We define a custom Generic instance because we want any generic code working +-- with `BuiltinByteString` to see it as a single-constructor type. In +-- particular, any serialization should be done without a constructor tag. +instance Generic BuiltinByteString where + type Rep BuiltinByteString = TwiddleBS (Rep FakeBuiltinByteString) + {-# INLINE to #-} + to (M1 (M1 (M1 (K1 bs)))) = BuiltinByteString bs + {-# INLINE from #-} + from (BuiltinByteString bs) = M1 (M1 (M1 (K1 bs))) instance Haskell.Show BuiltinByteString where showsPrec d (BuiltinByteString bs) = Haskell.showsPrec d bs @@ -185,21 +212,11 @@ instance Haskell.Monoid BuiltinByteString where instance Pretty BuiltinByteString where pretty = viaShow -instance NFData BuiltinByteString where - rnf (BuiltinByteString bs) = rnf bs - -instance Hashable BuiltinByteString where - hashWithSalt s (BuiltinByteString bs) = hashWithSalt s bs - instance BA.ByteArrayAccess BuiltinByteString where length (BuiltinByteString bs) = BA.length bs withByteArray (BuiltinByteString bs) = BA.withByteArray bs copyByteArrayToPtr (BuiltinByteString bs) = BA.copyByteArrayToPtr bs -instance Serialise BuiltinByteString where - encode (BuiltinByteString bs) = encode bs - decode = BuiltinByteString <$> decode - instance BA.ByteArray BuiltinByteString where allocRet n f = do (a, bs) <- BA.allocRet n f From 9b6c5000f04d88e02e4bdd276d9d8ef3935453c8 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Thu, 11 Nov 2021 22:24:12 -0500 Subject: [PATCH 3/5] Take care of GHC.Exts.lazy `GHC.Exts.lazy`, like `GHC.Exts.noinline`, is a magical function with no unfolding. GHC erases it in Core Prep, which happens after we interrupt GHC's compilation process, so we have to erase it ourselves. We also need to `NOINLINE` the definition of `error`, because that's what the Plutus compiler is looking for. --- plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs | 9 +++++++-- plutus-tx/src/PlutusTx/Builtins/Internal.hs | 1 + stubs/plutus-ghc-stub/src/StubTypes.hs | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs b/plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs index 12378e211b6..b4fa8be9beb 100644 --- a/plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs +++ b/plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs @@ -125,7 +125,10 @@ stringExprContent = \case -- in question we will strip this off anyway. strip :: GHC.CoreExpr -> GHC.CoreExpr strip = \case - GHC.Var n `GHC.App` GHC.Type _ `GHC.App` expr | GHC.getName n == GHC.noinlineIdName -> strip expr + GHC.Var n `GHC.App` GHC.Type _ `GHC.App` expr + | GHC.getName n == GHC.noinlineIdName || + GHC.hasKey n GHC.lazyIdKey + -> strip expr GHC.Tick _ expr -> strip expr expr -> expr @@ -534,7 +537,9 @@ compileExpr e = withContextM 2 (sdToTxt $ "Compiling expr:" GHC.<+> GHC.ppr e) $ -- Ignore the magic 'noinline' function, it's the identity but has no unfolding. -- See Note [noinline hack] - GHC.Var n `GHC.App` GHC.Type _ `GHC.App` arg | GHC.getName n == GHC.noinlineIdName -> compileExpr arg + GHC.Var n `GHC.App` GHC.Type _ `GHC.App` arg + | GHC.getName n == GHC.noinlineIdName || + GHC.hasKey n GHC.lazyIdKey -> compileExpr arg -- See note [GHC runtime errors] -- diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 7530a06fc09..601b0371335 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -70,6 +70,7 @@ it is a problem. So we just expose the delayed version as the builtin. -- We use GHC.Exts.lazy here so GHC can't see that the result is always bottom. -- I (David Feuer) don't really understand why this is needed, but it seems to -- be desired. +{-# NOINLINE error #-} error :: BuiltinUnit -> a error (BuiltinUnit ()) = lazy (mustBeReplaced "error") diff --git a/stubs/plutus-ghc-stub/src/StubTypes.hs b/stubs/plutus-ghc-stub/src/StubTypes.hs index 1787b051c5a..7b09dc64c52 100644 --- a/stubs/plutus-ghc-stub/src/StubTypes.hs +++ b/stubs/plutus-ghc-stub/src/StubTypes.hs @@ -16,6 +16,7 @@ import Language.Haskell.TH qualified as TH data DynFlags = DynFlags_ type FamInstEnvs = (FamInstEnv, FamInstEnv) +data Unique = Unique_ deriving (Eq, Ord, Outputable, Data) data Name = Name_ deriving (Eq, Ord, Outputable, Data) data OccName = OccName_ deriving (Eq, Ord) data Module = Module_ deriving (Eq, Ord) @@ -678,6 +679,9 @@ charTyConName = undefined noinlineIdName :: Name noinlineIdName = undefined +lazyIdKey :: Unique +lazyIdKey = undefined + nilDataCon :: DataCon nilDataCon = undefined From 4a8caeba05f71d339bed246732ffaf415d7fa34a Mon Sep 17 00:00:00 2001 From: David Feuer Date: Fri, 12 Nov 2021 17:25:08 -0500 Subject: [PATCH 4/5] More comments --- plutus-tx/src/PlutusTx/Builtins/Internal.hs | 51 ++++++++++++++++----- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 601b0371335..166ff7dc3ed 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -7,7 +7,9 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} --- We don't want builtins getting torn apart. +-- We don't want builtins getting torn apart. Enabling CPR analysis does that, +-- so we have to disable it. We leave worker/wrapper enabled, because that's +-- needed to give good compiled code here for off-chain use. {-# OPTIONS_GHC -fno-cpr-anal #-} -- | This module contains the special Haskell names that are used to map to builtin types or functions @@ -168,17 +170,36 @@ constructors that can never actually be applied. Turning off CPR analysis in this module and using `NOINLINE` where appropriate then seems to be sufficient to prevent that. Anyone changing this stuff should be careful to inspect the generated workers to make sure we're preserving everything we should. + +Note: because the Void field in the NeverBS_ and NeverS_ constructors is +*strict*, GHC's pattern coverage checker recognizes that those constructors +aren't needed for complete pattern matches. -} -- | An opaque type representing Plutus Core ByteStrings. data BuiltinByteString + -- See Note [Silly sum types] + -- This type *must* be defined in the same module as FakeBuiltinByteString. + -- See Note [Building a custom Generic instance] for an explanation of that. = BuiltinByteString {-# UNPACK #-} !ByteString | NeverBS_ !Void deriving stock (Haskell.Eq, Haskell.Ord) deriving anyclass (NFData, Hashable, Serialise) --- This is used for a dirty hack to produce the `Generic` instance --- we want. +-- We define a custom Generic instance because we want any generic code working +-- with `BuiltinByteString` to see it as a single-constructor type. Most +-- importantly, any serialization should be done without a constructor tag. +-- +-- See Note [Building a custom Generic instance] +instance Generic BuiltinByteString where + type Rep BuiltinByteString = TwiddleBS (Rep FakeBuiltinByteString) + {-# INLINE to #-} + to (M1 (M1 (M1 (K1 bs)))) = BuiltinByteString bs + {-# INLINE from #-} + from (BuiltinByteString bs) = M1 (M1 (M1 (K1 bs))) + +-- See Note [Building a custom Generic instance] +-- This type *must* be defined in the same module as BuiltinByteString. data FakeBuiltinByteString = FakeBuiltinByteString {-# UNPACK #-} !ByteString deriving stock Generic @@ -190,15 +211,20 @@ type family TwiddleBS fake where (C1 ('MetaCons "BuiltinByteString" x y) z) --- We define a custom Generic instance because we want any generic code working --- with `BuiltinByteString` to see it as a single-constructor type. In --- particular, any serialization should be done without a constructor tag. -instance Generic BuiltinByteString where - type Rep BuiltinByteString = TwiddleBS (Rep FakeBuiltinByteString) - {-# INLINE to #-} - to (M1 (M1 (M1 (K1 bs)))) = BuiltinByteString bs - {-# INLINE from #-} - from (BuiltinByteString bs) = M1 (M1 (M1 (K1 bs))) +{- Note [Building a custom Generic instance] + +It's really easy to write custom `to` and `from` methods for `Generic`. The +trickier part is getting the `Rep` right. The `Rep` includes metadata about +strictness, unpacking, constructor names, record selectors, etc. Some aspects +are difficult or impossible to get right manually, particularly the exact +package name and the "decided strictness". So instead of building the `Rep` +from scratch, we instead *edit* one that GHC builds us for a type, +FakeBuiltinByteString, that lives in the same module and looks almost identical +to the one we're trying to imitate. All we have to do is use a type family, +TwiddleBS, to edit the FakeBuiltinByteString to replace its type name and +constructor name with those of BuiltinByteString. All the other structural +information and metadata are copied over unchanged. +-} instance Haskell.Show BuiltinByteString where showsPrec d (BuiltinByteString bs) = Haskell.showsPrec d bs @@ -285,6 +311,7 @@ STRING -} data BuiltinString + -- See Note [Silly sum types] = BuiltinString {-# UNPACK #-} !Text | NeverS_ !Void deriving stock (Haskell.Eq, Haskell.Ord) From a35134b3e85eb4be026091109574c4ec8d24a541 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Sun, 14 Nov 2021 19:30:29 -0500 Subject: [PATCH 5/5] Don't allow demand analysis or w/w for error --- plutus-tx/src/PlutusTx/Builtins/Internal.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 166ff7dc3ed..53a658ddc11 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -71,10 +71,11 @@ it is a problem. So we just expose the delayed version as the builtin. -- We use GHC.Exts.lazy here so GHC can't see that the result is always bottom. -- I (David Feuer) don't really understand why this is needed, but it seems to --- be desired. +-- be desired. We also don't allow this to be seen as strict in the unit, +-- because that would lead to a worker-wrapper transformation of `error`. {-# NOINLINE error #-} error :: BuiltinUnit -> a -error (BuiltinUnit ()) = lazy (mustBeReplaced "error") +error u = lazy u `seq` lazy (mustBeReplaced "error") {- BOOL @@ -106,9 +107,9 @@ unitval = BuiltinUnit () {-# NOINLINE chooseUnit #-} chooseUnit :: BuiltinUnit -> a -> a --- For tracing purposes, this is defined like pseq; it's strict in the unit --- value but (analyzed as) lazy in the interesting value. -chooseUnit (BuiltinUnit ()) a = lazy a +-- We allow only arity analysis of this function. No +-- demand analysis. +chooseUnit u a = lazy u `seq` lazy a {- INTEGER