Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make BuiltinByteString and BuiltInString more opaque #4206

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions plutus-tx-plugin/src/PlutusTx/Compiler/Expr.hs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ stringExprContent = \case
-- in question we will strip this off anyway.
strip :: GHC.CoreExpr -> GHC.CoreExpr
strip = \case
GHC.Var n `GHC.App` GHC.Type _ `GHC.App` expr | GHC.getName n == GHC.noinlineIdName -> strip expr
GHC.Var n `GHC.App` GHC.Type _ `GHC.App` expr
| GHC.getName n == GHC.noinlineIdName ||
GHC.hasKey n GHC.lazyIdKey
-> strip expr
GHC.Tick _ expr -> strip expr
expr -> expr

Expand Down Expand Up @@ -534,7 +537,9 @@ compileExpr e = withContextM 2 (sdToTxt $ "Compiling expr:" GHC.<+> GHC.ppr e) $

-- Ignore the magic 'noinline' function, it's the identity but has no unfolding.
-- See Note [noinline hack]
GHC.Var n `GHC.App` GHC.Type _ `GHC.App` arg | GHC.getName n == GHC.noinlineIdName -> compileExpr arg
GHC.Var n `GHC.App` GHC.Type _ `GHC.App` arg
| GHC.getName n == GHC.noinlineIdName ||
GHC.hasKey n GHC.lazyIdKey -> compileExpr arg

-- See note [GHC runtime errors]
-- <error func> <runtime rep> <overall type> <call stack> <message>
Expand Down
135 changes: 117 additions & 18 deletions plutus-tx/src/PlutusTx/Builtins/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
-- This ensures that we don't put *anything* about these functions into the interface
-- file, otherwise GHC can be clever about the ones that are always error, even though
-- they're NOINLINE!
{-# OPTIONS_GHC -O0 #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}

-- We don't want builtins getting torn apart. Enabling CPR analysis does that,
-- so we have to disable it. We leave worker/wrapper enabled, because that's
-- needed to give good compiled code here for off-chain use.
{-# OPTIONS_GHC -fno-cpr-anal #-}

-- | This module contains the special Haskell names that are used to map to builtin types or functions
-- in Plutus Core.
--
Expand All @@ -19,11 +25,13 @@ import Data.ByteArray qualified as BA
import Data.ByteString as BS
import Data.ByteString.Hash qualified as Hash
import Data.Coerce (coerce)
import Data.Hashable (Hashable)
import Data.Hashable (Hashable (..))
import Data.Maybe (fromMaybe)
import Data.Text as Text (Text, empty)
import Data.Text.Encoding as Text (decodeUtf8, encodeUtf8)
import GHC.Generics (Generic)
import Data.Void (Void)
import GHC.Exts (lazy)
import GHC.Generics
import PlutusCore.Data qualified as PLC
import PlutusTx.Utils (mustBeReplaced)
import Prettyprinter (Pretty (..), viaShow)
Expand Down Expand Up @@ -61,9 +69,12 @@ for most of our functions it's not a *semantic* problem. Here, however,
it is a problem. So we just expose the delayed version as the builtin.
-}

-- We use GHC.Exts.lazy here so GHC can't see that the result is always bottom.
-- I (David Feuer) don't really understand why this is needed, but it seems to
-- be desired.
{-# NOINLINE error #-}
error :: BuiltinUnit -> a
error = mustBeReplaced "error"
error (BuiltinUnit ()) = lazy (mustBeReplaced "error")

{-
BOOL
Expand Down Expand Up @@ -95,7 +106,9 @@ unitval = BuiltinUnit ()

{-# NOINLINE chooseUnit #-}
chooseUnit :: BuiltinUnit -> a -> a
chooseUnit (BuiltinUnit ()) a = a
-- For tracing purposes, this is defined like pseq; it's strict in the unit
-- value but (analyzed as) lazy in the interesting value.
chooseUnit (BuiltinUnit ()) a = lazy a

{-
INTEGER
Expand Down Expand Up @@ -147,15 +160,95 @@ equalsInteger = coerce ((==) @Integer)
BYTESTRING
-}

{- Note [Silly sum types]

GHC's worker/wrapper transformation and CPR analysis really like to tear
product types apart. Usually, that's a good thing. But for our purposes, it's
not. We want to see the actual `BuiltinByteString` or `BuiltinString`, not its
constituent parts. So we turn these product types into sum types, by adding
constructors that can never actually be applied. Turning off CPR analysis in
this module and using `NOINLINE` where appropriate then seems to be sufficient
to prevent that. Anyone changing this stuff should be careful to inspect the
generated workers to make sure we're preserving everything we should.

Note: because the Void field in the NeverBS_ and NeverS_ constructors is
*strict*, GHC's pattern coverage checker recognizes that those constructors
aren't needed for complete pattern matches.
-}

-- | An opaque type representing Plutus Core ByteStrings.
newtype BuiltinByteString = BuiltinByteString ByteString
deriving stock (Generic)
deriving newtype (Haskell.Show, Haskell.Eq, Haskell.Ord, Haskell.Semigroup, Haskell.Monoid)
deriving newtype (Hashable, Serialise, NFData, BA.ByteArrayAccess, BA.ByteArray)
data BuiltinByteString
-- See Note [Silly sum types]
-- This type *must* be defined in the same module as FakeBuiltinByteString.
-- See Note [Building a custom Generic instance] for an explanation of that.
= BuiltinByteString {-# UNPACK #-} !ByteString
| NeverBS_ !Void
deriving stock (Haskell.Eq, Haskell.Ord)
deriving anyclass (NFData, Hashable, Serialise)

-- We define a custom Generic instance because we want any generic code working
-- with `BuiltinByteString` to see it as a single-constructor type. Most
-- importantly, any serialization should be done without a constructor tag.
--
-- See Note [Building a custom Generic instance]
instance Generic BuiltinByteString where
type Rep BuiltinByteString = TwiddleBS (Rep FakeBuiltinByteString)
{-# INLINE to #-}
to (M1 (M1 (M1 (K1 bs)))) = BuiltinByteString bs
{-# INLINE from #-}
from (BuiltinByteString bs) = M1 (M1 (M1 (K1 bs)))

-- See Note [Building a custom Generic instance]
-- This type *must* be defined in the same module as BuiltinByteString.
data FakeBuiltinByteString = FakeBuiltinByteString {-# UNPACK #-} !ByteString
deriving stock Generic

type family TwiddleBS fake where
TwiddleBS (D1 ('MetaData _type_name mod_name pkg_name newtypeness)
(C1 ('MetaCons _con_name x y)
z)) =
D1 ('MetaData "BuiltinByteString" mod_name pkg_name newtypeness)
(C1 ('MetaCons "BuiltinByteString" x y)
z)

{- Note [Building a custom Generic instance]

It's really easy to write custom `to` and `from` methods for `Generic`. The
trickier part is getting the `Rep` right. The `Rep` includes metadata about
strictness, unpacking, constructor names, record selectors, etc. Some aspects
are difficult or impossible to get right manually, particularly the exact
package name and the "decided strictness". So instead of building the `Rep`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can get the "package name" (actually the unit-id) with CURRENT_PACKAGE_KEY CPP macro.
OTOH using the fake type with generic trick is quite clever :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phadej, using a fake type is the only way I know to be sure the DecidedStrictness is right no matter how the module is compiled, and I think it's easier to see that the construction is correct than it would be to do it all by hand anyway. Copying the module name from the fake type makes it robust against module renaming too. For the sake of my curiosity, what is a unit-id?

Copy link

@phadej phadej Nov 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is a unit-id

It's a difficult question to answer exhaustively, but a short version is:

unit-ids are the identifiers libraries (or more generally units) are registered with in package db (or more generally identified by GHC). Package id (which is package version + package number) is metadata used by humans. For ghc boot libraries unit-ids and package-ids are the same, for cabal installed packages (in store) they are different:

For example:

% head -n 10 /cabal/store/ghc-9.0.1/package.db/aeson-2.0.2.0-ceb356429ca3538b1da748cbb4d8187275f311ecac1e7bd3d68bd0e65c65d68a.conf
name:                 aeson
version:              2.0.2.0
visibility:           public
id:
    aeson-2.0.2.0-ceb356429ca3538b1da748cbb4d8187275f311ecac1e7bd3d68bd0e65c65d68a

key:
    aeson-2.0.2.0-ceb356429ca3538b1da748cbb4d8187275f311ecac1e7bd3d68bd0e65c65d68a

license:              BSD-3-Clause

(key was the predecessor of unit-ids before Backpack, IIRC, now there for compatibility reasons).

Unit-ids may be arbitrary strings (restricted alphabet), they don't need to start with package-id, it's there to help debugging.

from scratch, we instead *edit* one that GHC builds us for a type,
FakeBuiltinByteString, that lives in the same module and looks almost identical
to the one we're trying to imitate. All we have to do is use a type family,
TwiddleBS, to edit the FakeBuiltinByteString to replace its type name and
constructor name with those of BuiltinByteString. All the other structural
information and metadata are copied over unchanged.
-}

instance Haskell.Show BuiltinByteString where
showsPrec d (BuiltinByteString bs) = Haskell.showsPrec d bs

instance Haskell.Semigroup BuiltinByteString where
BuiltinByteString bs1 <> BuiltinByteString bs2
= BuiltinByteString $ bs1 Haskell.<> bs2

instance Haskell.Monoid BuiltinByteString where
mempty = BuiltinByteString Haskell.mempty

instance Pretty BuiltinByteString where
pretty = viaShow

instance BA.ByteArrayAccess BuiltinByteString where
length (BuiltinByteString bs) = BA.length bs
withByteArray (BuiltinByteString bs) = BA.withByteArray bs
copyByteArrayToPtr (BuiltinByteString bs) = BA.copyByteArrayToPtr bs

instance BA.ByteArray BuiltinByteString where
allocRet n f = do
(a, bs) <- BA.allocRet n f
pure (a, BuiltinByteString bs)

{-# NOINLINE appendByteString #-}
appendByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString
appendByteString (BuiltinByteString b1) (BuiltinByteString b2) = BuiltinByteString $ BS.append b1 b2
Expand Down Expand Up @@ -217,8 +310,14 @@ decodeUtf8 (BuiltinByteString b) = BuiltinString $ Text.decodeUtf8 b
STRING
-}

newtype BuiltinString = BuiltinString Text
deriving newtype (Haskell.Show, Haskell.Eq, Haskell.Ord)
data BuiltinString
-- See Note [Silly sum types]
= BuiltinString {-# UNPACK #-} !Text
| NeverS_ !Void
deriving stock (Haskell.Eq, Haskell.Ord)

instance Haskell.Show BuiltinString where
showsPrec d (BuiltinString bs) = Haskell.showsPrec d bs

{-# NOINLINE appendString #-}
appendString :: BuiltinString -> BuiltinString -> BuiltinString
Expand Down
4 changes: 4 additions & 0 deletions stubs/plutus-ghc-stub/src/StubTypes.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import Language.Haskell.TH qualified as TH

data DynFlags = DynFlags_
type FamInstEnvs = (FamInstEnv, FamInstEnv)
data Unique = Unique_ deriving (Eq, Ord, Outputable, Data)
data Name = Name_ deriving (Eq, Ord, Outputable, Data)
data OccName = OccName_ deriving (Eq, Ord)
data Module = Module_ deriving (Eq, Ord)
Expand Down Expand Up @@ -678,6 +679,9 @@ charTyConName = undefined
noinlineIdName :: Name
noinlineIdName = undefined

lazyIdKey :: Unique
lazyIdKey = undefined

nilDataCon :: DataCon
nilDataCon = undefined

Expand Down