Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cmd): Add offline pruning of state trie. #1564

Merged
merged 18 commits into from
May 20, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions cmd/gossamer/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,29 @@ var (
}
)

// State pruning flags, used by the prune-state subcommand.
var (
// BloomFilterSizeFlag sets the size, in megabytes, of the bloom filter used for pruning; valid for use with the prune-state subcommand.
BloomFilterSizeFlag = cli.IntFlag{
Name: "bloom-size",
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
Value: 2048,
}

// DBPathFlag sets the data directory of the pruned output DB; valid for use with the prune-state subcommand.
DBPathFlag = cli.StringFlag{
Name: "pruned-db-path",
Usage: "Data directory for the output DB",
}
noot marked this conversation as resolved.
Show resolved Hide resolved

// RetainBlockNumberFlag sets how many blocks, counted back from the latest block, are retained while pruning; valid for use with the prune-state subcommand.
RetainBlockNumberFlag = cli.IntFlag{
Name: "retain-block",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe name retain-blocks otherwise it could be interpreted as what block number to retain

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Usage: "Retain number of block from latest block while pruning",
Value: 256,
}
)

// flag sets that are shared by multiple commands
var (
// GlobalFlags are flags that are valid for use with the root command and all subcommands
Expand Down Expand Up @@ -354,6 +377,13 @@ var (
HeaderFlag,
FirstSlotFlag,
}

PruningFlags = []cli.Flag{
BasePathFlag,
BloomFilterSizeFlag,
DBPathFlag,
RetainBlockNumberFlag,
}
)

// FixFlagOrder allow us to use various flag order formats (ie, `gossamer init
Expand Down
55 changes: 55 additions & 0 deletions cmd/gossamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/dot/state"
"github.com/ChainSafe/gossamer/lib/keystore"
"github.com/ChainSafe/gossamer/lib/utils"
log "github.com/ChainSafe/log15"
Expand All @@ -35,6 +36,7 @@ const (
buildSpecCommandName = "build-spec"
importRuntimeCommandName = "import-runtime"
importStateCommandName = "import-state"
pruningStateCommandName = "prune-state"
)

// app is the cli application
Expand Down Expand Up @@ -115,6 +117,18 @@ var (
"Input can be generated by using the RPC function state_getPairs.\n" +
"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
}

// pruningCommand defines the prune-state subcommand: it runs pruneState
// (wrapped in FixFlagOrder) and accepts the flags listed in PruningFlags.
pruningCommand = cli.Command{
Action: FixFlagOrder(pruneState),
Name: pruningStateCommandName,
Usage: "Prune state will prune the state trie",
ArgsUsage: "",
Flags: PruningFlags,
Description: `prune-state <retain-block> will prune historical state data.
All trie nodes that do not belong to the specified version state will be deleted from the database.

The default pruning target is the HEAD-256 state`,
}
)

// init initialises the cli application
Expand All @@ -132,6 +146,7 @@ func init() {
buildSpecCommand,
importRuntimeCommand,
importStateCommand,
pruningCommand,
}
app.Flags = RootFlags
}
Expand Down Expand Up @@ -411,3 +426,43 @@ func buildSpecAction(ctx *cli.Context) error {

return nil
}

// pruneState is the action behind the prune-state subcommand.
// It builds a pruner over the input DB, fills the pruner's bloom filter,
// closes the input DB and then prunes into the DB located at the path given
// by the pruned-db-path flag. Any failure is returned to the caller; the
// bloom-filter, close and prune failures are wrapped with context.
func pruneState(ctx *cli.Context) error {
// Resolve the input DB location: the global base path flag wins, otherwise
// fall back to the base path of the default gssmr configuration.
inputDBPath := ctx.GlobalString(BasePathFlag.Name)
if inputDBPath == "" {
inputDBPath = dot.GssmrConfig().Global.BasePath
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the user should be able to specify the chain with --chain and then it should use the basepath based on the chain

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


// NOTE(review): both flags are declared as cli.IntFlag but are read here with
// the Uint64/Int64 accessors — confirm the accessors resolve IntFlag values.
bloomSize := ctx.Uint64(BloomFilterSizeFlag.Name)
retainBlocks := ctx.Int64(RetainBlockNumberFlag.Name)

pruner, err := state.NewPruner(inputDBPath, bloomSize, retainBlocks)
if err != nil {
return err
}

logger.Info("Pruner initialised")

err = pruner.SetBloomFilter()
if err != nil {
return fmt.Errorf("failed to set keys into bloom filter %w", err)
}

// close the input DB so that it can be reopened for streaming
err = pruner.InputDB.Close()
if err != nil {
return fmt.Errorf("failed to closed input db %w", err)
}

// NOTE(review): the output path is only validated here, after the bloom filter
// has been populated and the input DB closed — consider checking it up front.
prunedDBPath := ctx.String(DBPathFlag.Name)
if prunedDBPath == "" {
return fmt.Errorf("path not specified for badger db")
}

err = pruner.Prune(inputDBPath, prunedDBPath)
if err != nil {
return fmt.Errorf("failed to prune %w", err)
}

return nil
}
1 change: 0 additions & 1 deletion cmd/gossamer/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/lib/utils"

log "github.com/ChainSafe/log15"
"github.com/stretchr/testify/require"
"github.com/urfave/cli"
Expand Down
59 changes: 59 additions & 0 deletions dot/state/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package state

import (
"encoding/binary"
"errors"

"github.com/ChainSafe/gossamer/lib/common"
log "github.com/ChainSafe/log15"
bloomfilter "github.com/holiman/bloomfilter/v2"
)

// ErrKeySize is returned when the size of a key does not match the expected
// hash length.
var ErrKeySize = errors.New("key size does not match hash length")

// stateBloomHasher wraps a raw key so that it can be handed to the bloom
// filter as a hasher. Its method set mirrors the standard hash.Hash64
// interface, but only Size and Sum64 are functional; every other method
// panics because the key bytes themselves act as the digest.
type stateBloomHasher []byte

// Write is unsupported and always panics.
func (h stateBloomHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is unsupported and always panics.
func (h stateBloomHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is unsupported and always panics.
func (h stateBloomHasher) Reset() {
	panic("not implemented")
}

// BlockSize is unsupported and always panics.
func (h stateBloomHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the digest width in bytes.
func (h stateBloomHasher) Size() int {
	const digestBytes = 8
	return digestBytes
}

// Sum64 interprets the first eight bytes of the key as a big-endian uint64.
// It panics when the key holds fewer than eight bytes.
func (h stateBloomHasher) Sum64() uint64 {
	digest := binary.BigEndian.Uint64(h)
	return digest
}

// stateBloom is a wrapper around a bloom filter.
// The keys of all generated entries are recorded here so that, during the
// pruning stage, entries belonging to the specific version can be spared
// from deletion.
type stateBloom struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
type stateBloom struct {
type bloomState struct {

This isn't a public type, but still maybe try to avoid the stutter? state.stateBloom vs state.bloomState.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// bloom is the underlying filter that put and contain operate on.
bloom *bloomfilter.Filter
}

// newStateBloomWithSize creates a brand new state bloom for state generation.
// The bloom filter is created from the given size, which is multiplied by
// 1024*1024*8 before being passed to bloomfilter.New (presumably megabytes
// converted to bits — TODO confirm against the bloomfilter API). The parameters
// are picked so that the false-positive rate for mainnet is low enough.
// An error from bloomfilter.New is returned unchanged.
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
func newStateBloom(size uint64) (*stateBloom, error) {

I think the named parameter denotes that the caller will need to provide size with the standard constructor pattern.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// NOTE(review): the second argument (4) is presumably the number of hash
// functions — confirm against the bloomfilter API.
bloom, err := bloomfilter.New(size*1024*1024*8, 4)
if err != nil {
return nil, err
}
// Logs M()/8, presumably the filter size in bytes — TODO confirm.
log.Info("initialised state bloom", "size", float64(bloom.M()/8))
return &stateBloom{bloom: bloom}, nil
}

// put records key in the bloom filter.
// Only keys that are exactly common.HashLength bytes long are accepted; any
// other length leaves the filter untouched and yields ErrKeySize.
func (sb *stateBloom) put(key []byte) error {
	switch len(key) {
	case common.HashLength:
		hasher := stateBloomHasher(key)
		sb.bloom.Add(hasher)
		return nil
	default:
		return ErrKeySize
	}
}

// contain wraps the underlying Contains call and reports whether key may
// have been recorded in the bloom filter.
//   - true: the key may be contained (false positives are possible)
//   - false: the key is definitely not contained
//
// A key shorter than the hasher's digest width is reported as not contained:
// Sum64 needs that many bytes and would otherwise panic on such a key.
func (sb *stateBloom) contain(key []byte) bool {
	hasher := stateBloomHasher(key)
	if len(hasher) < hasher.Size() {
		// Too short to produce a digest, so the lookup cannot be performed.
		return false
	}
	return sb.bloom.Contains(hasher)
}
4 changes: 2 additions & 2 deletions dot/state/initialize.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (s *Service) Initialise(gen *genesis.Genesis, header *types.Header, t *trie
return fmt.Errorf("failed to clear database: %s", err)
}

if err = t.Store(chaindb.NewTable(db, storagePrefix)); err != nil {
if err = t.Store(chaindb.NewTable(db, StoragePrefix)); err != nil {
return fmt.Errorf("failed to write genesis trie to database: %w", err)
}

Expand Down Expand Up @@ -166,7 +166,7 @@ func loadGrandpaAuthorities(t *trie.Trie) ([]*types.GrandpaVoter, error) {
// storeInitialValues writes initial genesis values to the state database
func (s *Service) storeInitialValues(data *genesis.Data, header *types.Header, t *trie.Trie) error {
// write genesis trie to database
if err := t.Store(chaindb.NewTable(s.db, storagePrefix)); err != nil {
if err := t.Store(chaindb.NewTable(s.db, StoragePrefix)); err != nil {
return fmt.Errorf("failed to write trie to database: %s", err)
}

Expand Down
Loading