From b8bad314edf4ba00dec29982337bca844960a028 Mon Sep 17 00:00:00 2001
From: Fynn
Date: Mon, 25 Sep 2023 11:16:36 +0800
Subject: [PATCH] cmd/geth: add hash2path & trie get tools

---
 cmd/geth/dbcmd.go      | 288 ++++++++++++++++++++++++++++++++++++++++
 core/rawdb/database.go |  28 ++++
 trie/hbss2pbss.go      | 275 ++++++++++++++++++++++++++++++++++++++
 trie/node.go           |   7 +
 4 files changed, 598 insertions(+)
 create mode 100644 trie/hbss2pbss.go

diff --git a/cmd/geth/dbcmd.go b/cmd/geth/dbcmd.go
index b8fa365049..1962fd60f8 100644
--- a/cmd/geth/dbcmd.go
+++ b/cmd/geth/dbcmd.go
@@ -19,6 +19,7 @@ package main
 import (
 	"bytes"
 	"fmt"
+	"math"
 	"os"
 	"os/signal"
 	"path/filepath"
@@ -72,6 +73,10 @@ Remove blockchain and state databases`,
 			// no legacy stored receipts for bsc
 			// dbMigrateFreezerCmd,
 			dbCheckStateContentCmd,
+			dbHbss2PbssCmd,
+			dbPruneHashTrieCmd,
+			dbTrieGetCmd,
+			dbTrieDeleteCmd,
 		},
 	}
 	dbInspectCmd = &cli.Command{
@@ -94,6 +99,54 @@ Remove blockchain and state databases`,
 For each trie node encountered, it checks that the key corresponds to the keccak256(value). If this is not true, this indicates
 a data corruption.`,
 	}
+	dbHbss2PbssCmd = &cli.Command{
+		Action:    hbss2pbss,
+		Name:      "hbss-to-pbss",
+		ArgsUsage: "<jobnum (optional)>",
+		Flags: []cli.Flag{
+			utils.DataDirFlag,
+			utils.SyncModeFlag,
+		},
+		Usage:       "Convert Hash-Base to Path-Base trie node.",
+		Description: `This command iterates the entire trie node database and convert the hash-base node to path-base node.`,
+	}
+	dbTrieGetCmd = &cli.Command{
+		Action:    dbTrieGet,
+		Name:      "trie-get",
+		Usage:     "Show the value of a trie node path key",
+		ArgsUsage: "[trie owner] <hash-base key | path-base key>",
+		Flags: []cli.Flag{
+			utils.DataDirFlag,
+			utils.SyncModeFlag,
+			utils.MainnetFlag,
+			utils.StateSchemeFlag,
+		},
+		Description: "This command looks up the specified trie node key from the database.",
+	}
+	dbTrieDeleteCmd = &cli.Command{
+		Action:    dbTrieDelete,
+		Name:      "trie-delete",
+		Usage:     "delete the specify trie node",
+		ArgsUsage: "[trie owner] <hash-base key> | <path-base key>",
+		Flags: []cli.Flag{
+			utils.DataDirFlag,
+			utils.SyncModeFlag,
+			utils.MainnetFlag,
+			utils.StateSchemeFlag,
+		},
+		Description: "This command delete the specify trie node from the database.",
+	}
+	dbPruneHashTrieCmd = &cli.Command{
+		Action:    pruneHashTrie,
+		Name:      "prune-hash-trie",
+		ArgsUsage: "",
+		Flags: []cli.Flag{
+			utils.DataDirFlag,
+			utils.SyncModeFlag,
+		},
+		Usage:       "[Caution]Prune all the hash trie node in diskdb",
+		Description: `This command iterates the entrie kv in leveldb and delete all the hash trie node.`,
+	}
 	dbStatCmd = &cli.Command{
 		Action: dbStats,
 		Name:   "stats",
@@ -433,6 +486,146 @@ func dbGet(ctx *cli.Context) error {
 	return nil
 }
 
+// dbTrieGet shows the trie node stored under a given database key. With the
+// path scheme the arguments are "[trie owner] <path key>" (no owner means the
+// account trie); with the hash scheme the only argument is the node hash.
+func dbTrieGet(ctx *cli.Context) error {
+	if ctx.NArg() < 1 || ctx.NArg() > 2 {
+		return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
+	}
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, false, false)
+	defer db.Close()
+
+	scheme := ctx.String(utils.StateSchemeFlag.Name)
+	if scheme == "" {
+		scheme = rawdb.HashScheme
+	}
+
+	switch scheme {
+	case rawdb.PathScheme:
+		var (
+			pathKey []byte
+			owner   []byte
+			err     error
+		)
+		if ctx.NArg() == 1 {
+			pathKey, err = hexutil.Decode(ctx.Args().Get(0))
+			if err != nil {
+				log.Info("Could not decode the value", "error", err)
+				return err
+			}
+			nodeVal, hash := rawdb.ReadAccountTrieNode(db, pathKey)
+			if len(nodeVal) == 0 {
+				// trie.NodeString panics on a blob it cannot decode.
+				return fmt.Errorf("account trie node not found, path: %#x", pathKey)
+			}
+			log.Info("TrieGet result ", "PathKey", common.Bytes2Hex(pathKey), "Hash: ", hash, "node: ", trie.NodeString(hash.Bytes(), nodeVal))
+			return nil
+		}
+		owner, err = hexutil.Decode(ctx.Args().Get(0))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		pathKey, err = hexutil.Decode(ctx.Args().Get(1))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		nodeVal, hash := rawdb.ReadStorageTrieNode(db, common.BytesToHash(owner), pathKey)
+		if len(nodeVal) == 0 {
+			return fmt.Errorf("storage trie node not found, owner: %#x, path: %#x", owner, pathKey)
+		}
+		log.Info("TrieGet result ", "PathKey: ", common.Bytes2Hex(pathKey), "Owner: ", common.BytesToHash(owner), "Hash: ", hash, "node: ", trie.NodeString(hash.Bytes(), nodeVal))
+		return nil
+	case rawdb.HashScheme:
+		if ctx.NArg() != 1 {
+			return fmt.Errorf("hash scheme takes exactly one argument, got %d", ctx.NArg())
+		}
+		hashKey, err := hexutil.Decode(ctx.Args().Get(0))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		val, err := db.Get(hashKey)
+		if err != nil {
+			log.Error("db get failed, ", "error: ", err)
+			return err
+		}
+		log.Info("TrieGet result ", "HashKey: ", common.BytesToHash(hashKey), "node: ", trie.NodeString(hashKey, val))
+		return nil
+	default:
+		return fmt.Errorf("unsupported state scheme %q", scheme)
+	}
+}
+
+// dbTrieDelete deletes the trie node stored under a given database key. With
+// the path scheme the arguments are "[trie owner] <path key>"; with the hash
+// scheme the only argument is the node hash.
+func dbTrieDelete(ctx *cli.Context) error {
+	if ctx.NArg() < 1 || ctx.NArg() > 2 {
+		return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
+	}
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, false, false)
+	defer db.Close()
+
+	scheme := ctx.String(utils.StateSchemeFlag.Name)
+	if scheme == "" {
+		scheme = rawdb.HashScheme
+	}
+
+	switch scheme {
+	case rawdb.PathScheme:
+		var (
+			pathKey []byte
+			owner   []byte
+			err     error
+		)
+		if ctx.NArg() == 1 {
+			pathKey, err = hexutil.Decode(ctx.Args().Get(0))
+			if err != nil {
+				log.Info("Could not decode the value", "error", err)
+				return err
+			}
+			rawdb.DeleteAccountTrieNode(db, pathKey)
+			return nil
+		}
+		owner, err = hexutil.Decode(ctx.Args().Get(0))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		pathKey, err = hexutil.Decode(ctx.Args().Get(1))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		rawdb.DeleteStorageTrieNode(db, common.BytesToHash(owner), pathKey)
+		return nil
+	case rawdb.HashScheme:
+		if ctx.NArg() != 1 {
+			return fmt.Errorf("hash scheme takes exactly one argument, got %d", ctx.NArg())
+		}
+		hashKey, err := hexutil.Decode(ctx.Args().Get(0))
+		if err != nil {
+			log.Info("Could not decode the value", "error", err)
+			return err
+		}
+		if err := db.Delete(hashKey); err != nil {
+			log.Error("db delete failed", "err", err)
+			return err
+		}
+		return nil
+	default:
+		return fmt.Errorf("unsupported state scheme %q", scheme)
+	}
+}
+
 // dbDelete deletes a key from the database
 func dbDelete(ctx *cli.Context) error {
 	if ctx.NArg() != 1 {
@@ -743,3 +936,98 @@ func showMetaData(ctx *cli.Context) error {
 	table.Render()
 	return nil
 }
+
+// hbss2pbss converts the hash-based state trie of the current head block into
+// path-based trie nodes. The optional argument is the job number handed to
+// trie.NewHbss2Pbss (1000 when omitted).
+func hbss2pbss(ctx *cli.Context) error {
+	if ctx.NArg() > 1 {
+		return fmt.Errorf("required arguments: %v", ctx.Command.ArgsUsage)
+	}
+
+	var jobnum uint64
+	var err error
+	if ctx.NArg() == 1 {
+		jobnum, err = strconv.ParseUint(ctx.Args().Get(0), 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to Parse jobnum, Args[0]: %v, err: %v", ctx.Args().Get(0), err)
+		}
+	} else {
+		// by default
+		jobnum = 1000
+	}
+
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, false, false)
+	db.Sync()
+	defer db.Close()
+
+	config := trie.HashDefaults
+	triedb := trie.NewDatabase(db, config)
+	triedb.Cap(0)
+	log.Info("hbss2pbss triedb", "scheme", triedb.Scheme())
+	defer triedb.Close()
+
+	headerHash := rawdb.ReadHeadHeaderHash(db)
+	blockNumber := rawdb.ReadHeaderNumber(db, headerHash)
+	if blockNumber == nil {
+		log.Error("read header number failed.")
+		return fmt.Errorf("read header number failed")
+	}
+
+	log.Info("hbss2pbss converting", "HeaderHash: ", headerHash.String(), ", blockNumber: ", *blockNumber)
+
+	var headerBlockHash common.Hash
+	var trieRootHash common.Hash
+
+	if *blockNumber != math.MaxUint64 {
+		headerBlockHash = rawdb.ReadCanonicalHash(db, *blockNumber)
+		if headerBlockHash == (common.Hash{}) {
+			return fmt.Errorf("ReadHeadBlockHash empty hash")
+		}
+		blockHeader := rawdb.ReadHeader(db, headerBlockHash, *blockNumber)
+		if blockHeader == nil {
+			return fmt.Errorf("header not found, hash: %v, number: %d", headerBlockHash, *blockNumber)
+		}
+		trieRootHash = blockHeader.Root
+		fmt.Println("Canonical Hash: ", headerBlockHash.String(), ", TrieRootHash: ", trieRootHash.String())
+	}
+	if (trieRootHash == common.Hash{}) {
+		log.Error("Empty root hash")
+		return fmt.Errorf("Empty root hash.")
+	}
+
+	id := trie.StateTrieID(trieRootHash)
+	theTrie, err := trie.New(id, triedb)
+	if err != nil {
+		log.Error("fail to new trie tree", "err", err, "rootHash", trieRootHash.String())
+		return err
+	}
+
+	h2p, err := trie.NewHbss2Pbss(theTrie, triedb, trieRootHash, *blockNumber, jobnum)
+	if err != nil {
+		log.Error("fail to new hash2pbss", "err", err, "rootHash", trieRootHash.String())
+		return err
+	}
+	h2p.Run()
+
+	return nil
+}
+
+// pruneHashTrie deletes every hash-based trie node from the key-value
+// database. It takes no arguments.
+func pruneHashTrie(ctx *cli.Context) error {
+	if ctx.NArg() != 0 {
+		return fmt.Errorf("required none argument")
+	}
+
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, false, false)
+	defer db.Close()
+
+	return rawdb.PruneHashTrieNodeInDataBase(db)
+}
diff --git a/core/rawdb/database.go b/core/rawdb/database.go
index 7188efa6f3..12384a66fa 100644
--- a/core/rawdb/database.go
+++ b/core/rawdb/database.go
@@ -581,6 +581,34 @@ func AncientInspect(db ethdb.Database) error {
 	return nil
 }
 
+// PruneHashTrieNodeInDataBase iterates over the whole key-value store and
+// deletes every entry that IsLegacyTrieNode reports as a hash-based trie node.
+// It stops at, and returns, the first delete error or the iterator error.
+func PruneHashTrieNodeInDataBase(db ethdb.Database) error {
+	it := db.NewIterator([]byte{}, []byte{})
+	defer it.Release()
+
+	totalNum := 0
+	for it.Next() {
+		key := it.Key()
+		if !IsLegacyTrieNode(key, it.Value()) {
+			continue
+		}
+		if err := db.Delete(key); err != nil {
+			return err
+		}
+		totalNum++
+		if totalNum%100000 == 0 {
+			log.Info("Pruning hash-base trie nodes", "Complete progress", totalNum)
+		}
+	}
+	if err := it.Error(); err != nil {
+		return err
+	}
+	log.Info("Pruning hash-base trie nodes done", "Complete progress", totalNum)
+	return nil
+}
+
 // InspectDatabase traverses the entire database and checks the size
 // of all different categories of data.
 func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error {
diff --git a/trie/hbss2pbss.go b/trie/hbss2pbss.go
new file mode 100644
index 0000000000..40845bdc1e
--- /dev/null
+++ b/trie/hbss2pbss.go
@@ -0,0 +1,275 @@
+package trie
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"runtime"
+	"sync"
+	"sync/atomic"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/rlp"
+	"github.com/ethereum/go-ethereum/trie/trienode"
+)
+
+// Hbss2Pbss converts the nodes of a hash-based state trie, and of the storage
+// tries referenced by its accounts, into path-based trie nodes on disk.
+type Hbss2Pbss struct {
+	trie            *Trie // traverse trie
+	db              *Database
+	blocknum        uint64
+	root            node // root of triedb
+	stateRootHash   common.Hash
+	concurrentQueue chan struct{} // bounds the number of traversal goroutines
+	totalNum        uint64        // resolved hash nodes, updated atomically
+	failedNum       uint64        // failed resolutions, updated atomically
+	wg              sync.WaitGroup
+}
+
+const (
+	// DEFAULT_TRIEDBCACHE_SIZE is 1 GiB. It is not referenced in this file.
+	DEFAULT_TRIEDBCACHE_SIZE = 1024 * 1024 * 1024
+)
+
+// NewHbss2Pbss returns a converter for the state trie tr. jobnum is the
+// capacity of the queue that bounds concurrent traversal goroutines; 0 is
+// raised to 1 because an unbuffered queue would block every sub-traversal
+// forever. An error is returned when tr or its root is nil.
+func NewHbss2Pbss(tr *Trie, db *Database, stateRootHash common.Hash, blocknum uint64, jobnum uint64) (*Hbss2Pbss, error) {
+	if tr == nil {
+		return nil, errors.New("trie is nil")
+	}
+
+	if tr.root == nil {
+		return nil, errors.New("trie root is nil")
+	}
+
+	if jobnum == 0 {
+		jobnum = 1
+	}
+
+	ins := &Hbss2Pbss{
+		trie:            tr,
+		blocknum:        blocknum,
+		db:              db,
+		stateRootHash:   stateRootHash,
+		root:            tr.root,
+		concurrentQueue: make(chan struct{}, jobnum),
+		wg:              sync.WaitGroup{},
+	}
+
+	return ins, nil
+}
+
+// resloveWithoutTrack loads the node behind a hashNode through the trie
+// reader and decodes it; any other node is returned unchanged.
+func (t *Trie) resloveWithoutTrack(n node, prefix []byte) (node, error) {
+	if n, ok := n.(hashNode); ok {
+		blob, err := t.reader.node(prefix, common.BytesToHash(n))
+		if err != nil {
+			return nil, err
+		}
+		return mustDecodeNode(n, blob), nil
+	}
+	return n, nil
+}
+
+// writeNode stores the node blob under its path key, as an account trie node
+// when owner is the zero hash and as a storage trie node of owner otherwise.
+func (h2p *Hbss2Pbss) writeNode(pathKey []byte, n *trienode.Node, owner common.Hash) {
+	if owner == (common.Hash{}) {
+		rawdb.WriteAccountTrieNode(h2p.db.diskdb, pathKey, n.Blob)
+		log.Debug("WriteNodes account node, ", "path: ", common.Bytes2Hex(pathKey), "Hash: ", n.Hash, "BlobHash: ", crypto.Keccak256Hash(n.Blob))
+	} else {
+		rawdb.WriteStorageTrieNode(h2p.db.diskdb, owner, pathKey, n.Blob)
+		log.Debug("WriteNodes storage node, ", "path: ", common.Bytes2Hex(pathKey), "owner: ", owner.String(), "Hash: ", n.Hash, "BlobHash: ", crypto.Keccak256Hash(n.Blob))
+	}
+}
+
+// Run traverses the state trie, waits for all traversal goroutines and then
+// records blocknum as the state ID of stateRootHash. The state IDs are not
+// written when any node or storage trie failed to resolve, so an incomplete
+// conversion is never marked as finished.
+func (h2p *Hbss2Pbss) Run() {
+	log.Debug("Find Account Trie Tree", "rootHash", h2p.trie.Hash().String(), "BlockNum", h2p.blocknum)
+
+	h2p.ConcurrentTraversal(h2p.trie, h2p.root, []byte{})
+	h2p.wg.Wait()
+
+	log.Info("Total complete", "count", h2p.totalNum, "go routines Num", runtime.NumGoroutine(), "h2p concurrentQueue", len(h2p.concurrentQueue))
+
+	if failed := atomic.LoadUint64(&h2p.failedNum); failed != 0 {
+		log.Error("Conversion incomplete, state id not written", "failed", failed)
+		return
+	}
+	rawdb.WritePersistentStateID(h2p.db.diskdb, h2p.blocknum)
+	rawdb.WriteStateID(h2p.db.diskdb, h2p.stateRootHash, h2p.blocknum)
+}
+
+// SubConcurrentTraversal runs ConcurrentTraversal while holding one slot of
+// the concurrency queue. The caller must have called wg.Add(1) beforehand;
+// the slot and the wait group are released even if the traversal panics.
+func (h2p *Hbss2Pbss) SubConcurrentTraversal(theTrie *Trie, theNode node, path []byte) {
+	defer h2p.wg.Done()
+	h2p.concurrentQueue <- struct{}{}
+	defer func() { <-h2p.concurrentQueue }()
+	h2p.ConcurrentTraversal(theTrie, theNode, path)
+}
+
+// ConcurrentTraversal walks the sub-trie rooted at theNode, located at path
+// inside theTrie, and writes every short and full node under its path key.
+// Children of a full node are handed to new goroutines while the concurrency
+// queue is less than half full; storage tries referenced by account leaves
+// are always traversed in a goroutine of their own.
+func (h2p *Hbss2Pbss) ConcurrentTraversal(theTrie *Trie, theNode node, path []byte) {
+	// nil node
+	if theNode == nil {
+		return
+	}
+
+	switch current := (theNode).(type) {
+	case *shortNode:
+		collapsed := current.copy()
+		collapsed.Key = hexToCompact(current.Key)
+		var hash, _ = current.cache()
+		h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodeToBytes(collapsed)), theTrie.owner)
+
+		h2p.ConcurrentTraversal(theTrie, current.Val, append(path, current.Key...))
+
+	case *fullNode:
+		// copy from trie/Committer (*committer).commit
+		collapsed := current.copy()
+		var hash, _ = collapsed.cache()
+		collapsed.Children = h2p.commitChildren(path, current)
+
+		nodebytes := nodeToBytes(collapsed)
+		if common.BytesToHash(hash) != crypto.Keccak256Hash(nodebytes) {
+			log.Error("Hash is inconsistent", "hash", common.BytesToHash(hash), "node hash", crypto.Keccak256Hash(nodebytes), "node", collapsed.fstring(""))
+			panic("hash inconsistent.")
+		}
+
+		h2p.writeNode(path, trienode.New(common.BytesToHash(hash), nodebytes), theTrie.owner)
+
+		for idx, child := range current.Children {
+			if child == nil {
+				continue
+			}
+			childPath := append(path, byte(idx))
+			if len(h2p.concurrentQueue)*2 < cap(h2p.concurrentQueue) {
+				h2p.wg.Add(1)
+				// The goroutine gets its own copy, childPath may alias path's array.
+				dst := make([]byte, len(childPath))
+				copy(dst, childPath)
+				go h2p.SubConcurrentTraversal(theTrie, child, dst)
+			} else {
+				h2p.ConcurrentTraversal(theTrie, child, childPath)
+			}
+		}
+	case hashNode:
+		n, err := theTrie.resloveWithoutTrack(current, path)
+		if err != nil {
+			atomic.AddUint64(&h2p.failedNum, 1)
+			log.Error("Resolve HashNode", "error", err, "TrieRoot", theTrie.Hash(), "Path", path)
+			return
+		}
+		h2p.ConcurrentTraversal(theTrie, n, path)
+		totalNum := atomic.AddUint64(&h2p.totalNum, 1)
+		if totalNum%100000 == 0 {
+			log.Info("Converting ", "Complete progress", totalNum, "go routines Num", runtime.NumGoroutine(), "h2p concurrentQueue", len(h2p.concurrentQueue))
+		}
+		return
+	case valueNode:
+		if !hasTerm(path) {
+			log.Info("ValueNode miss path term", "path", common.Bytes2Hex(path))
+			break
+		}
+		var account types.StateAccount
+		if err := rlp.Decode(bytes.NewReader(current), &account); err != nil {
+			// log.Info("Rlp decode account failed.", "err", err)
+			break
+		}
+		if account.Root == (common.Hash{}) || account.Root == types.EmptyRootHash {
+			// log.Info("Not a storage trie.", "account", common.BytesToHash(path).String())
+			break
+		}
+
+		ownerAddress := common.BytesToHash(hexToCompact(path))
+		tr, err := New(StorageTrieID(h2p.stateRootHash, ownerAddress, account.Root), h2p.db)
+		if err != nil {
+			atomic.AddUint64(&h2p.failedNum, 1)
+			log.Error("New Storage trie error", "err", err, "root", account.Root.String(), "owner", ownerAddress.String())
+			break
+		}
+		log.Debug("Find Contract Trie Tree", "rootHash: ", tr.Hash().String())
+		h2p.wg.Add(1)
+		go h2p.SubConcurrentTraversal(tr, tr.root, []byte{})
+	default:
+		panic(errors.New("Invalid node type to traverse."))
+	}
+}
+
+// copy from trie/Committer (*committer).commit
+func (h2p *Hbss2Pbss) commitChildren(path []byte, n *fullNode) [17]node {
+	var children [17]node
+	for i := 0; i < 16; i++ {
+		child := n.Children[i]
+		if child == nil {
+			continue
+		}
+		// If it's the hashed child, save the hash value directly.
+		// Note: it's impossible that the child in range [0, 15]
+		// is a valueNode.
+		if hn, ok := child.(hashNode); ok {
+			children[i] = hn
+			continue
+		}
+
+		children[i] = h2p.commit(append(path, byte(i)), child)
+	}
+	// For the 17th child, it's possible the type is valuenode.
+	if n.Children[16] != nil {
+		children[16] = n.Children[16]
+	}
+	return children
+}
+
+// commit collapses a node down into a hash node and returns it.
+func (h2p *Hbss2Pbss) commit(path []byte, n node) node {
+	// if this path is clean, use available cached data
+	hash, dirty := n.cache()
+	if hash != nil && !dirty {
+		return hash
+	}
+	// Commit children, then parent, and remove the dirty flag.
+	switch cn := n.(type) {
+	case *shortNode:
+		// Commit child
+		collapsed := cn.copy()
+
+		// If the child is fullNode, recursively commit,
+		// otherwise it can only be hashNode or valueNode.
+		if _, ok := cn.Val.(*fullNode); ok {
+			collapsed.Val = h2p.commit(append(path, cn.Key...), cn.Val)
+		}
+		// The key needs to be copied, since we're adding it to the
+		// modified nodeset.
+		collapsed.Key = hexToCompact(cn.Key)
+		return collapsed
+	case *fullNode:
+		hashedKids := h2p.commitChildren(path, cn)
+		collapsed := cn.copy()
+		collapsed.Children = hashedKids
+
+		return collapsed
+	case hashNode:
+		return cn
+	default:
+		// nil, valuenode shouldn't be committed
+		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
+	}
+}
diff --git a/trie/node.go b/trie/node.go
index 15bbf62f1c..d78ed5c569 100644
--- a/trie/node.go
+++ b/trie/node.go
@@ -112,6 +112,13 @@ func (n rawNode) EncodeRLP(w io.Writer) error {
 	return err
 }
 
+// NodeString decodes the encoded trie node buf, whose hash is hash, and
+// returns its string representation. It panics if buf cannot be decoded.
+func NodeString(hash, buf []byte) string {
+	node := mustDecodeNode(hash, buf)
+	return node.fstring("NodeString: ")
+}
+
 // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
 func mustDecodeNode(hash, buf []byte) node {
 	n, err := decodeNode(hash, buf)