From 9a7498d79e61c42403c59afb7e7bd279887ba1a0 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Mon, 20 Feb 2023 18:43:22 +0800 Subject: [PATCH] core/state, trie: port changes from PBSS --- core/state/statedb.go | 2 +- trie/committer.go | 43 ++--- trie/database.go | 9 +- trie/nodeset.go | 120 +++++++------ trie/proof.go | 2 +- trie/trie.go | 28 +-- trie/trie_test.go | 33 +--- trie/util_test.go | 409 +++++++++++++++++++++++------------------- trie/utils.go | 132 +++----------- 9 files changed, 365 insertions(+), 413 deletions(-) diff --git a/core/state/statedb.go b/core/state/statedb.go index 3d8fd15bbd25..247aef8b239c 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -970,8 +970,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { storageTrieNodesUpdated int storageTrieNodesDeleted int nodes = trie.NewMergedNodeSet() + codeWriter = s.db.DiskDB().NewBatch() ) - codeWriter := s.db.DiskDB().NewBatch() for addr := range s.stateObjectsDirty { if obj := s.stateObjects[addr]; !obj.deleted { // Write any contract code associated with the state object diff --git a/trie/committer.go b/trie/committer.go index c4957f3490ea..c1d96e7d8773 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -33,29 +33,21 @@ type leaf struct { // insertion order. type committer struct { nodes *NodeSet - tracer *tracer collectLeaf bool } // newCommitter creates a new committer or picks one from the pool. -func newCommitter(owner common.Hash, tracer *tracer, collectLeaf bool) *committer { +func newCommitter(nodeset *NodeSet, collectLeaf bool) *committer { return &committer{ - nodes: NewNodeSet(owner), - tracer: tracer, + nodes: nodeset, collectLeaf: collectLeaf, } } // Commit collapses a node down into a hash node and returns it along with // the modified nodeset. -func (c *committer) Commit(n node) (hashNode, *NodeSet) { - h := c.commit(nil, n) - // Some nodes can be deleted from trie which can't be captured - // by committer itself. 
Iterate all deleted nodes tracked by - // tracer and marked them as deleted only if they are present - // in database previously. - c.tracer.markDeletions(c.nodes) - return h.(hashNode), c.nodes +func (c *committer) Commit(n node) hashNode { + return c.commit(nil, n).(hashNode) } // commit collapses a node down into a hash node and returns it. @@ -74,9 +66,7 @@ func (c *committer) commit(path []byte, n node) node { // If the child is fullNode, recursively commit, // otherwise it can only be hashNode or valueNode. if _, ok := cn.Val.(*fullNode); ok { - childV := c.commit(append(path, cn.Key...), cn.Val) - - collapsed.Val = childV + collapsed.Val = c.commit(append(path, cn.Key...), cn.Val) } // The key needs to be copied, since we're adding it to the // modified nodeset. @@ -85,12 +75,6 @@ func (c *committer) commit(path []byte, n node) node { if hn, ok := hashedNode.(hashNode); ok { return hn } - // The short node now is embedded in its parent. Mark the node as - // deleted if it's present in database previously. It's equivalent - // as deletion from database's perspective. - if prev := c.tracer.getPrev(path); len(prev) != 0 { - c.nodes.markDeleted(path, prev) - } return collapsed case *fullNode: hashedKids := c.commitChildren(path, cn) @@ -101,12 +85,6 @@ func (c *committer) commit(path []byte, n node) node { if hn, ok := hashedNode.(hashNode); ok { return hn } - // The full node now is embedded in its parent. Mark the node as - // deleted if it's present in database previously. It's equivalent - // as deletion from database's perspective. - if prev := c.tracer.getPrev(path); len(prev) != 0 { - c.nodes.markDeleted(path, prev) - } return collapsed case hashNode: return cn @@ -134,8 +112,7 @@ func (c *committer) commitChildren(path []byte, n *fullNode) [17]node { // Commit the child recursively and store the "hashed" value. // Note the returned node can be some embedded nodes, so it's // possible the type is not hashNode. 
- hashed := c.commit(append(path, byte(i)), child) - children[i] = hashed + children[i] = c.commit(append(path, byte(i)), child) } // For the 17th child, it's possible the type is valuenode. if n.Children[16] != nil { @@ -155,6 +132,12 @@ func (c *committer) store(path []byte, n node) node { // usually is leaf node). But small value (less than 32bytes) is not // our target (leaves in account trie only). if hash == nil { + // The node is embedded in its parent, in other words, this node + // will not be stored in the database independently, mark it as + // deleted only if the node was existent in database before. + if _, ok := c.nodes.accessList[string(path)]; ok { + c.nodes.markDeleted(path) + } return n } // We have the hash already, estimate the RLP encoding-size of the node. @@ -169,7 +152,7 @@ func (c *committer) store(path []byte, n node) node { } ) // Collect the dirty node to nodeset for return. - c.nodes.markUpdated(path, mnode, c.tracer.getPrev(path)) + c.nodes.markUpdated(path, mnode) // Collect the corresponding leaf node if it's required. We don't check // full node since it's impossible to store value in fullNode. The key diff --git a/trie/database.go b/trie/database.go index 895ffdf89d88..442f594c1639 100644 --- a/trie/database.go +++ b/trie/database.go @@ -792,13 +792,12 @@ func (db *Database) Update(nodes *MergedNodeSet) error { } for _, owner := range order { subset := nodes.sets[owner] - for _, path := range subset.updates.order { - n, ok := subset.updates.nodes[path] - if !ok { - return fmt.Errorf("missing node %x %v", owner, path) + subset.forEachWithOrder(false, func(path string, n *memoryNode) { + if n.isDeleted() { + return // ignore deletion } db.insert(n.hash, int(n.size), n.node) - } + }) } // Link up the account trie and storage trie if the node points // to an account trie leaf. 
diff --git a/trie/nodeset.go b/trie/nodeset.go index 928172350171..6ec175e3ed77 100644 --- a/trie/nodeset.go +++ b/trie/nodeset.go @@ -19,6 +19,7 @@ package trie import ( "fmt" "reflect" + "sort" "strings" "github.com/ethereum/go-ethereum/common" @@ -40,8 +41,8 @@ var memoryNodeSize = int(reflect.TypeOf(memoryNode{}).Size()) // memorySize returns the total memory size used by this node. // nolint:unused -func (n *memoryNode) memorySize(key int) int { - return int(n.size) + memoryNodeSize + key +func (n *memoryNode) memorySize(pathlen int) int { + return int(n.size) + memoryNodeSize + pathlen } // rlp returns the raw rlp encoded blob of the cached trie node, either directly @@ -64,7 +65,13 @@ func (n *memoryNode) obj() node { return expandNode(n.hash[:], n.node) } +// isDeleted returns the indicator if the node is marked as deleted. +func (n *memoryNode) isDeleted() bool { + return n.hash == (common.Hash{}) +} + // nodeWithPrev wraps the memoryNode with the previous node value. +// nolint: unused type nodeWithPrev struct { *memoryNode prev []byte // RLP-encoded previous value, nil means it's non-existent @@ -79,64 +86,62 @@ func (n *nodeWithPrev) unwrap() *memoryNode { // memorySize returns the total memory size used by this node. It overloads // the function in memoryNode by counting the size of previous value as well. // nolint: unused -func (n *nodeWithPrev) memorySize(key int) int { - return n.memoryNode.memorySize(key) + len(n.prev) -} - -// nodesWithOrder represents a collection of dirty nodes which includes -// newly-inserted and updated nodes. The modification order of all nodes -// is represented by order list. 
-type nodesWithOrder struct { - order []string // the path list of dirty nodes, sort by insertion order - nodes map[string]*nodeWithPrev // the map of dirty nodes, keyed by node path +func (n *nodeWithPrev) memorySize(pathlen int) int { + return n.memoryNode.memorySize(pathlen) + len(n.prev) } // NodeSet contains all dirty nodes collected during the commit operation. // Each node is keyed by path. It's not thread-safe to use. type NodeSet struct { - owner common.Hash // the identifier of the trie - updates *nodesWithOrder // the set of updated nodes(newly inserted, updated) - deletes map[string][]byte // the map of deleted nodes, keyed by node - leaves []*leaf // the list of dirty leaves + owner common.Hash // the identifier of the trie + nodes map[string]*memoryNode // the set of dirty nodes(inserted, updated, deleted) + leaves []*leaf // the list of dirty leaves + accessList map[string][]byte // The list of accessed nodes, which records the original node value } // NewNodeSet initializes an empty node set to be used for tracking dirty nodes // from a specific account or storage trie. The owner is zero for the account // trie and the owning account address hash for storage tries. -func NewNodeSet(owner common.Hash) *NodeSet { +func NewNodeSet(owner common.Hash, accessList map[string][]byte) *NodeSet { return &NodeSet{ - owner: owner, - updates: &nodesWithOrder{ - nodes: make(map[string]*nodeWithPrev), - }, - deletes: make(map[string][]byte), + owner: owner, + nodes: make(map[string]*memoryNode), + accessList: accessList, } } -/* -// NewNodeSetWithDeletion initializes the nodeset with provided deletion set. -func NewNodeSetWithDeletion(owner common.Hash, paths [][]byte, prev [][]byte) *NodeSet { - set := NewNodeSet(owner) - for i, path := range paths { - set.markDeleted(path, prev[i]) +// forEachWithOrder iterates the dirty nodes with the specified order. +// If topToBottom is true: +// +// then the order of iteration is top to bottom, left to right. 
+// +// If topToBottom is false: +// +// then the order of iteration is bottom to top, right to left. +func (set *NodeSet) forEachWithOrder(topToBottom bool, callback func(path string, n *memoryNode)) { + var paths sort.StringSlice + for path := range set.nodes { + paths = append(paths, path) + } + if topToBottom { + paths.Sort() + } else { + sort.Sort(sort.Reverse(paths)) + } + for _, path := range paths { + callback(path, set.nodes[path]) } - return set } -*/ // markUpdated marks the node as dirty(newly-inserted or updated) with provided // node path, node object along with its previous value. -func (set *NodeSet) markUpdated(path []byte, node *memoryNode, prev []byte) { - set.updates.order = append(set.updates.order, string(path)) - set.updates.nodes[string(path)] = &nodeWithPrev{ - memoryNode: node, - prev: prev, - } +func (set *NodeSet) markUpdated(path []byte, node *memoryNode) { + set.nodes[string(path)] = node } // markDeleted marks the node as deleted with provided path and previous value. -func (set *NodeSet) markDeleted(path []byte, prev []byte) { - set.deletes[string(path)] = prev +func (set *NodeSet) markDeleted(path []byte) { + set.nodes[string(path)] = &memoryNode{} } // addLeaf collects the provided leaf node into set. @@ -146,14 +151,25 @@ func (set *NodeSet) addLeaf(node *leaf) { // Size returns the number of updated and deleted nodes contained in the set. func (set *NodeSet) Size() (int, int) { - return len(set.updates.order), len(set.deletes) + var ( + updates int + deletes int + ) + for _, n := range set.nodes { + if n.isDeleted() { + deletes += 1 + } else { + updates += 1 + } + } + return updates, deletes } // Hashes returns the hashes of all updated nodes. TODO(rjl493456442) how can // we get rid of it? 
func (set *NodeSet) Hashes() []common.Hash { var ret []common.Hash - for _, node := range set.updates.nodes { + for _, node := range set.nodes { ret = append(ret, node.hash) } return ret @@ -163,19 +179,23 @@ func (set *NodeSet) Hashes() []common.Hash { func (set *NodeSet) Summary() string { var out = new(strings.Builder) fmt.Fprintf(out, "nodeset owner: %v\n", set.owner) - if set.updates != nil { - for _, key := range set.updates.order { - updated := set.updates.nodes[key] - if updated.prev != nil { - fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", key, updated.hash, updated.prev) - } else { - fmt.Fprintf(out, " [+]: %x -> %v\n", key, updated.hash) + if set.nodes != nil { + for path, n := range set.nodes { + // Deletion + if n.isDeleted() { + fmt.Fprintf(out, " [-]: %x prev: %x\n", path, set.accessList[path]) + continue + } + // Insertion + origin, ok := set.accessList[path] + if !ok { + fmt.Fprintf(out, " [+]: %x -> %v\n", path, n.hash) + continue } + // Update + fmt.Fprintf(out, " [*]: %x -> %v prev: %x\n", path, n.hash, origin) } } - for k, n := range set.deletes { - fmt.Fprintf(out, " [-]: %x -> %x\n", k, n) - } for _, n := range set.leaves { fmt.Fprintf(out, "[leaf]: %v\n", n) } diff --git a/trie/proof.go b/trie/proof.go index af49ce36b36c..f11dfc47afab 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -563,7 +563,7 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, lastKey []byte, key } // Rebuild the trie with the leaf stream, the shape of trie // should be same with the original one. 
- tr := &Trie{root: root, reader: newEmptyReader()} + tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()} if empty { tr.root = nil } diff --git a/trie/trie.go b/trie/trie.go index cf9108f1077b..17bacba00fdc 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -81,7 +81,7 @@ func New(id *ID, db NodeReader) (*Trie, error) { trie := &Trie{ owner: id.Owner, reader: reader, - //tracer: newTracer(), + tracer: newTracer(), } if id.Root != (common.Hash{}) && id.Root != types.EmptyRootHash { rootnode, err := trie.resolveAndTrack(id.Root[:], nil) @@ -547,7 +547,7 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { // Hash returns the root hash of the trie. It does not write to the // database and can be used even if the trie doesn't have one. func (t *Trie) Hash() common.Hash { - hash, cached, _ := t.hashRoot() + hash, cached := t.hashRoot() t.root = cached return common.BytesToHash(hash.(hashNode)) } @@ -561,14 +561,14 @@ func (t *Trie) Hash() common.Hash { func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { defer t.tracer.reset() + nodes := NewNodeSet(t.owner, t.tracer.accessList) + t.tracer.markDeletions(nodes) + // Trie is empty and can be classified into two types of situations: // - The trie was empty and no update happens // - The trie was non-empty and all nodes are dropped if t.root == nil { - // Wrap tracked deletions as the return - set := NewNodeSet(t.owner) - t.tracer.markDeletions(set) - return types.EmptyRootHash, set + return types.EmptyRootHash, nodes } // Derive the hash for all dirty nodes first. We hold the assumption // in the following procedure that all nodes are hashed. 
@@ -582,23 +582,23 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *NodeSet) { t.root = hashedNode return rootHash, nil } - h := newCommitter(t.owner, t.tracer, collectLeaf) - newRoot, nodes := h.Commit(t.root) - t.root = newRoot + t.root = newCommitter(nodes, collectLeaf).Commit(t.root) return rootHash, nodes } // hashRoot calculates the root hash of the given trie -func (t *Trie) hashRoot() (node, node, error) { +func (t *Trie) hashRoot() (node, node) { if t.root == nil { - return hashNode(types.EmptyRootHash.Bytes()), nil, nil + return hashNode(types.EmptyRootHash.Bytes()), nil } // If the number of changes is below 100, we let one thread handle it h := newHasher(t.unhashed >= 100) - defer returnHasherToPool(h) + defer func() { + returnHasherToPool(h) + t.unhashed = 0 + }() hashed, cached := h.hash(t.root, true) - t.unhashed = 0 - return hashed, cached, nil + return hashed, cached } // Reset drops the referenced root node and cleans all internal state. diff --git a/trie/trie_test.go b/trie/trie_test.go index 2f56c89cde37..877c11bbe039 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -410,8 +410,6 @@ func runRandTest(rt randTest) bool { values = make(map[string]string) // tracks content of the trie origTrie = NewEmpty(triedb) ) - tr.tracer = newTracer() - for i, step := range rt { // fmt.Printf("{op: %d, key: common.Hex2Bytes(\"%x\"), value: common.Hex2Bytes(\"%x\")}, // step %d\n", // step.op, step.key, step.value, i) @@ -449,21 +447,14 @@ func runRandTest(rt randTest) bool { root, nodes := tr.Commit(true) // Validity the returned nodeset if nodes != nil { - for path, node := range nodes.updates.nodes { + for path := range nodes.nodes { blob, _, _ := origTrie.TryGetNode(hexToCompact([]byte(path))) - got := node.prev + got := nodes.accessList[path] if !bytes.Equal(blob, got) { rt[i].err = fmt.Errorf("prevalue mismatch for 0x%x, got 0x%x want 0x%x", path, got, blob) panic(rt[i].err) } } - for path, prev := range nodes.deletes { - blob, _, _ := 
origTrie.TryGetNode(hexToCompact([]byte(path))) - if !bytes.Equal(blob, prev) { - rt[i].err = fmt.Errorf("prevalue mismatch for 0x%x, got 0x%x want 0x%x", path, prev, blob) - return false - } - } } if nodes != nil { triedb.Update(NewWithNodeSet(nodes)) @@ -474,12 +465,6 @@ func runRandTest(rt randTest) bool { return false } tr = newtr - - // Enable node tracing. Resolve the root node again explicitly - // since it's not captured at the beginning. - tr.tracer = newTracer() - tr.resolveAndTrack(root.Bytes(), nil) - origTrie = tr.Copy() case opItercheckhash: checktr := NewEmpty(triedb) @@ -492,8 +477,6 @@ func runRandTest(rt randTest) bool { } case opNodeDiff: var ( - inserted = tr.tracer.insertList() - deleted = tr.tracer.deleteList() origIter = origTrie.NodeIterator(nil) curIter = tr.NodeIterator(nil) origSeen = make(map[string]struct{}) @@ -527,19 +510,19 @@ func runRandTest(rt randTest) bool { deleteExp[path] = struct{}{} } } - if len(insertExp) != len(inserted) { + if len(insertExp) != len(tr.tracer.insert) { rt[i].err = fmt.Errorf("insert set mismatch") } - if len(deleteExp) != len(deleted) { + if len(deleteExp) != len(tr.tracer.delete) { rt[i].err = fmt.Errorf("delete set mismatch") } - for _, insert := range inserted { - if _, present := insertExp[string(insert)]; !present { + for insert := range tr.tracer.insert { + if _, present := insertExp[insert]; !present { rt[i].err = fmt.Errorf("missing inserted node") } } - for _, del := range deleted { - if _, present := deleteExp[string(del)]; !present { + for del := range tr.tracer.delete { + if _, present := deleteExp[del]; !present { rt[i].err = fmt.Errorf("missing deleted node") } } diff --git a/trie/util_test.go b/trie/util_test.go index 8d925a16aabb..dfdbdc02a98d 100644 --- a/trie/util_test.go +++ b/trie/util_test.go @@ -25,14 +25,13 @@ import ( "github.com/ethereum/go-ethereum/core/types" ) -// Tests if the trie diffs are tracked correctly. 
-func TestTrieTracer(t *testing.T) { - db := NewDatabase(rawdb.NewMemoryDatabase()) - trie := NewEmpty(db) - trie.tracer = newTracer() - - // Insert a batch of entries, all the nodes should be marked as inserted - vals := []struct{ k, v string }{ +var ( + tiny = []struct{ k, v string }{ + {"k1", "v1"}, + {"k2", "v2"}, + {"k3", "v3"}, + } + nonAligned = []struct{ k, v string }{ {"do", "verb"}, {"ether", "wookiedoo"}, {"horse", "stallion"}, @@ -41,265 +40,311 @@ func TestTrieTracer(t *testing.T) { {"dog", "puppy"}, {"somethingveryoddindeedthis is", "myothernodedata"}, } + standard = []struct{ k, v string }{ + {string(randBytes(32)), "verb"}, + {string(randBytes(32)), "wookiedoo"}, + {string(randBytes(32)), "stallion"}, + {string(randBytes(32)), "horse"}, + {string(randBytes(32)), "coin"}, + {string(randBytes(32)), "puppy"}, + {string(randBytes(32)), "myothernodedata"}, + } +) + +func TestTrieTracer(t *testing.T) { + testTrieTracer(t, tiny) + testTrieTracer(t, nonAligned) + testTrieTracer(t, standard) +} + +// Tests if the trie diffs are tracked correctly. Tracer should capture +// all non-leaf dirty nodes, no matter whether the node is embedded or not. 
+func testTrieTracer(t *testing.T, vals []struct{ k, v string }) { + db := NewDatabase(rawdb.NewMemoryDatabase()) + trie := NewEmpty(db) + + // Determine all insertions are tracked for _, val := range vals { trie.Update([]byte(val.k), []byte(val.v)) } - trie.Hash() + insertSet := copySet(trie.tracer.insert) // copy before commit + deleteSet := copySet(trie.tracer.delete) // copy before commit + root, nodes := trie.Commit(false) + db.Update(NewWithNodeSet(nodes)) - seen := make(map[string]struct{}) - it := trie.NodeIterator(nil) - for it.Next(true) { - if it.Leaf() { - continue - } - seen[string(it.Path())] = struct{}{} + seen := setKeys(iterNodes(db, root)) + if !compareSet(insertSet, seen) { + t.Fatal("Unexpected insertion set") } - inserted := trie.tracer.insertList() - if len(inserted) != len(seen) { - t.Fatalf("Unexpected inserted node tracked want %d got %d", len(seen), len(inserted)) - } - for _, k := range inserted { - _, ok := seen[string(k)] - if !ok { - t.Fatalf("Unexpected inserted node") - } - } - deleted := trie.tracer.deleteList() - if len(deleted) != 0 { - t.Fatalf("Unexpected deleted node tracked %d", len(deleted)) + if !compareSet(deleteSet, nil) { + t.Fatal("Unexpected deletion set") } - // Commit the changes and re-create with new root - root, nodes := trie.Commit(false) - if err := db.Update(NewWithNodeSet(nodes)); err != nil { - t.Fatal(err) - } + // Determine all deletions are tracked trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() - - // Delete all the elements, check deletion set for _, val := range vals { trie.Delete([]byte(val.k)) } - trie.Hash() - - inserted = trie.tracer.insertList() - if len(inserted) != 0 { - t.Fatalf("Unexpected inserted node tracked %d", len(inserted)) - } - deleted = trie.tracer.deleteList() - if len(deleted) != len(seen) { - t.Fatalf("Unexpected deleted node tracked want %d got %d", len(seen), len(deleted)) + insertSet, deleteSet = copySet(trie.tracer.insert), copySet(trie.tracer.delete) + if 
!compareSet(insertSet, nil) { + t.Fatal("Unexpected insertion set") } - for _, k := range deleted { - _, ok := seen[string(k)] - if !ok { - t.Fatalf("Unexpected inserted node") - } + if !compareSet(deleteSet, seen) { + t.Fatal("Unexpected deletion set") } } +// Test that after inserting a new batch of nodes and deleting them immediately, +// the trie tracer should be cleared normally as no operation happened. func TestTrieTracerNoop(t *testing.T) { - trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) - trie.tracer = newTracer() + testTrieTracerNoop(t, tiny) + testTrieTracerNoop(t, nonAligned) + testTrieTracerNoop(t, standard) +} - // Insert a batch of entries, all the nodes should be marked as inserted - vals := []struct{ k, v string }{ - {"do", "verb"}, - {"ether", "wookiedoo"}, - {"horse", "stallion"}, - {"shaman", "horse"}, - {"doge", "coin"}, - {"dog", "puppy"}, - {"somethingveryoddindeedthis is", "myothernodedata"}, - } +func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) { + trie := NewEmpty(NewDatabase(rawdb.NewMemoryDatabase())) for _, val := range vals { trie.Update([]byte(val.k), []byte(val.v)) } for _, val := range vals { trie.Delete([]byte(val.k)) } - if len(trie.tracer.insertList()) != 0 { - t.Fatalf("Unexpected inserted node tracked %d", len(trie.tracer.insertList())) + if len(trie.tracer.insert) != 0 { + t.Fatal("Unexpected insertion set") } - if len(trie.tracer.deleteList()) != 0 { - t.Fatalf("Unexpected deleted node tracked %d", len(trie.tracer.deleteList())) + if len(trie.tracer.delete) != 0 { + t.Fatal("Unexpected deletion set") } } -func TestTrieTracePrevValue(t *testing.T) { +// Test whether the original value of the loaded nodes are correctly recorded. +// Besides, this will also ensure the accessList won't be spammed because of +// trie iteration and proving. 
+func TestAccessList(t *testing.T) { + testAccessList(t, tiny) + testAccessList(t, nonAligned) + testAccessList(t, standard) +} + +func testAccessList(t *testing.T, vals []struct{ k, v string }) { db := NewDatabase(rawdb.NewMemoryDatabase()) trie := NewEmpty(db) - trie.tracer = newTracer() - - paths, blobs := trie.tracer.prevList() - if len(paths) != 0 || len(blobs) != 0 { - t.Fatalf("Nothing should be tracked") - } - // Insert a batch of entries, all the nodes should be marked as inserted - vals := []struct{ k, v string }{ - {"do", "verb"}, - {"ether", "wookiedoo"}, - {"horse", "stallion"}, - {"shaman", "horse"}, - {"doge", "coin"}, - {"dog", "puppy"}, - {"somethingveryoddindeedthis is", "myothernodedata"}, - } for _, val := range vals { trie.Update([]byte(val.k), []byte(val.v)) } - paths, blobs = trie.tracer.prevList() - if len(paths) != 0 || len(blobs) != 0 { + if len(trie.tracer.accessList) != 0 { t.Fatalf("Nothing should be tracked") } - - // Commit the changes and re-create with new root root, nodes := trie.Commit(false) - if err := db.Update(NewWithNodeSet(nodes)); err != nil { - t.Fatal(err) - } - trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() - trie.resolveAndTrack(root.Bytes(), nil) + db.Update(NewWithNodeSet(nodes)) - // Load all nodes in trie + // Reload all nodes in trie + trie, _ = New(TrieID(root), db) for _, val := range vals { trie.TryGet([]byte(val.k)) } - - // Ensure all nodes are tracked by tracer with correct prev-values - iter := trie.NodeIterator(nil) - seen := make(map[string][]byte) - for iter.Next(true) { - // Embedded nodes are ignored since they are not present in - // database. 
- if iter.Hash() == (common.Hash{}) { - continue - } - seen[string(iter.Path())] = common.CopyBytes(iter.NodeBlob()) - } - - paths, blobs = trie.tracer.prevList() - if len(paths) != len(seen) || len(blobs) != len(seen) { - t.Fatalf("Unexpected tracked values") - } - for i, path := range paths { - blob := blobs[i] - prev, ok := seen[string(path)] - if !ok { - t.Fatalf("Missing node %v", path) - } - if !bytes.Equal(blob, prev) { - t.Fatalf("Unexpected value path: %v, want: %v, got: %v", path, prev, blob) - } + // Ensure all nodes are tracked by tracer with correct values, + // which should be aligned with *non-embedded* trie nodes. + seen := iterNodesWithHash(db, root) + if !compareValueSet(trie.tracer.accessList, seen) { + t.Fatal("Unexpected accessList") } // Re-open the trie and iterate the trie, ensure nothing will be tracked. // Iterator will not link any loaded nodes to trie. trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() - - iter = trie.NodeIterator(nil) + prev := len(trie.tracer.accessList) + iter := trie.NodeIterator(nil) for iter.Next(true) { } - paths, blobs = trie.tracer.prevList() - if len(paths) != 0 || len(blobs) != 0 { + if len(trie.tracer.accessList) != prev { t.Fatalf("Nothing should be tracked") } // Re-open the trie and generate proof for entries, ensure nothing will // be tracked. Prover will not link any loaded nodes to trie. trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() + prev = len(trie.tracer.accessList) for _, val := range vals { trie.Prove([]byte(val.k), 0, rawdb.NewMemoryDatabase()) } - paths, blobs = trie.tracer.prevList() - if len(paths) != 0 || len(blobs) != 0 { + if len(trie.tracer.accessList) != prev { t.Fatalf("Nothing should be tracked") } - // Delete entries from trie, ensure all previous values are correct. + // Delete entries from trie, ensure all previous values are correct, + // which should be aligned with *non-embedded* trie nodes. 
trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() - trie.resolveAndTrack(root.Bytes(), nil) - for _, val := range vals { trie.TryDelete([]byte(val.k)) } - paths, blobs = trie.tracer.prevList() - if len(paths) != len(seen) || len(blobs) != len(seen) { - t.Fatalf("Unexpected tracked values") - } - for i, path := range paths { - blob := blobs[i] - prev, ok := seen[string(path)] - if !ok { - t.Fatalf("Missing node %v", path) - } - if !bytes.Equal(blob, prev) { - t.Fatalf("Unexpected value path: %v, want: %v, got: %v", path, prev, blob) - } + if !compareValueSet(trie.tracer.accessList, seen) { + t.Fatal("Unexpected accessList") } } -func TestDeleteAll(t *testing.T) { +// Tests that nodes are correctly recorded when inserting or deleting nodes +// into the trie. +func TestNodeSet(t *testing.T) { + testNodeSet(t, tiny) + testNodeSet(t, nonAligned) + testNodeSet(t, standard) +} + +func testNodeSet(t *testing.T, vals []struct{ k, v string }) { db := NewDatabase(rawdb.NewMemoryDatabase()) trie := NewEmpty(db) - trie.tracer = newTracer() - - // Insert a batch of entries, all the nodes should be marked as inserted - vals := []struct{ k, v string }{ - {"do", "verb"}, - {"ether", "wookiedoo"}, - {"horse", "stallion"}, - {"shaman", "horse"}, - {"doge", "coin"}, - {"dog", "puppy"}, - {"somethingveryoddindeedthis is", "myothernodedata"}, - } for _, val := range vals { trie.Update([]byte(val.k), []byte(val.v)) } root, set := trie.Commit(false) - if err := db.Update(NewWithNodeSet(set)); err != nil { - t.Fatal(err) + db.Update(NewWithNodeSet(set)) + + nodes := iterNodesWithHash(db, root) + dirty := make(map[string]struct{}) + for path := range set.nodes { + dirty[path] = struct{}{} + } + if !compareSet(dirty, setKeys(nodes)) { + t.Fatal("Unexpected nodeset") } + if !compareValueSet(set.accessList, nil) { + t.Fatal("Unexpected accessList") + } + // Delete entries from trie, ensure all values are detected trie, _ = New(TrieID(root), db) - trie.tracer = newTracer() - 
trie.resolveAndTrack(root.Bytes(), nil) + for _, val := range vals { + trie.Delete([]byte(val.k)) + } + root, set = trie.Commit(false) + if root != types.EmptyRootHash { + t.Fatalf("Invalid trie root %v", root) + } + dirty = make(map[string]struct{}) + for path := range set.nodes { + dirty[path] = struct{}{} + } + if !compareSet(dirty, setKeys(nodes)) { + t.Fatal("Unexpected nodeset") + } + if !compareValueSet(set.accessList, nodes) { + t.Fatal("Unexpected accessList") + } +} - // Iterate all existent nodes +// Tests whether the original tree node is correctly deleted after being embedded +// in its parent due to the smaller size of the original tree node. +func TestEmbedNode(t *testing.T) { var ( - it = trie.NodeIterator(nil) - nodes = make(map[string][]byte) + db = NewDatabase(rawdb.NewMemoryDatabase()) + trie = NewEmpty(db) ) - for it.Next(true) { - if it.Hash() != (common.Hash{}) { - nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) - } + for _, val := range tiny { + trie.Update([]byte(val.k), randBytes(32)) } + root, set := trie.Commit(false) + db.Update(NewWithNodeSet(set)) + nodesA := iterNodesWithHash(db, root) - // Perform deletion to purge the entire trie - for _, val := range vals { - trie.Delete([]byte(val.k)) + trie, _ = New(TrieID(root), db) + for _, val := range tiny { + trie.Update([]byte(val.k), []byte(val.v)) } root, set = trie.Commit(false) - if root != types.EmptyRootHash { - t.Fatalf("Invalid trie root %v", root) + db.Update(NewWithNodeSet(set)) + nodesB := iterNodesWithHash(db, root) + + // The nodes in the old set but not in the new set are the nodes + // that got removed from the trie. 
+ for path, blob := range nodesA { + if _, ok := nodesB[path]; ok { + continue + } + n, ok := set.nodes[path] + if !ok { + t.Fatal("missing node") + } + if !n.isDeleted() { + t.Fatal("unexpected node") + } + if !bytes.Equal(set.accessList[path], blob) { + t.Fatal("unexpected accessList") + } + } +} + +func compareSet(setA, setB map[string]struct{}) bool { + if len(setA) != len(setB) { + return false } - for path, blob := range set.deletes { - prev, ok := nodes[path] + for key := range setA { + if _, ok := setB[key]; !ok { + return false + } + } + return true +} + +func compareValueSet(setA, setB map[string][]byte) bool { + if len(setA) != len(setB) { + return false + } + for key, valA := range setA { + valB, ok := setB[key] if !ok { - t.Fatalf("Extra node deleted %v", []byte(path)) + return false } - if !bytes.Equal(prev, blob) { - t.Fatalf("Unexpected previous value %v", []byte(path)) + if !bytes.Equal(valA, valB) { + return false } } - if len(set.deletes) != len(nodes) { - t.Fatalf("Unexpected deletion set") + return true +} + +func iterNodes(db *Database, root common.Hash) map[string][]byte { + var ( + trie, _ = New(TrieID(root), db) + it = trie.NodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Leaf() { + continue + } + nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) + } + return nodes +} + +func iterNodesWithHash(db *Database, root common.Hash) map[string][]byte { + var ( + trie, _ = New(TrieID(root), db) + it = trie.NodeIterator(nil) + nodes = make(map[string][]byte) + ) + for it.Next(true) { + if it.Hash() == (common.Hash{}) { + continue + } + nodes[string(it.Path())] = common.CopyBytes(it.NodeBlob()) + } + return nodes +} + +func setKeys(set map[string][]byte) map[string]struct{} { + keys := make(map[string]struct{}) + for k := range set { + keys[k] = struct{}{} + } + return keys +} + +func copySet(set map[string]struct{}) map[string]struct{} { + copied := make(map[string]struct{}) + for k := range set { + 
copied[k] = struct{}{} } + return copied } diff --git a/trie/utils.go b/trie/utils.go index 5dce65cd2971..0848bdae7336 100644 --- a/trie/utils.go +++ b/trie/utils.go @@ -31,42 +31,34 @@ package trie // // Besides, it's also used for recording the original value of the nodes // when they are resolved from the disk. The pre-value of the nodes will -// be used to construct reverse-diffs in the future. +// be used to construct trie history in the future. // // Note tracer is not thread-safe, callers should be responsible for handling // the concurrency issues by themselves. type tracer struct { - insert map[string]struct{} - delete map[string]struct{} - origin map[string][]byte + insert map[string]struct{} + delete map[string]struct{} + accessList map[string][]byte } // newTracer initializes the tracer for capturing trie changes. func newTracer() *tracer { return &tracer{ - insert: make(map[string]struct{}), - delete: make(map[string]struct{}), - origin: make(map[string][]byte), + insert: make(map[string]struct{}), + delete: make(map[string]struct{}), + accessList: make(map[string][]byte), } } // onRead tracks the newly loaded trie node and caches the rlp-encoded blob internally. // Don't change the value outside of function since it's not deep-copied. func (t *tracer) onRead(path []byte, val []byte) { - // Tracer isn't used right now, remove this check later. - if t == nil { - return - } - t.origin[string(path)] = val + t.accessList[string(path)] = val } // onInsert tracks the newly inserted trie node. If it's already in the deletion set // (resurrected node), then just wipe it from the deletion set as the "untouched". func (t *tracer) onInsert(path []byte) { - // Tracer isn't used right now, remove this check later. 
- if t == nil { - return - } if _, present := t.delete[string(path)]; present { delete(t.delete, string(path)) return @@ -78,10 +70,6 @@ func (t *tracer) onInsert(path []byte) { // in the addition set, then just wipe it from the addition set // as it's untouched. func (t *tracer) onDelete(path []byte) { - // Tracer isn't used right now, remove this check later. - if t == nil { - return - } if _, present := t.insert[string(path)]; present { delete(t.insert, string(path)) return @@ -89,111 +77,45 @@ func (t *tracer) onDelete(path []byte) { t.delete[string(path)] = struct{}{} } -// insertList returns the tracked inserted trie nodes in list format. -func (t *tracer) insertList() [][]byte { - // Tracer isn't used right now, remove this check later. - if t == nil { - return nil - } - var ret [][]byte - for path := range t.insert { - ret = append(ret, []byte(path)) - } - return ret -} - -// deleteList returns the tracked deleted trie nodes in list format. -func (t *tracer) deleteList() [][]byte { - // Tracer isn't used right now, remove this check later. - if t == nil { - return nil - } - var ret [][]byte - for path := range t.delete { - ret = append(ret, []byte(path)) - } - return ret -} - -// prevList returns the tracked node blobs in list format. -func (t *tracer) prevList() ([][]byte, [][]byte) { - // Tracer isn't used right now, remove this check later. - if t == nil { - return nil, nil - } - var ( - paths [][]byte - blobs [][]byte - ) - for path, blob := range t.origin { - paths = append(paths, []byte(path)) - blobs = append(blobs, blob) - } - return paths, blobs -} - -// getPrev returns the cached original value of the specified node. -func (t *tracer) getPrev(path []byte) []byte { - // Tracer isn't used right now, remove this check later. - if t == nil { - return nil - } - return t.origin[string(path)] -} - // reset clears the content tracked by tracer. func (t *tracer) reset() { - // Tracer isn't used right now, remove this check later. 
- if t == nil { - return - } t.insert = make(map[string]struct{}) t.delete = make(map[string]struct{}) - t.origin = make(map[string][]byte) + t.accessList = make(map[string][]byte) } // copy returns a deep copied tracer instance. func (t *tracer) copy() *tracer { - // Tracer isn't used right now, remove this check later. - if t == nil { - return nil - } var ( - insert = make(map[string]struct{}) - delete = make(map[string]struct{}) - origin = make(map[string][]byte) + insert = make(map[string]struct{}) + delete = make(map[string]struct{}) + accessList = make(map[string][]byte) ) - for key := range t.insert { - insert[key] = struct{}{} + for path := range t.insert { + insert[path] = struct{}{} } - for key := range t.delete { - delete[key] = struct{}{} + for path := range t.delete { + delete[path] = struct{}{} } - for key, val := range t.origin { - origin[key] = val + for path, blob := range t.accessList { + accessList[path] = blob } return &tracer{ - insert: insert, - delete: delete, - origin: origin, + insert: insert, + delete: delete, + accessList: accessList, } } // markDeletions puts all tracked deletions into the provided nodeset. func (t *tracer) markDeletions(set *NodeSet) { - // Tracer isn't used right now, remove this check later. - if t == nil { - return - } - for _, path := range t.deleteList() { - // There are a few possibilities for this scenario(the node is deleted - // but not present in database previously), for example the node was - // embedded in the parent and now deleted from the trie. In this case - // it's noop from database's perspective. - val := t.getPrev(path) - if len(val) == 0 { + for path := range t.delete { + // It's possible a few deleted nodes were embedded + // in their parent before, the deletions can be no + // effect by deleting nothing, filter them out. + if _, ok := set.accessList[path]; !ok { continue } - set.markDeleted(path, val) + set.markDeleted([]byte(path)) } }