From 782d8dc60e7ad42c01f1df5aee5c9f398e7329ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 20 Sep 2022 14:14:24 +0300 Subject: [PATCH 1/6] eth: fix a rare datarace on CHT challenge reply / shutdown --- eth/handler.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/eth/handler.go b/eth/handler.go index 5924f1bd49..19de9ad872 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -400,11 +400,16 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error { if h.checkpointHash != (common.Hash{}) { // Request the peer's checkpoint header for chain height/weight validation resCh := make(chan *eth.Response) - if _, err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false, resCh); err != nil { + + req, err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false, resCh) + if err != nil { return err } // Start a timer to disconnect if the peer doesn't reply in time go func() { + // Ensure the request gets cancelled in case of error/drop + defer req.Close() + timeout := time.NewTimer(syncChallengeTimeout) defer timeout.Stop() @@ -446,10 +451,15 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error { // If we have any explicit whitelist block hashes, request them for number, hash := range h.whitelist { resCh := make(chan *eth.Response) - if _, err := peer.RequestHeadersByNumber(number, 1, 0, false, resCh); err != nil { + + req, err := peer.RequestHeadersByNumber(number, 1, 0, false, resCh) + if err != nil { return err } - go func(number uint64, hash common.Hash) { + go func(number uint64, hash common.Hash, req *eth.Request) { + // Ensure the request gets cancelled in case of error/drop + defer req.Close() + timeout := time.NewTimer(syncChallengeTimeout) defer timeout.Stop() @@ -478,7 +488,7 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error { peer.Log().Warn("Whitelist challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name()) h.removePeer(peer.ID()) } - }(number, hash) + }(number, hash, req) } // Handle incoming messages until the connection is torn down return handler(peer) From cc6116a930946261a143edadb2dd8d60c14c7658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Tue, 6 Sep 2022 12:57:03 +0300 Subject: [PATCH 2/6] trie: check childrens' existence concurrently for snap heal --- trie/sync.go | 56 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/trie/sync.go b/trie/sync.go index 7eaa35244e..10eaeb53da 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -19,6 +19,7 @@ package trie import ( "errors" "fmt" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/prque" @@ -345,11 +346,11 @@ func (s *Sync) schedule(req *request) { // retrieval scheduling. func (s *Sync) children(req *request, object node) ([]*request, error) { // Gather all the children of the node, irrelevant whether known or not - type child struct { + type childNode struct { path []byte node node } - var children []child + var children []childNode switch node := (object).(type) { case *shortNode: @@ -357,14 +358,14 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { if hasTerm(key) { key = key[:len(key)-1] } - children = []child{{ + children = []childNode{{ node: node.Val, path: append(append([]byte(nil), req.path...), key...), }} case *fullNode: for i := 0; i < 17; i++ { if node.Children[i] != nil { - children = append(children, child{ + children = append(children, childNode{ node: node.Children[i], path: append(append([]byte(nil), req.path...), byte(i)), }) @@ -374,7 +375,10 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { panic(fmt.Sprintf("unknown node: %+v", node)) } // Iterate over the children, and request all unknown ones - requests := make([]*request, 0, len(children)) + var ( + missing = make(chan *request, len(children)) + pending sync.WaitGroup + ) for _, child := range children { // Notify any external watcher of a new key/value node if req.callback != nil { @@ -398,18 +402,36 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { if s.membatch.hasNode(hash) { continue } - // If database says duplicate, then at least the trie node is present - // and we hold the assumption that it's NOT legacy contract code. - if rawdb.HasTrieNode(s.database, hash) { - continue - } - // Locally unknown node, schedule for retrieval - requests = append(requests, &request{ - path: child.path, - hash: hash, - parents: []*request{req}, - callback: req.callback, - }) + // Check the presence of children concurrently + pending.Add(1) + go func(child childNode) { + defer pending.Done() + + // If database says duplicate, then at least the trie node is present + // and we hold the assumption that it's NOT legacy contract code. + chash := common.BytesToHash(node) + if rawdb.HasTrieNode(s.database, chash) { + return + } + // Locally unknown node, schedule for retrieval + missing <- &request{ + path: child.path, + hash: chash, + parents: []*request{req}, + callback: req.callback, + } + }(child) + } + } + pending.Wait() + + requests := make([]*request, 0, len(children)) + for done := false; !done; { + select { + case miss := <-missing: + requests = append(requests, miss) + default: + done = true } } return requests, nil From c7bcbd5456866850a5a454b89a0511b0480eaf4b Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 31 Aug 2022 17:58:18 +0200 Subject: [PATCH 3/6] eth/protocols/snap: fix problems due to idle-but-busy peers --- eth/protocols/snap/sync.go | 90 +++++++++++++++++++++++--------------- trie/sync.go | 2 +- 2 files changed, 56 insertions(+), 36 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 133acfaa0b..6a6105fccf 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2252,14 +2252,18 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. + defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.accountIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() s.lock.Lock() - if _, ok := s.peers[peer.ID()]; ok { - s.accountIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } // Ensure the response is for a valid request req, ok := s.accountReqs[id] if !ok { @@ -2364,14 +2368,18 @@ func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. + defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.bytecodeIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() s.lock.Lock() - if _, ok := s.peers[peer.ID()]; ok { - s.bytecodeIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } // Ensure the response is for a valid request req, ok := s.bytecodeReqs[id] if !ok { @@ -2473,14 +2481,18 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. + defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.storageIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() s.lock.Lock() - if _, ok := s.peers[peer.ID()]; ok { - s.storageIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } // Ensure the response is for a valid request req, ok := s.storageReqs[id] if !ok { @@ -2600,14 +2612,18 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. + defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.trienodeHealIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() s.lock.Lock() - if _, ok := s.peers[peer.ID()]; ok { - s.trienodeHealIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } // Ensure the response is for a valid request req, ok := s.trienodeHealReqs[id] if !ok { @@ -2695,14 +2711,18 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. + defer func() { + s.lock.Lock() + defer s.lock.Unlock() + if _, ok := s.peers[peer.ID()]; ok { + s.bytecodeHealIdlers[peer.ID()] = struct{}{} + } + select { + case s.update <- struct{}{}: + default: + } + }() s.lock.Lock() - if _, ok := s.peers[peer.ID()]; ok { - s.bytecodeHealIdlers[peer.ID()] = struct{}{} - } - select { - case s.update <- struct{}{}: - default: - } // Ensure the response is for a valid request req, ok := s.bytecodeHealReqs[id] if !ok { diff --git a/trie/sync.go b/trie/sync.go index 10eaeb53da..d45f2c31bc 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -417,7 +417,7 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { missing <- &request{ path: child.path, hash: chash, - parents: []*request{req}, + parents: []*request{req}, callback: req.callback, } }(child) From 8b71ce55d9ca399dc909a93afdd731e0edbcff78 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Thu, 27 Oct 2022 15:25:01 +0200 Subject: [PATCH 4/6] eth/filters: change filter block to be by-ref (#26054) This PR changes the block field in the filter to be a pointer, to disambiguate between empty hash and no hash --- eth/filters/filter.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eth/filters/filter.go b/eth/filters/filter.go index a354eb9761..2762993dad 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -59,8 +59,8 @@ type Filter struct { addresses []common.Address topics [][]common.Hash - block common.Hash // Block hash if filtering a single block - begin, end int64 // Range interval if filtering multiple blocks + block *common.Hash // Block hash if filtering a single block + begin, end int64 // Range interval if filtering multiple blocks matcher *bloombits.Matcher @@ -106,7 +106,7 @@ func NewRangeFilter(backend Backend, begin, end int64, addresses []common.Addres func NewBlockFilter(backend Backend, block common.Hash, addresses []common.Address, topics [][]common.Hash) *Filter { // Create a generic filter and convert it into a block filter filter := newFilter(backend, addresses, topics) - filter.block = block + filter.block = &block return filter } @@ -125,8 +125,8 @@ func newFilter(backend Backend, addresses []common.Address, topics [][]common.Ha // first block that contains matches, updating the start of the filter accordingly. func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { // If we're doing singleton block filtering, execute and return - if f.block != (common.Hash{}) { - header, err := f.backend.HeaderByHash(ctx, f.block) + if f.block != nil { + header, err := f.backend.HeaderByHash(ctx, *f.block) if err != nil { return nil, err } From 9fb3b67722d10531690030140e607017568222d3 Mon Sep 17 00:00:00 2001 From: Jordan Krage Date: Wed, 2 Nov 2022 09:29:33 -0500 Subject: [PATCH 5/6] rpc: handle wrong HTTP batch response length (#26064) --- rpc/client.go | 1 + rpc/client_test.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++ rpc/http.go | 3 +++ 3 files changed, 52 insertions(+) diff --git a/rpc/client.go b/rpc/client.go index ee7150eb8d..a89f8ba18c 100644 --- a/rpc/client.go +++ b/rpc/client.go @@ -31,6 +31,7 @@ import ( ) var ( + ErrBadResult = errors.New("bad result in JSON-RPC response") ErrClientQuit = errors.New("client is closed") ErrNoResult = errors.New("no result in JSON-RPC response") ErrSubscriptionQueueOverflow = errors.New("subscription queue overflow") diff --git a/rpc/client_test.go b/rpc/client_test.go index fa6010bb19..c20dc38b4f 100644 --- a/rpc/client_test.go +++ b/rpc/client_test.go @@ -19,6 +19,7 @@ package rpc import ( "context" "encoding/json" + "errors" "fmt" "math/rand" "net" @@ -144,6 +145,53 @@ func TestClientBatchRequest(t *testing.T) { } } +func TestClientBatchRequest_len(t *testing.T) { + b, err := json.Marshal([]jsonrpcMessage{ + {Version: "2.0", ID: json.RawMessage("1"), Method: "foo", Result: json.RawMessage(`"0x1"`)}, + {Version: "2.0", ID: json.RawMessage("2"), Method: "bar", Result: json.RawMessage(`"0x2"`)}, + }) + if err != nil { + t.Fatal("failed to encode jsonrpc message:", err) + } + s := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + _, err := rw.Write(b) + if err != nil { + t.Error("failed to write response:", err) + } + })) + t.Cleanup(s.Close) + + client, err := Dial(s.URL) + if err != nil { + t.Fatal("failed to dial test server:", err) + } + defer client.Close() + + t.Run("too-few", func(t *testing.T) { + batch := []BatchElem{ + {Method: "foo"}, + {Method: "bar"}, + {Method: "baz"}, + } + ctx, cancelFn := context.WithTimeout(context.Background(), time.Second) + defer cancelFn() + if err := client.BatchCallContext(ctx, batch); !errors.Is(err, ErrBadResult) { + t.Errorf("expected %q but got: %v", ErrBadResult, err) + } + }) + + t.Run("too-many", func(t *testing.T) { + batch := []BatchElem{ + {Method: "foo"}, + } + ctx, cancelFn := context.WithTimeout(context.Background(), time.Second) + defer cancelFn() + if err := client.BatchCallContext(ctx, batch); !errors.Is(err, ErrBadResult) { + t.Errorf("expected %q but got: %v", ErrBadResult, err) + } + }) +} + func TestClientNotify(t *testing.T) { server := newTestServer() defer server.Stop() diff --git a/rpc/http.go b/rpc/http.go index c076ae9f78..9604c14f70 100644 --- a/rpc/http.go +++ b/rpc/http.go @@ -165,6 +165,9 @@ func (c *Client) sendBatchHTTP(ctx context.Context, op *requestOp, msgs []*jsonr if err := json.NewDecoder(respBody).Decode(&respmsgs); err != nil { return err } + if len(respmsgs) != len(msgs) { + return fmt.Errorf("batch has %d requests but response has %d: %w", len(msgs), len(respmsgs), ErrBadResult) + } for i := 0; i < len(respmsgs); i++ { op.resp <- &respmsgs[i] } From ae7512c133cefbe1d0db8f8e50ddd7cb8e8d40b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 9 Sep 2022 11:42:57 +0300 Subject: [PATCH 6/6] eth/protocols/snap: throttle trie heal requests when peers DoS us (#25666) * eth/protocols/snap: throttle trie heal requests when peers DoS us * eth/protocols/snap: lower heal throttle log to debug Co-authored-by: Martin Holst Swende * eth/protocols/snap: fix comment Co-authored-by: Martin Holst Swende --- eth/protocols/snap/sync.go | 107 ++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 7 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 6a6105fccf..f7cefeb5f4 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -21,10 +21,12 @@ import ( "encoding/json" "errors" "fmt" + gomath "math" "math/big" "math/rand" "sort" "sync" + "sync/atomic" "time" "github.com/ethereum/go-ethereum/common" @@ -79,6 +81,29 @@ const ( // and waste round trip times. If it's too high, we're capping responses and // waste bandwidth. maxTrieRequestCount = maxRequestSize / 512 + + // trienodeHealRateMeasurementImpact is the impact a single measurement has on + // the local node's trienode processing capacity. A value closer to 0 reacts + // slower to sudden changes, but it is also more stable against temporary hiccups. + trienodeHealRateMeasurementImpact = 0.005 + + // minTrienodeHealThrottle is the minimum divisor for throttling trie node + // heal requests to avoid overloading the local node and exessively expanding + // the state trie bedth wise. + minTrienodeHealThrottle = 1 + + // maxTrienodeHealThrottle is the maximum divisor for throttling trie node + // heal requests to avoid overloading the local node and exessively expanding + // the state trie bedth wise. + maxTrienodeHealThrottle = maxTrieRequestCount + + // trienodeHealThrottleIncrease is the multiplier for the throttle when the + // rate of arriving data is higher than the rate of processing it. + trienodeHealThrottleIncrease = 1.33 + + // trienodeHealThrottleDecrease is the divisor for the throttle when the + // rate of arriving data is lower than the rate of processing it. + trienodeHealThrottleDecrease = 1.25 ) var ( @@ -432,6 +457,11 @@ type Syncer struct { trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running + trienodeHealRate float64 // Average heal rate for processing trie node data + trienodeHealPend uint64 // Number of trie nodes currently pending for processing + trienodeHealThrottle float64 // Divisor for throttling the amount of trienode heal data requested + trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated + trienodeHealSynced uint64 // Number of state trie nodes downloaded trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk trienodeHealDups uint64 // Number of state trie nodes already processed @@ -477,9 +507,10 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer { trienodeHealIdlers: make(map[string]struct{}), bytecodeHealIdlers: make(map[string]struct{}), - trienodeHealReqs: make(map[uint64]*trienodeHealRequest), - bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), - stateWriter: db.NewBatch(), + trienodeHealReqs: make(map[uint64]*trienodeHealRequest), + bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), + trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk + stateWriter: db.NewBatch(), extProgress: new(SyncProgress), } @@ -1324,6 +1355,10 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai if cap > maxTrieRequestCount { cap = maxTrieRequestCount } + cap = int(float64(cap) / s.trienodeHealThrottle) + if cap <= 0 { + cap = 1 + } var ( hashes = make([]common.Hash, 0, cap) paths = make([]trie.SyncPath, 0, cap) @@ -2094,6 +2129,10 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { + var ( + start = time.Now() + fills int + ) for i, hash := range res.hashes { node := res.nodes[i] @@ -2102,6 +2141,8 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { res.task.trieTasks[hash] = res.paths[i] continue } + fills++ + // Push the trie node into the state syncer s.trienodeHealSynced++ s.trienodeHealBytes += common.StorageSize(len(node)) @@ -2125,6 +2166,50 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { log.Crit("Failed to persist healing data", "err", err) } log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) + + // Calculate the processing rate of one filled trie node + rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) + + // Update the currently measured trienode queueing and processing throughput. + // + // The processing rate needs to be updated uniformly independent if we've + // processed 1x100 trie nodes or 100x1 to keep the rate consistent even in + // the face of varying network packets. As such, we cannot just measure the + // time it took to process N trie nodes and update once, we need one update + // per trie node. + // + // Naively, that would be: + // + // for i:=0; i time.Second { + // Periodically adjust the trie node throttler + if float64(pending) > 2*s.trienodeHealRate { + s.trienodeHealThrottle *= trienodeHealThrottleIncrease + } else { + s.trienodeHealThrottle /= trienodeHealThrottleDecrease + } + if s.trienodeHealThrottle > maxTrienodeHealThrottle { + s.trienodeHealThrottle = maxTrienodeHealThrottle + } else if s.trienodeHealThrottle < minTrienodeHealThrottle { + s.trienodeHealThrottle = minTrienodeHealThrottle + } + s.trienodeHealThrottled = time.Now() + + log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle) + } } // processBytecodeHealResponse integrates an already validated bytecode response @@ -2659,10 +2744,12 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error // Cross reference the requested trienodes with the response to find gaps // that the serving node is missing - hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) - hash := make([]byte, 32) - - nodes := make([][]byte, len(req.hashes)) + var ( + hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState) + hash = make([]byte, 32) + nodes = make([][]byte, len(req.hashes)) + fills uint64 + ) for i, j := 0, 0; i < len(trienodes); i++ { // Find the next hash that we've been served, leaving misses with nils hasher.Reset() @@ -2674,16 +2761,22 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error } if j < len(req.hashes) { nodes[j] = trienodes[i] + fills++ j++ continue } // We've either ran out of hashes, or got unrequested data logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) + // Signal this request as failed, and ready for rescheduling s.scheduleRevertTrienodeHealRequest(req) return errors.New("unexpected healing trienode") } // Response validated, send it to the scheduler for filling + atomic.AddUint64(&s.trienodeHealPend, fills) + defer func() { + atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1)) + }() response := &trienodeHealResponse{ task: req.task, hashes: req.hashes,