fixing multinode state transition logic by registering polling subscription in rpc client #14534

Merged
8 changes: 8 additions & 0 deletions .changeset/moody-rules-agree.md
@@ -0,0 +1,8 @@
---
"chainlink": patch
---

- register the polling subscription so it does not leak when the rpc client gets closed.
- add temporary special treatment for SubscribeNewHead before it is replaced with SubscribeToHeads: a goroutine forwards new heads from the poller to the channel passed by the caller (see the sketch after this list).
- fix a deadlock in the poller by using a new lock for the subs slice in the rpc client.
#bugfix
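
For illustration, a minimal sketch of the forwarding pattern from the second bullet above, using assumed channel names rather than the actual commonclient.Poller API; it mirrors the goroutine added to SubscribeNewHead in the diff below.

```go
// Sketch only: drain heads produced by a poller into the channel the caller
// passed to SubscribeNewHead, and stop once the poller reports an error
// (e.g. after Unsubscribe). Names here are illustrative.
func forwardHeads[H any](pollerCh <-chan H, pollerErr <-chan error, out chan<- H) {
	for head := range pollerCh {
		select {
		case out <- head: // hand the new head to the caller
		case <-pollerErr: // poller errored or was unsubscribed; stop forwarding
			return
		}
	}
}
```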
47 changes: 42 additions & 5 deletions core/chains/evm/client/rpc_client.go
@@ -129,7 +129,8 @@ type rpcClient struct {
ws rawclient
http *rawclient

stateMu sync.RWMutex // protects state* fields
stateMu sync.RWMutex // protects state* fields
subsSliceMu sync.RWMutex // protects subscription slice
Comment from the PR author:

A deadlock was caught by my new unit tests, and this fixes it.
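
For context, a minimal sketch of the locking split, with hypothetical types standing in for rpcClient; the deadlock shape shown (Unsubscribe blocking on work that needs stateMu) is one plausible reading of the fix, not a statement of the exact call graph the tests caught.

```go
package sketch

import "sync"

type sub interface{ Unsubscribe() }

// fakeClient is a hypothetical stand-in for rpcClient, used only to show the split.
type fakeClient struct {
	stateMu     sync.RWMutex // protects connection/state fields
	subsSliceMu sync.RWMutex // protects the subscription slice (the new lock)
	subs        []sub
}

func (c *fakeClient) disconnectAll() {
	c.stateMu.Lock()
	// ... close the WS connection, cancel in-flight requests ...
	c.stateMu.Unlock() // released before touching subs, as DisconnectAll now does

	c.subsSliceMu.Lock()
	defer c.subsSliceMu.Unlock()
	for _, s := range c.subs {
		s.Unsubscribe() // may block on work that takes stateMu; that is now safe
	}
	c.subs = nil
}
```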


// Need to track subscriptions because closing the RPC does not (always?)
// close the underlying subscription
@@ -317,8 +318,8 @@ func (r *rpcClient) getRPCDomain() string {

// registerSub adds the sub to the rpcClient list
func (r *rpcClient) registerSub(sub ethereum.Subscription, stopInFLightCh chan struct{}) error {
r.stateMu.Lock()
defer r.stateMu.Unlock()
r.subsSliceMu.Lock()
defer r.subsSliceMu.Unlock()
// ensure that the `sub` belongs to current life cycle of the `rpcClient` and it should not be killed due to
// previous `DisconnectAll` call.
select {
@@ -335,12 +336,16 @@ func (r *rpcClient) registerSub(sub ethereum.Subscription, stopInFLightCh chan s
// DisconnectAll disconnects all clients connected to the rpcClient
func (r *rpcClient) DisconnectAll() {
r.stateMu.Lock()
defer r.stateMu.Unlock()
if r.ws.rpc != nil {
r.ws.rpc.Close()
}
r.cancelInflightRequests()
r.stateMu.Unlock()

r.subsSliceMu.Lock()
r.unsubscribeAll()
r.subsSliceMu.Unlock()

r.chainInfoLock.Lock()
r.latestChainInfo = commonclient.ChainInfo{}
r.chainInfoLock.Unlock()
@@ -496,11 +501,30 @@ func (r *rpcClient) SubscribeNewHead(ctx context.Context, channel chan<- *evmtyp
if r.newHeadsPollInterval > 0 {
interval := r.newHeadsPollInterval
timeout := interval
poller, _ := commonclient.NewPoller[*evmtypes.Head](interval, r.latestBlock, timeout, r.rpcLog)
poller, pollerCh := commonclient.NewPoller[*evmtypes.Head](interval, r.latestBlock, timeout, r.rpcLog)
if err = poller.Start(ctx); err != nil {
return nil, err
}

// NOTE: this is temporary special treatment for SubscribeNewHead before the head tracker is refactored to use SubscribeToHeads,
// as we need to forward new heads from the poller channel to the channel passed by the caller.
go func() {
for head := range pollerCh {
select {
case channel <- head:
// forwarding new head to the channel passed from caller
case <-poller.Err():
// return as poller returns error
return
}
}
}()

err = r.registerSub(&poller, chStopInFlight)
if err != nil {
return nil, err
}

lggr.Debugf("Polling new heads over http")
return &poller, nil
}
@@ -547,6 +571,11 @@ func (r *rpcClient) SubscribeToHeads(ctx context.Context) (ch <-chan *evmtypes.H
return nil, nil, err
}

err = r.registerSub(&poller, chStopInFlight)
if err != nil {
return nil, nil, err
}

lggr.Debugf("Polling new heads over http")
return channel, &poller, nil
}
@@ -579,6 +608,8 @@ func (r *rpcClient) SubscribeToHeads(ctx context.Context) (ch <-chan *evmtypes.H
}

func (r *rpcClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *evmtypes.Head, commontypes.Subscription, error) {
ctx, cancel, chStopInFlight, _, _ := r.acquireQueryCtx(ctx, r.rpcTimeout)
defer cancel()
interval := r.finalizedBlockPollInterval
if interval == 0 {
return nil, nil, errors.New("FinalizedBlockPollInterval is 0")
@@ -588,6 +619,12 @@ func (r *rpcClient) SubscribeToFinalizedHeads(ctx context.Context) (<-chan *evmt
if err := poller.Start(ctx); err != nil {
return nil, nil, err
}

err := r.registerSub(&poller, chStopInFlight)
if err != nil {
return nil, nil, err
}

return channel, &poller, nil
}

107 changes: 107 additions & 0 deletions core/chains/evm/client/rpc_client_test.go
@@ -184,6 +184,113 @@ func TestRPCClient_SubscribeNewHead(t *testing.T) {
require.ErrorContains(t, err, "RPCClient returned error (rpc)")
tests.AssertLogEventually(t, observed, "evmclient.Client#EthSubscribe RPC call failure")
})
t.Run("Closed rpc client should remove existing SubscribeNewHead subscription with WS", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, nil, "rpc", 1, chainId, commonclient.Primary, 0, 0, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
require.NoError(t, rpc.Dial(ctx))

ch := make(chan *evmtypes.Head)
sub, err := rpc.SubscribeNewHead(tests.Context(t), ch)
require.NoError(t, err)
errCh := sub.Err()
require.Equal(t, int32(1), rpc.SubscribersCount())
rpc.DisconnectAll()
_, ok := <-errCh
require.False(t, ok)
require.NoError(t, rpc.Dial(ctx))
require.Equal(t, int32(0), rpc.SubscribersCount())
})
t.Run("Closed rpc client should remove existing SubscribeNewHead subscription with HTTP polling", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, &url.URL{}, "rpc", 1, chainId, commonclient.Primary, 0, 1, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
require.NoError(t, rpc.Dial(ctx))

ch := make(chan *evmtypes.Head)
sub, err := rpc.SubscribeNewHead(tests.Context(t), ch)
require.NoError(t, err)
errCh := sub.Err()

// ensure sub exists
require.Equal(t, int32(1), rpc.SubscribersCount())
rpc.DisconnectAll()

// ensure sub is closed
_, ok := <-errCh
require.False(t, ok)

require.NoError(t, rpc.Dial(ctx))
require.Equal(t, int32(0), rpc.SubscribersCount())
})
t.Run("Closed rpc client should remove existing SubscribeToHeads subscription with WS", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, nil, "rpc", 1, chainId, commonclient.Primary, 0, 0, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
require.NoError(t, rpc.Dial(ctx))

_, sub, err := rpc.SubscribeToHeads(tests.Context(t))
require.NoError(t, err)
errCh := sub.Err()

// ensure sub exists
require.Equal(t, int32(1), rpc.SubscribersCount())
rpc.DisconnectAll()

// ensure sub is closed
_, ok := <-errCh
require.False(t, ok)

require.NoError(t, rpc.Dial(ctx))
require.Equal(t, int32(0), rpc.SubscribersCount())
})
t.Run("Closed rpc client should remove existing SubscribeToHeads subscription with HTTP polling", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, &url.URL{}, "rpc", 1, chainId, commonclient.Primary, 0, 1, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
require.NoError(t, rpc.Dial(ctx))

_, sub, err := rpc.SubscribeToHeads(tests.Context(t))
require.NoError(t, err)
errCh := sub.Err()

// ensure sub exists
require.Equal(t, int32(1), rpc.SubscribersCount())
rpc.DisconnectAll()

// ensure sub is closed
_, ok := <-errCh
require.False(t, ok)

require.NoError(t, rpc.Dial(ctx))
require.Equal(t, int32(0), rpc.SubscribersCount())
})
t.Run("Closed rpc client should remove existing SubscribeToFinalizedHeads subscription", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()

rpc := client.NewRPCClient(lggr, *wsURL, &url.URL{}, "rpc", 1, chainId, commonclient.Primary, 1, 0, commonclient.QueryTimeout, commonclient.QueryTimeout, "")
require.NoError(t, rpc.Dial(ctx))

_, sub, err := rpc.SubscribeToFinalizedHeads(tests.Context(t))
require.NoError(t, err)
errCh := sub.Err()

// ensure sub exists
require.Equal(t, int32(1), rpc.SubscribersCount())
rpc.DisconnectAll()

// ensure sub is closed
_, ok := <-errCh
Comment from a reviewer (Collaborator):

Could you use

select {
case <-errCh: // ok
default:
	t.Fail()
}

to return a more explicit error instead of a test timeout?

require.False(t, ok)

require.NoError(t, rpc.Dial(ctx))
require.Equal(t, int32(0), rpc.SubscribersCount())
})
t.Run("Subscription error is properly wrapper", func(t *testing.T) {
server := testutils.NewWSServer(t, chainId, serverCallBack)
wsURL := server.WSURL()