From 3626644a6422ef3f8a39cf9450e1f7f9ec56d08a Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 8 Aug 2023 17:17:39 +0100 Subject: [PATCH 1/5] feat: add reconnect threshold for peers that we have failed to dial When auto-dialing peers, if we have failed to dial them recently there's little point in redialing them. Adds a `autoDialPeerRetryThreshold` which is a value in ms. If we have attempted to dial a peer and that dial attempt failed but we are under our min connection count, do not auto dial the peer within the retry threshold. Defaults to 10 minutes. --- .../src/connection-manager/auto-dial.ts | 30 +++++++++-- .../src/connection-manager/constants.ts | 15 ++++++ .../src/connection-manager/dial-queue.ts | 19 ++++++- .../libp2p/src/connection-manager/index.ts | 6 +++ .../test/connection-manager/auto-dial.spec.ts | 54 +++++++++++++++++++ .../test/connection-manager/direct.spec.ts | 13 +++++ 6 files changed, 131 insertions(+), 6 deletions(-) diff --git a/packages/libp2p/src/connection-manager/auto-dial.ts b/packages/libp2p/src/connection-manager/auto-dial.ts index c7357652dd..a2114ccf93 100644 --- a/packages/libp2p/src/connection-manager/auto-dial.ts +++ b/packages/libp2p/src/connection-manager/auto-dial.ts @@ -1,7 +1,8 @@ import { logger } from '@libp2p/logger' import { PeerMap, PeerSet } from '@libp2p/peer-collections' +import { toString as uint8ArrayToString } from 'uint8arrays/to-string' import { PeerJobQueue } from '../utils/peer-job-queue.js' -import { AUTO_DIAL_CONCURRENCY, AUTO_DIAL_INTERVAL, AUTO_DIAL_MAX_QUEUE_LENGTH, AUTO_DIAL_PRIORITY, MIN_CONNECTIONS } from './constants.js' +import { AUTO_DIAL_CONCURRENCY, AUTO_DIAL_INTERVAL, AUTO_DIAL_MAX_QUEUE_LENGTH, AUTO_DIAL_PEER_RETRY_THRESHOLD, AUTO_DIAL_PRIORITY, LAST_DIAL_FAILURE_KEY, MIN_CONNECTIONS } from './constants.js' import type { Libp2pEvents } from '@libp2p/interface' import type { EventEmitter } from '@libp2p/interface/events' import type { PeerStore } from '@libp2p/interface/peer-store' @@ -16,6 +17,7 @@ interface AutoDialInit { autoDialConcurrency?: number autoDialPriority?: number autoDialInterval?: number + autoDialPeerRetryThreshold?: number } interface AutoDialComponents { @@ -29,7 +31,8 @@ const defaultOptions = { maxQueueLength: AUTO_DIAL_MAX_QUEUE_LENGTH, autoDialConcurrency: AUTO_DIAL_CONCURRENCY, autoDialPriority: AUTO_DIAL_PRIORITY, - autoDialInterval: AUTO_DIAL_INTERVAL + autoDialInterval: AUTO_DIAL_INTERVAL, + autoDialPeerRetryThreshold: AUTO_DIAL_PEER_RETRY_THRESHOLD } export class AutoDial implements Startable { @@ -40,6 +43,7 @@ export class AutoDial implements Startable { private readonly autoDialPriority: number private readonly autoDialIntervalMs: number private readonly autoDialMaxQueueLength: number + private readonly autoDialPeerRetryThresholdMs: number private autoDialInterval?: ReturnType private started: boolean private running: boolean @@ -56,6 +60,7 @@ export class AutoDial implements Startable { this.autoDialPriority = init.autoDialPriority ?? defaultOptions.autoDialPriority this.autoDialIntervalMs = init.autoDialInterval ?? defaultOptions.autoDialInterval this.autoDialMaxQueueLength = init.maxQueueLength ?? defaultOptions.maxQueueLength + this.autoDialPeerRetryThresholdMs = init.autoDialPeerRetryThreshold ?? defaultOptions.autoDialPeerRetryThreshold this.started = false this.running = false this.queue = new PeerJobQueue({ @@ -207,9 +212,26 @@ export class AutoDial implements Startable { return 0 }) - log('selected %d/%d peers to dial', sortedPeers.length, peers.length) + const peersThatHaveNotFailed = sortedPeers.filter(peer => { + const lastDialFailure = peer.metadata.get(LAST_DIAL_FAILURE_KEY) - for (const peer of sortedPeers) { + if (lastDialFailure == null) { + return true + } + + const when = parseInt(uint8ArrayToString(lastDialFailure)) + + if (isNaN(when)) { + return true + } + + // only dial if the time since the last failure is above the retry threshold + return Date.now() - when > this.autoDialPeerRetryThresholdMs + }) + + log('selected %d/%d peers to dial', peersThatHaveNotFailed.length, peers.length) + + for (const peer of peersThatHaveNotFailed) { this.queue.add(async () => { const numConnections = this.connectionManager.getConnectionsMap().size diff --git a/packages/libp2p/src/connection-manager/constants.ts b/packages/libp2p/src/connection-manager/constants.ts index 6d0ffb8c9d..d8680e5b49 100644 --- a/packages/libp2p/src/connection-manager/constants.ts +++ b/packages/libp2p/src/connection-manager/constants.ts @@ -53,6 +53,11 @@ export const AUTO_DIAL_PRIORITY = 0 */ export const AUTO_DIAL_MAX_QUEUE_LENGTH = 100 +/** + * @see https://libp2p.github.io/js-libp2p/interfaces/libp2p.index.unknown.ConnectionManagerInit.html#autoDialPeerRetryThreshold + */ +export const AUTO_DIAL_PEER_RETRY_THRESHOLD = 1000 * 60 * 10 + /** * @see https://libp2p.github.io/js-libp2p/interfaces/index._internal_.ConnectionManagerConfig.html#inboundConnectionThreshold */ @@ -62,3 +67,13 @@ export const INBOUND_CONNECTION_THRESHOLD = 5 * @see https://libp2p.github.io/js-libp2p/interfaces/index._internal_.ConnectionManagerConfig.html#maxIncomingPendingConnections */ export const MAX_INCOMING_PENDING_CONNECTIONS = 10 + +/** + * Store as part of the peer store metadata for a given peer, the value for this + * key is a stringified number representing the timestamp of the last time a + * dial attempted failed with the relevant peer. + * + * Used to insure we do not endlessly try to auto dial peers we have recently + * failed to dial. + */ +export const LAST_DIAL_FAILURE_KEY = 'last-dial-failure' diff --git a/packages/libp2p/src/connection-manager/dial-queue.ts b/packages/libp2p/src/connection-manager/dial-queue.ts index fb540bb33e..e529e6f8cd 100644 --- a/packages/libp2p/src/connection-manager/dial-queue.ts +++ b/packages/libp2p/src/connection-manager/dial-queue.ts @@ -7,13 +7,15 @@ import { dnsaddrResolver } from '@multiformats/multiaddr/resolvers' import { type ClearableSignal, anySignal } from 'any-signal' import pDefer from 'p-defer' import PQueue from 'p-queue' +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' import { codes } from '../errors.js' import { getPeerAddress } from '../get-peer.js' import { DIAL_TIMEOUT, MAX_PARALLEL_DIALS_PER_PEER, MAX_PARALLEL_DIALS, - MAX_PEER_ADDRS_TO_DIAL + MAX_PEER_ADDRS_TO_DIAL, + LAST_DIAL_FAILURE_KEY } from './constants.js' import { combineSignals, resolveMultiaddrs } from './utils.js' import type { AddressSorter, AbortOptions, PendingDial } from '@libp2p/interface' @@ -230,9 +232,22 @@ export class DialQueue { // clean up abort signals/controllers signal.clear() }) - .catch(err => { + .catch(async err => { log.error('dial failed to %s', pendingDial.multiaddrs.map(ma => ma.toString()).join(', '), err) + if (peerId != null) { + // record the last failed dial + try { + await this.peerStore.patch(peerId, { + metadata: { + [LAST_DIAL_FAILURE_KEY]: uint8ArrayFromString(Date.now().toString()) + } + }) + } catch (err: any) { + log.error('could not update last dial failure key for %p', peerId, err) + } + } + // Error is a timeout if (signal.aborted) { const error = new CodeError(err.message, codes.ERR_TIMEOUT) diff --git a/packages/libp2p/src/connection-manager/index.ts b/packages/libp2p/src/connection-manager/index.ts index a73ff85a75..0728796434 100644 --- a/packages/libp2p/src/connection-manager/index.ts +++ b/packages/libp2p/src/connection-manager/index.ts @@ -66,6 +66,12 @@ export interface ConnectionManagerInit { */ autoDialMaxQueueLength?: number + /** + * When we've failed to dial a peer, do not autodial them again within this + * number of ms. (default: 10 minutes) + */ + autoDialPeerRetryThreshold?: number + /** * Sort the known addresses of a peer before trying to dial, By default public * addresses will be dialled before private (e.g. loopback or LAN) addresses. diff --git a/packages/libp2p/test/connection-manager/auto-dial.spec.ts b/packages/libp2p/test/connection-manager/auto-dial.spec.ts index 377e7bd38c..08b7c23466 100644 --- a/packages/libp2p/test/connection-manager/auto-dial.spec.ts +++ b/packages/libp2p/test/connection-manager/auto-dial.spec.ts @@ -11,7 +11,9 @@ import delay from 'delay' import pWaitFor from 'p-wait-for' import Sinon from 'sinon' import { stubInterface } from 'sinon-ts' +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' import { AutoDial } from '../../src/connection-manager/auto-dial.js' +import { LAST_DIAL_FAILURE_KEY } from '../../src/connection-manager/constants.js' import { matchPeerId } from '../fixtures/match-peer-id.js' import type { Libp2pEvents } from '@libp2p/interface' import type { Connection } from '@libp2p/interface/connection' @@ -224,4 +226,56 @@ describe('auto-dial', () => { // should only have queried peer store once expect(peerStoreAllSpy.callCount).to.equal(1) }) + + it('should not re-dial peers we have recently failed to dial', async () => { + const peerWithAddress: Peer = { + id: await createEd25519PeerId(), + protocols: [], + addresses: [{ + multiaddr: multiaddr('/ip4/127.0.0.1/tcp/4001'), + isCertified: true + }], + metadata: new Map(), + tags: new Map() + } + const undialablePeer: Peer = { + id: await createEd25519PeerId(), + protocols: [], + addresses: [{ + multiaddr: multiaddr('/ip4/127.0.0.1/tcp/4002'), + isCertified: true + }], + // we failed to dial them recently + metadata: new Map([[LAST_DIAL_FAILURE_KEY, uint8ArrayFromString(`${Date.now() - 10}`)]]), + tags: new Map() + } + + await peerStore.save(peerWithAddress.id, peerWithAddress) + await peerStore.save(undialablePeer.id, undialablePeer) + + const connectionManager = stubInterface({ + getConnectionsMap: new PeerMap(), + getDialQueue: [] + }) + + autoDialler = new AutoDial({ + peerStore, + connectionManager, + events + }, { + minConnections: 10, + autoDialInterval: 10000 + }) + autoDialler.start() + void autoDialler.autoDial() + + await pWaitFor(() => { + return connectionManager.openConnection.callCount === 1 + }) + await delay(1000) + + expect(connectionManager.openConnection.callCount).to.equal(1) + expect(connectionManager.openConnection.calledWith(matchPeerId(peerWithAddress.id))).to.be.true() + expect(connectionManager.openConnection.calledWith(matchPeerId(undialablePeer.id))).to.be.false() + }) }) diff --git a/packages/libp2p/test/connection-manager/direct.spec.ts b/packages/libp2p/test/connection-manager/direct.spec.ts index 5a5f52bebe..38dd18e156 100644 --- a/packages/libp2p/test/connection-manager/direct.spec.ts +++ b/packages/libp2p/test/connection-manager/direct.spec.ts @@ -20,6 +20,7 @@ import { pEvent } from 'p-event' import sinon from 'sinon' import { stubInterface } from 'sinon-ts' import { defaultComponents, type Components } from '../../src/components.js' +import { LAST_DIAL_FAILURE_KEY } from '../../src/connection-manager/constants.js' import { DefaultConnectionManager } from '../../src/connection-manager/index.js' import { codes as ErrorCodes } from '../../src/errors.js' import { type IdentifyService, identifyService } from '../../src/identify/index.js' @@ -104,6 +105,18 @@ describe('dialing (direct, WebSockets)', () => { .and.to.have.nested.property('.code', ErrorCodes.ERR_NO_VALID_ADDRESSES) }) + it('should mark a peer as having recently failed to connect', async () => { + connectionManager = new DefaultConnectionManager(localComponents) + await connectionManager.start() + + await expect(connectionManager.openConnection(multiaddr(`/ip4/127.0.0.1/tcp/12984/ws/p2p/${remoteComponents.peerId.toString()}`))) + .to.eventually.be.rejected() + + const peer = await localComponents.peerStore.get(remoteComponents.peerId) + + expect(peer.metadata.has(LAST_DIAL_FAILURE_KEY)).to.be.true() + }) + it('should be able to connect to a given peer', async () => { connectionManager = new DefaultConnectionManager(localComponents) await connectionManager.start() From 9b59ccc65c6639f8f9c7de3a51abe54a954fbb7a Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 15 Aug 2023 13:43:42 +0100 Subject: [PATCH 2/5] chore: lower the default value --- packages/libp2p/src/connection-manager/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/libp2p/src/connection-manager/constants.ts b/packages/libp2p/src/connection-manager/constants.ts index d8680e5b49..3b8b7d1146 100644 --- a/packages/libp2p/src/connection-manager/constants.ts +++ b/packages/libp2p/src/connection-manager/constants.ts @@ -56,7 +56,7 @@ export const AUTO_DIAL_MAX_QUEUE_LENGTH = 100 /** * @see https://libp2p.github.io/js-libp2p/interfaces/libp2p.index.unknown.ConnectionManagerInit.html#autoDialPeerRetryThreshold */ -export const AUTO_DIAL_PEER_RETRY_THRESHOLD = 1000 * 60 * 10 +export const AUTO_DIAL_PEER_RETRY_THRESHOLD = 1000 * 60 /** * @see https://libp2p.github.io/js-libp2p/interfaces/index._internal_.ConnectionManagerConfig.html#inboundConnectionThreshold From dea67af5c3d47cf04b7394ce526c45d934c95a6d Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 15 Aug 2023 13:55:28 +0100 Subject: [PATCH 3/5] chore: update test to actually test retry threshold --- .../test/connection-manager/auto-dial.spec.ts | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/libp2p/test/connection-manager/auto-dial.spec.ts b/packages/libp2p/test/connection-manager/auto-dial.spec.ts index 08b7c23466..3c5968bdf1 100644 --- a/packages/libp2p/test/connection-manager/auto-dial.spec.ts +++ b/packages/libp2p/test/connection-manager/auto-dial.spec.ts @@ -264,18 +264,31 @@ describe('auto-dial', () => { events }, { minConnections: 10, - autoDialInterval: 10000 + autoDialPeerRetryThreshold: 2000 }) autoDialler.start() + void autoDialler.autoDial() await pWaitFor(() => { return connectionManager.openConnection.callCount === 1 }) - await delay(1000) expect(connectionManager.openConnection.callCount).to.equal(1) expect(connectionManager.openConnection.calledWith(matchPeerId(peerWithAddress.id))).to.be.true() expect(connectionManager.openConnection.calledWith(matchPeerId(undialablePeer.id))).to.be.false() + + // pass the retry threshold + await delay(2000) + + // autodial again + void autoDialler.autoDial() + + await pWaitFor(() => { + return connectionManager.openConnection.callCount === 3 + }) + + // should have retried the unreachable peer + expect(connectionManager.openConnection.calledWith(matchPeerId(undialablePeer.id))).to.be.true() }) }) From 8285e9e55d47b33edbbed2ecf7d19c25ca7bc927 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 15 Aug 2023 17:24:15 +0100 Subject: [PATCH 4/5] chore: pr comments --- packages/libp2p/src/connection-manager/auto-dial.ts | 6 +++--- .../libp2p/src/connection-manager/constants.defaults.ts | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/libp2p/src/connection-manager/auto-dial.ts b/packages/libp2p/src/connection-manager/auto-dial.ts index a2114ccf93..783f93a44c 100644 --- a/packages/libp2p/src/connection-manager/auto-dial.ts +++ b/packages/libp2p/src/connection-manager/auto-dial.ts @@ -219,14 +219,14 @@ export class AutoDial implements Startable { return true } - const when = parseInt(uint8ArrayToString(lastDialFailure)) + const lastDialFailureTimestamp = parseInt(uint8ArrayToString(lastDialFailure)) - if (isNaN(when)) { + if (isNaN(lastDialFailureTimestamp)) { return true } // only dial if the time since the last failure is above the retry threshold - return Date.now() - when > this.autoDialPeerRetryThresholdMs + return Date.now() - lastDialFailureTimestamp > this.autoDialPeerRetryThresholdMs }) log('selected %d/%d peers to dial', peersThatHaveNotFailed.length, peers.length) diff --git a/packages/libp2p/src/connection-manager/constants.defaults.ts b/packages/libp2p/src/connection-manager/constants.defaults.ts index 700444775e..60e7e5c430 100644 --- a/packages/libp2p/src/connection-manager/constants.defaults.ts +++ b/packages/libp2p/src/connection-manager/constants.defaults.ts @@ -50,8 +50,8 @@ export const MAX_INCOMING_PENDING_CONNECTIONS = 10 /** * Store as part of the peer store metadata for a given peer, the value for this - * key is a stringified number representing the timestamp of the last time a - * dial attempted failed with the relevant peer. + * key is a timestamp of the last time a dial attempted failed with the relevant + * peer stored as a string. * * Used to insure we do not endlessly try to auto dial peers we have recently * failed to dial. From e65724fbb937722c01f47b0c1c5cc303461bc587 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 15 Aug 2023 17:46:19 +0100 Subject: [PATCH 5/5] chore: update comment --- packages/libp2p/src/connection-manager/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/libp2p/src/connection-manager/index.ts b/packages/libp2p/src/connection-manager/index.ts index 0728796434..75c193072d 100644 --- a/packages/libp2p/src/connection-manager/index.ts +++ b/packages/libp2p/src/connection-manager/index.ts @@ -68,7 +68,7 @@ export interface ConnectionManagerInit { /** * When we've failed to dial a peer, do not autodial them again within this - * number of ms. (default: 10 minutes) + * number of ms. (default: 1 minute) */ autoDialPeerRetryThreshold?: number