Skip to content

Commit 4ef9c79

Browse files
authored
feat(libp2p): add autodial retry threshold config option (#1943)
When auto-dialing peers, if we have failed to dial them recently there's little point in redialing them, as the dial will probably just fail again, consuming a slot in the dial queue and other resources. Adds an `autoDialPeerRetryThreshold` config key to the connection manager, which is a value in ms. If a dial attempt to a peer has failed and we are under our min connection count, the peer is not auto-dialed again until the retry threshold has elapsed. Defaults to 1 minute. Closes #1899
1 parent 87dc7e9 commit 4ef9c79

File tree

6 files changed

+144
-6
lines changed

6 files changed

+144
-6
lines changed

packages/libp2p/src/connection-manager/auto-dial.ts

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import { logger } from '@libp2p/logger'
22
import { PeerMap, PeerSet } from '@libp2p/peer-collections'
3+
import { toString as uint8ArrayToString } from 'uint8arrays/to-string'
34
import { PeerJobQueue } from '../utils/peer-job-queue.js'
4-
import { AUTO_DIAL_CONCURRENCY, AUTO_DIAL_INTERVAL, AUTO_DIAL_MAX_QUEUE_LENGTH, AUTO_DIAL_PRIORITY, MIN_CONNECTIONS } from './constants.js'
5+
import { AUTO_DIAL_CONCURRENCY, AUTO_DIAL_INTERVAL, AUTO_DIAL_MAX_QUEUE_LENGTH, AUTO_DIAL_PEER_RETRY_THRESHOLD, AUTO_DIAL_PRIORITY, LAST_DIAL_FAILURE_KEY, MIN_CONNECTIONS } from './constants.js'
56
import type { Libp2pEvents } from '@libp2p/interface'
67
import type { EventEmitter } from '@libp2p/interface/events'
78
import type { PeerStore } from '@libp2p/interface/peer-store'
@@ -16,6 +17,7 @@ interface AutoDialInit {
1617
autoDialConcurrency?: number
1718
autoDialPriority?: number
1819
autoDialInterval?: number
20+
autoDialPeerRetryThreshold?: number
1921
}
2022

2123
interface AutoDialComponents {
@@ -29,7 +31,8 @@ const defaultOptions = {
2931
maxQueueLength: AUTO_DIAL_MAX_QUEUE_LENGTH,
3032
autoDialConcurrency: AUTO_DIAL_CONCURRENCY,
3133
autoDialPriority: AUTO_DIAL_PRIORITY,
32-
autoDialInterval: AUTO_DIAL_INTERVAL
34+
autoDialInterval: AUTO_DIAL_INTERVAL,
35+
autoDialPeerRetryThreshold: AUTO_DIAL_PEER_RETRY_THRESHOLD
3336
}
3437

3538
export class AutoDial implements Startable {
@@ -40,6 +43,7 @@ export class AutoDial implements Startable {
4043
private readonly autoDialPriority: number
4144
private readonly autoDialIntervalMs: number
4245
private readonly autoDialMaxQueueLength: number
46+
private readonly autoDialPeerRetryThresholdMs: number
4347
private autoDialInterval?: ReturnType<typeof setInterval>
4448
private started: boolean
4549
private running: boolean
@@ -56,6 +60,7 @@ export class AutoDial implements Startable {
5660
this.autoDialPriority = init.autoDialPriority ?? defaultOptions.autoDialPriority
5761
this.autoDialIntervalMs = init.autoDialInterval ?? defaultOptions.autoDialInterval
5862
this.autoDialMaxQueueLength = init.maxQueueLength ?? defaultOptions.maxQueueLength
63+
this.autoDialPeerRetryThresholdMs = init.autoDialPeerRetryThreshold ?? defaultOptions.autoDialPeerRetryThreshold
5964
this.started = false
6065
this.running = false
6166
this.queue = new PeerJobQueue({
@@ -207,9 +212,26 @@ export class AutoDial implements Startable {
207212
return 0
208213
})
209214

210-
log('selected %d/%d peers to dial', sortedPeers.length, peers.length)
215+
const peersThatHaveNotFailed = sortedPeers.filter(peer => {
216+
const lastDialFailure = peer.metadata.get(LAST_DIAL_FAILURE_KEY)
211217

212-
for (const peer of sortedPeers) {
218+
if (lastDialFailure == null) {
219+
return true
220+
}
221+
222+
const lastDialFailureTimestamp = parseInt(uint8ArrayToString(lastDialFailure))
223+
224+
if (isNaN(lastDialFailureTimestamp)) {
225+
return true
226+
}
227+
228+
// only dial if the time since the last failure is above the retry threshold
229+
return Date.now() - lastDialFailureTimestamp > this.autoDialPeerRetryThresholdMs
230+
})
231+
232+
log('selected %d/%d peers to dial', peersThatHaveNotFailed.length, peers.length)
233+
234+
for (const peer of peersThatHaveNotFailed) {
213235
this.queue.add(async () => {
214236
const numConnections = this.connectionManager.getConnectionsMap().size
215237

packages/libp2p/src/connection-manager/constants.defaults.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ export const AUTO_DIAL_PRIORITY = 0
3333
*/
3434
export const AUTO_DIAL_MAX_QUEUE_LENGTH = 100
3535

36+
/**
37+
* @see https://libp2p.github.io/js-libp2p/interfaces/libp2p.index.unknown.ConnectionManagerInit.html#autoDialPeerRetryThreshold
38+
*/
39+
export const AUTO_DIAL_PEER_RETRY_THRESHOLD = 1000 * 60
40+
3641
/**
3742
* @see https://libp2p.github.io/js-libp2p/interfaces/index._internal_.ConnectionManagerConfig.html#inboundConnectionThreshold
3843
*/
@@ -42,3 +47,13 @@ export const INBOUND_CONNECTION_THRESHOLD = 5
4247
* @see https://libp2p.github.io/js-libp2p/interfaces/index._internal_.ConnectionManagerConfig.html#maxIncomingPendingConnections
4348
*/
4449
export const MAX_INCOMING_PENDING_CONNECTIONS = 10
50+
51+
/**
52+
* Stored as part of the peer store metadata for a given peer. The value for this
53+
* key is a timestamp of the last time a dial attempt failed with the relevant
54+
* peer stored as a string.
55+
*
56+
* Used to ensure we do not endlessly try to auto dial peers we have recently
57+
* failed to dial.
58+
*/
59+
export const LAST_DIAL_FAILURE_KEY = 'last-dial-failure'

packages/libp2p/src/connection-manager/dial-queue.ts

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ import { dnsaddrResolver } from '@multiformats/multiaddr/resolvers'
77
import { type ClearableSignal, anySignal } from 'any-signal'
88
import pDefer from 'p-defer'
99
import PQueue from 'p-queue'
10+
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
1011
import { codes } from '../errors.js'
1112
import { getPeerAddress } from '../get-peer.js'
1213
import {
1314
DIAL_TIMEOUT,
1415
MAX_PARALLEL_DIALS_PER_PEER,
1516
MAX_PARALLEL_DIALS,
16-
MAX_PEER_ADDRS_TO_DIAL
17+
MAX_PEER_ADDRS_TO_DIAL,
18+
LAST_DIAL_FAILURE_KEY
1719
} from './constants.js'
1820
import { combineSignals, resolveMultiaddrs } from './utils.js'
1921
import type { AddressSorter, AbortOptions, PendingDial } from '@libp2p/interface'
@@ -230,9 +232,22 @@ export class DialQueue {
230232
// clean up abort signals/controllers
231233
signal.clear()
232234
})
233-
.catch(err => {
235+
.catch(async err => {
234236
log.error('dial failed to %s', pendingDial.multiaddrs.map(ma => ma.toString()).join(', '), err)
235237

238+
if (peerId != null) {
239+
// record the last failed dial
240+
try {
241+
await this.peerStore.patch(peerId, {
242+
metadata: {
243+
[LAST_DIAL_FAILURE_KEY]: uint8ArrayFromString(Date.now().toString())
244+
}
245+
})
246+
} catch (err: any) {
247+
log.error('could not update last dial failure key for %p', peerId, err)
248+
}
249+
}
250+
236251
// Error is a timeout
237252
if (signal.aborted) {
238253
const error = new CodeError(err.message, codes.ERR_TIMEOUT)

packages/libp2p/src/connection-manager/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ export interface ConnectionManagerInit {
6666
*/
6767
autoDialMaxQueueLength?: number
6868

69+
/**
70+
* When we've failed to dial a peer, do not autodial them again within this
71+
* number of ms. (default: 1 minute)
72+
*/
73+
autoDialPeerRetryThreshold?: number
74+
6975
/**
7076
* Sort the known addresses of a peer before trying to dial, By default public
7177
* addresses will be dialled before private (e.g. loopback or LAN) addresses.

packages/libp2p/test/connection-manager/auto-dial.spec.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import delay from 'delay'
1111
import pWaitFor from 'p-wait-for'
1212
import Sinon from 'sinon'
1313
import { stubInterface } from 'sinon-ts'
14+
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
1415
import { AutoDial } from '../../src/connection-manager/auto-dial.js'
16+
import { LAST_DIAL_FAILURE_KEY } from '../../src/connection-manager/constants.js'
1517
import { matchPeerId } from '../fixtures/match-peer-id.js'
1618
import type { Libp2pEvents } from '@libp2p/interface'
1719
import type { Connection } from '@libp2p/interface/connection'
@@ -224,4 +226,69 @@ describe('auto-dial', () => {
224226
// should only have queried peer store once
225227
expect(peerStoreAllSpy.callCount).to.equal(1)
226228
})
229+
230+
it('should not re-dial peers we have recently failed to dial', async () => {
231+
const peerWithAddress: Peer = {
232+
id: await createEd25519PeerId(),
233+
protocols: [],
234+
addresses: [{
235+
multiaddr: multiaddr('/ip4/127.0.0.1/tcp/4001'),
236+
isCertified: true
237+
}],
238+
metadata: new Map(),
239+
tags: new Map()
240+
}
241+
const undialablePeer: Peer = {
242+
id: await createEd25519PeerId(),
243+
protocols: [],
244+
addresses: [{
245+
multiaddr: multiaddr('/ip4/127.0.0.1/tcp/4002'),
246+
isCertified: true
247+
}],
248+
// we failed to dial them recently
249+
metadata: new Map([[LAST_DIAL_FAILURE_KEY, uint8ArrayFromString(`${Date.now() - 10}`)]]),
250+
tags: new Map()
251+
}
252+
253+
await peerStore.save(peerWithAddress.id, peerWithAddress)
254+
await peerStore.save(undialablePeer.id, undialablePeer)
255+
256+
const connectionManager = stubInterface<ConnectionManager>({
257+
getConnectionsMap: new PeerMap(),
258+
getDialQueue: []
259+
})
260+
261+
autoDialler = new AutoDial({
262+
peerStore,
263+
connectionManager,
264+
events
265+
}, {
266+
minConnections: 10,
267+
autoDialPeerRetryThreshold: 2000
268+
})
269+
autoDialler.start()
270+
271+
void autoDialler.autoDial()
272+
273+
await pWaitFor(() => {
274+
return connectionManager.openConnection.callCount === 1
275+
})
276+
277+
expect(connectionManager.openConnection.callCount).to.equal(1)
278+
expect(connectionManager.openConnection.calledWith(matchPeerId(peerWithAddress.id))).to.be.true()
279+
expect(connectionManager.openConnection.calledWith(matchPeerId(undialablePeer.id))).to.be.false()
280+
281+
// pass the retry threshold
282+
await delay(2000)
283+
284+
// autodial again
285+
void autoDialler.autoDial()
286+
287+
await pWaitFor(() => {
288+
return connectionManager.openConnection.callCount === 3
289+
})
290+
291+
// should have retried the unreachable peer
292+
expect(connectionManager.openConnection.calledWith(matchPeerId(undialablePeer.id))).to.be.true()
293+
})
227294
})

packages/libp2p/test/connection-manager/direct.spec.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { pEvent } from 'p-event'
2020
import sinon from 'sinon'
2121
import { stubInterface } from 'sinon-ts'
2222
import { defaultComponents, type Components } from '../../src/components.js'
23+
import { LAST_DIAL_FAILURE_KEY } from '../../src/connection-manager/constants.js'
2324
import { DefaultConnectionManager } from '../../src/connection-manager/index.js'
2425
import { codes as ErrorCodes } from '../../src/errors.js'
2526
import { type IdentifyService, identifyService } from '../../src/identify/index.js'
@@ -104,6 +105,18 @@ describe('dialing (direct, WebSockets)', () => {
104105
.and.to.have.nested.property('.code', ErrorCodes.ERR_NO_VALID_ADDRESSES)
105106
})
106107

108+
it('should mark a peer as having recently failed to connect', async () => {
109+
connectionManager = new DefaultConnectionManager(localComponents)
110+
await connectionManager.start()
111+
112+
await expect(connectionManager.openConnection(multiaddr(`/ip4/127.0.0.1/tcp/12984/ws/p2p/${remoteComponents.peerId.toString()}`)))
113+
.to.eventually.be.rejected()
114+
115+
const peer = await localComponents.peerStore.get(remoteComponents.peerId)
116+
117+
expect(peer.metadata.has(LAST_DIAL_FAILURE_KEY)).to.be.true()
118+
})
119+
107120
it('should be able to connect to a given peer', async () => {
108121
connectionManager = new DefaultConnectionManager(localComponents)
109122
await connectionManager.start()

0 commit comments

Comments
 (0)