diff --git a/creator-node/src/services/stateMachineManager/stateMachineConstants.ts b/creator-node/src/services/stateMachineManager/stateMachineConstants.ts index 53025ef0d04..757e5d08104 100644 --- a/creator-node/src/services/stateMachineManager/stateMachineConstants.ts +++ b/creator-node/src/services/stateMachineManager/stateMachineConstants.ts @@ -67,7 +67,7 @@ export const QUEUE_NAMES = { // Queue to find replica set updates FIND_REPLICA_SET_UPDATES: 'find-replica-set-updates-queue', // Queue that only processes jobs to fetch the cNodeEndpoint->spId mapping, - FETCH_C_NODE_ENDPOINT_TO_SP_ID_MAP: 'c-node-to-endpoint-sp-id-map-queue', + FETCH_C_NODE_ENDPOINT_TO_SP_ID_MAP: 'c-node-endpoint-to-sp-id-map-queue', // Queue to issue a manual sync MANUAL_SYNC: 'manual-sync-queue', // Queue to issue a recurring sync diff --git a/creator-node/src/services/stateMachineManager/stateMonitoring/findReplicaSetUpdates.jobProcessor.ts b/creator-node/src/services/stateMachineManager/stateMonitoring/findReplicaSetUpdates.jobProcessor.ts index d8e23a586b6..10abd70f01d 100644 --- a/creator-node/src/services/stateMachineManager/stateMonitoring/findReplicaSetUpdates.jobProcessor.ts +++ b/creator-node/src/services/stateMachineManager/stateMonitoring/findReplicaSetUpdates.jobProcessor.ts @@ -183,6 +183,20 @@ const _findReplicaSetUpdatesForUser = async ( secondary2SpID } = user + // If the user was on an old client (pre-URSM), they could have a null replica set on URSM. + // This will be resolved by client-side sanity checks next time they use the client. + // Any replica set update we issue here will fail because they have no primary SP ID that can be verified from chain. + if ( + primarySpID === null && + secondary1SpID === null && + secondary2SpID === null + ) { + logger.error( + `User ${wallet} has null SP IDs for their entire replica set. 
Replica set endpoints: [${primary},${secondary1},${secondary2}]` + ) + return requiredUpdateReplicaSetOps + } + /** * If this node is primary for user, check both secondaries for health * Enqueue SyncRequests against healthy secondaries, and enqueue UpdateReplicaSetOps against unhealthy secondaries @@ -267,6 +281,17 @@ const _findReplicaSetUpdatesForUser = async ( ContentNodeInfoManager.getCNodeEndpointToSpIdMap()[replica.endpoint] !== replica.spId ) { + logger.error( + `_findReplicaSetUpdatesForUser(): Replica ${ + replica.endpoint + } for user ${wallet} mismatched spID. Expected ${ + replica.spId + }, found ${ + ContentNodeInfoManager.getCNodeEndpointToSpIdMap()[replica.endpoint] + }. Marking replica as unhealthy. Endpoint to spID mapping: ${JSON.stringify( + ContentNodeInfoManager.getCNodeEndpointToSpIdMap() + )}` + ) unhealthyReplicas.add(replica.endpoint) } else if (unhealthyPeersSet.has(replica.endpoint)) { // Else, continue with conducting extra health check if the current observed node is a primary, and @@ -280,6 +305,9 @@ const _findReplicaSetUpdatesForUser = async ( } if (addToUnhealthyReplicas) { + logger.error( + `_findReplicaSetUpdatesForUser(): Replica ${replica.endpoint} for user ${wallet} was already marked unhealthy and failed an additional health check if it was primary.` + ) unhealthyReplicas.add(replica.endpoint) } } diff --git a/creator-node/src/services/stateMachineManager/stateMonitoring/monitorState.jobProcessor.ts b/creator-node/src/services/stateMachineManager/stateMonitoring/monitorState.jobProcessor.ts index 1ce2c4b8367..5b1ae5ccd3b 100644 --- a/creator-node/src/services/stateMachineManager/stateMonitoring/monitorState.jobProcessor.ts +++ b/creator-node/src/services/stateMachineManager/stateMonitoring/monitorState.jobProcessor.ts @@ -152,7 +152,11 @@ module.exports = async function ({ _addToDecisionTree( decisionTree, 'retrieveUserInfoFromReplicaSet Success', - logger + logger, + { + newUnhealthyPeerSetLength: 
retrieveUserInfoResp.unhealthyPeers.size, + newUnhealthyPeers: Array.from(retrieveUserInfoResp.unhealthyPeers) + } ) } catch (e: any) { logger.error(e.stack)