From c8162be36878bf44431be76af5a6d214dc6606e2 Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Fri, 19 Aug 2022 14:40:21 +0000 Subject: [PATCH 1/7] New env var + actually set healthy status --- .../components/healthCheck/healthCheckComponentService.js | 4 +++- creator-node/src/config.js | 6 ++++++ .../src/services/stateMachineManager/CNodeHealthManager.js | 7 +++++++ libs/src/services/creatorNode/CreatorNodeSelection.ts | 5 ++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/creator-node/src/components/healthCheck/healthCheckComponentService.js b/creator-node/src/components/healthCheck/healthCheckComponentService.js index a4ec2736036..fd193ba052e 100644 --- a/creator-node/src/components/healthCheck/healthCheckComponentService.js +++ b/creator-node/src/components/healthCheck/healthCheckComponentService.js @@ -130,9 +130,11 @@ const healthCheck = async ( solDelegatePublicKeyBase58 = solDelegateKeyPair.publicKey.toBase58() } catch (_) {} + const healthy = !config.get('considerNodeUnhealthy') + const response = { ...versionInfo, - healthy: true, + healthy, git: process.env.GIT_SHA, selectedDiscoveryProvider: 'none', creatorNodeEndpoint: config.get('creatorNodeEndpoint'), diff --git a/creator-node/src/config.js b/creator-node/src/config.js index 2e442c655b2..8e13f4c3fa1 100644 --- a/creator-node/src/config.js +++ b/creator-node/src/config.js @@ -445,6 +445,12 @@ const config = convict({ env: 'cidWhitelist', default: '' }, + considerNodeUnhealthy: { + doc: 'Flag to mark the node as unhealthy (health_check will 200 but healthy: false in response). Wont be selected in replica sets, other nodes will roll this node off replica sets for their users', + format: Boolean, + env: 'considerNodeUnhealthy', + default: false + }, /** sync / snapback configs */ diff --git a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js index c86ea83117e..2ff38a1f6c8 100644 --- a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js +++ b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js @@ -132,6 +132,13 @@ class CNodeHealthManager { */ determinePeerHealth(verboseHealthCheckResp) { // Check for sufficient minimum storage size + const { healthy } = verboseHealthCheckResp + if (!healthy) { + throw new Error( + `Node health check returned healthy: false` + ) + } + const { storagePathSize, storagePathUsed } = verboseHealthCheckResp if ( storagePathSize && diff --git a/libs/src/services/creatorNode/CreatorNodeSelection.ts b/libs/src/services/creatorNode/CreatorNodeSelection.ts index 81735f9bed8..aefef8fafb6 100644 --- a/libs/src/services/creatorNode/CreatorNodeSelection.ts +++ b/libs/src/services/creatorNode/CreatorNodeSelection.ts @@ -387,6 +387,9 @@ export class CreatorNodeSelection extends ServiceSelection { this.currentVersion as string, resp.response.data.data.version ) + + const { healthy: isHealthyStatus } = resp.response.data.data + const { storagePathSize, storagePathUsed, maxStorageUsedPercent } = resp.response.data.data if (maxStorageUsedPercent) { @@ -400,7 +403,7 @@ export class CreatorNodeSelection extends ServiceSelection { storagePathSize, storagePathUsed ) - isHealthy = isUp && versionIsUpToDate && hasEnoughStorage + isHealthy = isUp && versionIsUpToDate && hasEnoughStorage && isHealthyStatus } if (!isHealthy) { From ef33537cab235e62ec7a6c795a3dddc05f02d6bf Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Fri, 19 Aug 2022 14:47:21 +0000 Subject: [PATCH 2/7] Lint --- .../src/services/stateMachineManager/CNodeHealthManager.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js index 2ff38a1f6c8..aa12296b3f0 100644 --- a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js +++ b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js @@ -134,11 +134,9 @@ class CNodeHealthManager { // Check for sufficient minimum storage size const { healthy } = verboseHealthCheckResp if (!healthy) { - throw new Error( - `Node health check returned healthy: false` - ) + throw new Error(`Node health check returned healthy: false`) } - + const { storagePathSize, storagePathUsed } = verboseHealthCheckResp if ( storagePathSize && From 5eb1c4d779cf9a9006e7a9d1cec561b901b91c51 Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Wed, 24 Aug 2022 18:26:16 +0000 Subject: [PATCH 3/7] Test for new env var --- .../healthCheckComponentService.test.js | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/creator-node/src/components/healthCheck/healthCheckComponentService.test.js b/creator-node/src/components/healthCheck/healthCheckComponentService.test.js index 94c490b422f..e7934ed0e9f 100644 --- a/creator-node/src/components/healthCheck/healthCheckComponentService.test.js +++ b/creator-node/src/components/healthCheck/healthCheckComponentService.test.js @@ -677,4 +677,26 @@ describe('Test Health Check Verbose', function () { assert.deepStrictEqual(verboseRes, defaultRes) }) + + it('Should check that considerNodeUnhealthy env var returns healthy false', async function () { + config.set('considerNodeUnhealthy', true) + + const defaultRes = await healthCheck( + { + libs: libsMock, + snapbackSM: snapbackSMMock, + asyncProcessingQueue: AsyncProcessingQueueMock(0, 2), + trustedNotifierManager: trustedNotifierManagerMock + }, + mockLogger, + sequelizeMock, + getMonitorsMock, + TranscodingQueueMock(4, 0).getTranscodeQueueJobs, + TranscodingQueueMock(4, 0).isAvailable, + AsyncProcessingQueueMock(0, 2).getAsyncProcessingQueueJobs, + 2 + ) + + assert.deepStrictEqual(defaultRes.healthy, false) + }) }) From 942fdce2277d04ae5c34d15e558f20b531bc29d8 Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Wed, 24 Aug 2022 19:12:32 +0000 Subject: [PATCH 4/7] Test for creator node selection --- .../creatorNode/CreatorNodeSelection.test.ts | 35 +++++++++++++++++++ .../creatorNode/CreatorNodeSelection.ts | 3 +- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/libs/src/services/creatorNode/CreatorNodeSelection.test.ts b/libs/src/services/creatorNode/CreatorNodeSelection.test.ts index a173292fea8..6ab2bb8f226 100644 --- a/libs/src/services/creatorNode/CreatorNodeSelection.test.ts +++ b/libs/src/services/creatorNode/CreatorNodeSelection.test.ts @@ -148,6 +148,41 @@ describe('test CreatorNodeSelection', () => { assert(returnedHealthyServices.size === 0) }) + it('Does not select healthy: false node even if status code is 200', async function () { + const healthy = 'https://healthy.audius.co' + nock(healthy) + .get('/health_check/verbose') + .reply(200, { data: defaultHealthCheckData }) + + const healthyButSlow = 'https://healthybutslow.audius.co' + nock(healthyButSlow) + .get('/health_check/verbose') + .delay(100) + .reply(200, { data: defaultHealthCheckData }) + + const notHealthyButReturns200 = 'https://notHealthyButReturns200.audius.co' + nock(notHealthyButReturns200) + .get('/health_check/verbose') + .delay(200) + .reply(200, { data: { ...defaultHealthCheckData, healthy: false } }) + + const cns = new CreatorNodeSelection({ + creatorNode: mockCreatorNode, + numberOfNodes: 3, + ethContracts: mockEthContracts( + [healthy, healthyButSlow, notHealthyButReturns200], + '1.2.3' + ), + blacklist: null + }) + + const { primary, secondaries, services } = await cns.select() + console.log('services', services) + assert.deepStrictEqual(primary, healthy) + assert.deepStrictEqual(secondaries.length, 1) + assert.deepStrictEqual(secondaries[0], healthyButSlow) + }) + it('select healthy nodes as the primary and secondary, and do not select unhealthy nodes', async () => { const upToDate = 'https://upToDate.audius.co' nock(upToDate) diff --git a/libs/src/services/creatorNode/CreatorNodeSelection.ts b/libs/src/services/creatorNode/CreatorNodeSelection.ts index aefef8fafb6..90d8b26a629 100644 --- a/libs/src/services/creatorNode/CreatorNodeSelection.ts +++ b/libs/src/services/creatorNode/CreatorNodeSelection.ts @@ -403,7 +403,8 @@ export class CreatorNodeSelection extends ServiceSelection { storagePathSize, storagePathUsed ) - isHealthy = isUp && versionIsUpToDate && hasEnoughStorage && isHealthyStatus + isHealthy = + isUp && versionIsUpToDate && hasEnoughStorage && isHealthyStatus } if (!isHealthy) { From 215cff1c58a89059b52d5cf0f338dfea6e6ba0b6 Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Wed, 24 Aug 2022 22:51:46 +0000 Subject: [PATCH 5/7] Move healthy check for isNodeHealthy --- .../stateMachineManager/CNodeHealthManager.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js index 7951e76efe5..2b131c6ee4d 100644 --- a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js +++ b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js @@ -88,6 +88,11 @@ class CNodeHealthManager { async isNodeHealthy(peer, performSimpleCheck = false) { try { const verboseHealthCheckResp = await this.queryVerboseHealthCheck(peer) + // if node returns healthy: false consider that unhealthy just like non-200 response + const { healthy } = verboseHealthCheckResp + if (!healthy) { + throw new Error(`Node health check returned healthy: false`) + } if (!performSimpleCheck) { this.determinePeerHealth(verboseHealthCheckResp) } @@ -133,11 +138,6 @@ class CNodeHealthManager { */ determinePeerHealth(verboseHealthCheckResp) { // Check for sufficient minimum storage size - const { healthy } = verboseHealthCheckResp - if (!healthy) { - throw new Error(`Node health check returned healthy: false`) - } - const { storagePathSize, storagePathUsed } = verboseHealthCheckResp if ( storagePathSize && @@ -235,6 +235,7 @@ class CNodeHealthManager { */ async isPrimaryHealthy(primary) { const isHealthy = await this.isNodeHealthy(primary, true) + logger.info(`primary isHealthy ${isHealthy}`) if (!isHealthy) { const failedTimestamp = From f2c8f5ccfc0bc0437e14768267e3805fc1a401e2 Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Thu, 25 Aug 2022 20:12:41 +0000 Subject: [PATCH 6/7] Test fix --- .../services/stateMachineManager/CNodeHealthManager.js | 1 - creator-node/test/CNodeHealthManager.test.js | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js index 2b131c6ee4d..939c6dc152e 100644 --- a/creator-node/src/services/stateMachineManager/CNodeHealthManager.js +++ b/creator-node/src/services/stateMachineManager/CNodeHealthManager.js @@ -235,7 +235,6 @@ class CNodeHealthManager { */ async isPrimaryHealthy(primary) { const isHealthy = await this.isNodeHealthy(primary, true) - logger.info(`primary isHealthy ${isHealthy}`) if (!isHealthy) { const failedTimestamp = diff --git a/creator-node/test/CNodeHealthManager.test.js b/creator-node/test/CNodeHealthManager.test.js index 7e67f3848db..cb9a8504780 100644 --- a/creator-node/test/CNodeHealthManager.test.js +++ b/creator-node/test/CNodeHealthManager.test.js @@ -174,9 +174,10 @@ describe('test CNodeHealthManager -- isNodeHealthy()', function () { ) }) - it('returns false when determinePeerHealth throws with performSimpleCheck=false', async function () { + it.only('returns false when determinePeerHealth throws with performSimpleCheck=false', async function () { // Stub functions that isNodeHealthy() will call const node = 'http://some_content_node.co' + const isNodeHealthyError = new Error('Node health check returned healthy: false') const determinePeerHealthError = new Error('test determinePeerHealthError') const verboseHealthCheckResp = { healthy: false } const queryVerboseHealthCheckStub = sandbox @@ -191,11 +192,9 @@ describe('test CNodeHealthManager -- isNodeHealthy()', function () { const isHealthy = await CNodeHealthManager.isNodeHealthy(node, false) expect(isHealthy).to.be.false expect(queryVerboseHealthCheckStub).to.have.been.calledOnceWithExactly(node) - expect(determinePeerHealthStub).to.have.been.calledOnceWithExactly( - verboseHealthCheckResp - ) + expect(determinePeerHealthStub).to.not.have.been.called.called expect(logErrorStub).to.have.been.called.calledOnceWithExactly( - `isNodeHealthy() peer=${node} is unhealthy: ${determinePeerHealthError.toString()}` + `isNodeHealthy() peer=${node} is unhealthy: ${isNodeHealthyError.toString()}` ) }) From 1eb521ab6e6241819fd1b48c76eb4dd89940cc8e Mon Sep 17 00:00:00 2001 From: Dheeraj Manjunath Date: Thu, 25 Aug 2022 20:19:10 +0000 Subject: [PATCH 7/7] only --- creator-node/test/CNodeHealthManager.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/creator-node/test/CNodeHealthManager.test.js b/creator-node/test/CNodeHealthManager.test.js index cb9a8504780..b71dc72779a 100644 --- a/creator-node/test/CNodeHealthManager.test.js +++ b/creator-node/test/CNodeHealthManager.test.js @@ -174,7 +174,7 @@ describe('test CNodeHealthManager -- isNodeHealthy()', function () { ) }) - it.only('returns false when determinePeerHealth throws with performSimpleCheck=false', async function () { + it('returns false when determinePeerHealth throws with performSimpleCheck=false', async function () { // Stub functions that isNodeHealthy() will call const node = 'http://some_content_node.co' const isNodeHealthyError = new Error('Node health check returned healthy: false')