Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New "soft unhealthy" env var + actually set healthy status from content node #3730

Merged
merged 10 commits into from
Aug 25, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,11 @@ const healthCheck = async (
solDelegatePublicKeyBase58 = solDelegateKeyPair.publicKey.toBase58()
} catch (_) {}

const healthy = !config.get('considerNodeUnhealthy')

const response = {
...versionInfo,
healthy: true,
healthy,
git: process.env.GIT_SHA,
selectedDiscoveryProvider: 'none',
creatorNodeEndpoint: config.get('creatorNodeEndpoint'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,4 +677,26 @@ describe('Test Health Check Verbose', function () {

assert.deepStrictEqual(verboseRes, defaultRes)
})

it('Should check that considerNodeUnhealthy env var returns healthy false', async function () {
config.set('considerNodeUnhealthy', true)

const defaultRes = await healthCheck(
{
libs: libsMock,
snapbackSM: snapbackSMMock,
asyncProcessingQueue: AsyncProcessingQueueMock(0, 2),
trustedNotifierManager: trustedNotifierManagerMock
},
mockLogger,
sequelizeMock,
getMonitorsMock,
TranscodingQueueMock(4, 0).getTranscodeQueueJobs,
TranscodingQueueMock(4, 0).isAvailable,
AsyncProcessingQueueMock(0, 2).getAsyncProcessingQueueJobs,
2
)

assert.deepStrictEqual(defaultRes.healthy, false)
})
})
6 changes: 6 additions & 0 deletions creator-node/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,12 @@ const config = convict({
env: 'cidWhitelist',
default: ''
},
considerNodeUnhealthy: {
doc: 'Flag to mark the node as unhealthy (health_check will 200 but healthy: false in response). Wont be selected in replica sets, other nodes will roll this node off replica sets for their users',
format: Boolean,
env: 'considerNodeUnhealthy',
default: false
},

/** sync / snapback configs */

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ class CNodeHealthManager {
async isNodeHealthy(peer, performSimpleCheck = false) {
try {
const verboseHealthCheckResp = await this.queryVerboseHealthCheck(peer)
// if node returns healthy: false consider that unhealthy just like non-200 response
const { healthy } = verboseHealthCheckResp
if (!healthy) {
throw new Error(`Node health check returned healthy: false`)
}
if (!performSimpleCheck) {
this.determinePeerHealth(verboseHealthCheckResp)
}
Expand Down
7 changes: 3 additions & 4 deletions creator-node/test/CNodeHealthManager.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ describe('test CNodeHealthManager -- isNodeHealthy()', function () {
it('returns false when determinePeerHealth throws with performSimpleCheck=false', async function () {
// Stub functions that isNodeHealthy() will call
const node = 'http://some_content_node.co'
const isNodeHealthyError = new Error('Node health check returned healthy: false')
const determinePeerHealthError = new Error('test determinePeerHealthError')
const verboseHealthCheckResp = { healthy: false }
const queryVerboseHealthCheckStub = sandbox
Expand All @@ -191,11 +192,9 @@ describe('test CNodeHealthManager -- isNodeHealthy()', function () {
const isHealthy = await CNodeHealthManager.isNodeHealthy(node, false)
expect(isHealthy).to.be.false
expect(queryVerboseHealthCheckStub).to.have.been.calledOnceWithExactly(node)
expect(determinePeerHealthStub).to.have.been.calledOnceWithExactly(
verboseHealthCheckResp
)
expect(determinePeerHealthStub).to.not.have.been.called.called
expect(logErrorStub).to.have.been.called.calledOnceWithExactly(
`isNodeHealthy() peer=${node} is unhealthy: ${determinePeerHealthError.toString()}`
`isNodeHealthy() peer=${node} is unhealthy: ${isNodeHealthyError.toString()}`
)
})

Expand Down
35 changes: 35 additions & 0 deletions libs/src/services/creatorNode/CreatorNodeSelection.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,41 @@ describe('test CreatorNodeSelection', () => {
assert(returnedHealthyServices.size === 0)
})

it('Does not select healthy: false node even if status code is 200', async function () {
const healthy = 'https://healthy.audius.co'
nock(healthy)
.get('/health_check/verbose')
.reply(200, { data: defaultHealthCheckData })

const healthyButSlow = 'https://healthybutslow.audius.co'
nock(healthyButSlow)
.get('/health_check/verbose')
.delay(100)
.reply(200, { data: defaultHealthCheckData })

const notHealthyButReturns200 = 'https://notHealthyButReturns200.audius.co'
nock(notHealthyButReturns200)
.get('/health_check/verbose')
.delay(200)
.reply(200, { data: { ...defaultHealthCheckData, healthy: false } })

const cns = new CreatorNodeSelection({
creatorNode: mockCreatorNode,
numberOfNodes: 3,
ethContracts: mockEthContracts(
[healthy, healthyButSlow, notHealthyButReturns200],
'1.2.3'
),
blacklist: null
})

const { primary, secondaries, services } = await cns.select()
console.log('services', services)
assert.deepStrictEqual(primary, healthy)
assert.deepStrictEqual(secondaries.length, 1)
assert.deepStrictEqual(secondaries[0], healthyButSlow)
})

it('select healthy nodes as the primary and secondary, and do not select unhealthy nodes', async () => {
const upToDate = 'https://upToDate.audius.co'
nock(upToDate)
Expand Down
6 changes: 5 additions & 1 deletion libs/src/services/creatorNode/CreatorNodeSelection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ export class CreatorNodeSelection extends ServiceSelection {
this.currentVersion as string,
resp.response.data.data.version
)

const { healthy: isHealthyStatus } = resp.response.data.data

const { storagePathSize, storagePathUsed, maxStorageUsedPercent } =
resp.response.data.data
if (maxStorageUsedPercent) {
Expand All @@ -400,7 +403,8 @@ export class CreatorNodeSelection extends ServiceSelection {
storagePathSize,
storagePathUsed
)
isHealthy = isUp && versionIsUpToDate && hasEnoughStorage
isHealthy =
isUp && versionIsUpToDate && hasEnoughStorage && isHealthyStatus
}

if (!isHealthy) {
Expand Down