From 2f5ddc713774e863eef5c708e4f666b3a60d81ef Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Thu, 19 Oct 2023 13:57:57 -0700 Subject: [PATCH 1/2] grpc-js: pick_first: fix happy eyeballs and reresolution in sticky TF mode --- packages/grpc-js/package.json | 2 +- .../grpc-js/src/load-balancer-pick-first.ts | 30 +++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/packages/grpc-js/package.json b/packages/grpc-js/package.json index 9d66b4095..29e4b5849 100644 --- a/packages/grpc-js/package.json +++ b/packages/grpc-js/package.json @@ -1,6 +1,6 @@ { "name": "@grpc/grpc-js", - "version": "1.9.6", + "version": "1.9.7", "description": "gRPC Library for Node - pure JS implementation", "homepage": "https://grpc.io/", "repository": "https://github.com/grpc/grpc-node/tree/master/packages/grpc-js", diff --git a/packages/grpc-js/src/load-balancer-pick-first.ts b/packages/grpc-js/src/load-balancer-pick-first.ts index 1a22b3f76..5e8361ace 100644 --- a/packages/grpc-js/src/load-balancer-pick-first.ts +++ b/packages/grpc-js/src/load-balancer-pick-first.ts @@ -174,6 +174,12 @@ export class PickFirstLoadBalancer implements LoadBalancer { */ private stickyTransientFailureMode = false; + /** + * Indicates whether we called channelControlHelper.requestReresolution since + * the last call to updateAddressList + */ + private requestedResolutionSinceLastUpdate = false; + /** * The most recent error reported by any subchannel as it transitioned to * TRANSIENT_FAILURE. @@ -216,15 +222,28 @@ export class PickFirstLoadBalancer implements LoadBalancer { } } + private requestReresolution() { + this.requestedResolutionSinceLastUpdate = true; + this.channelControlHelper.requestReresolution(); + } + private maybeEnterStickyTransientFailureMode() { - if (this.stickyTransientFailureMode) { + if (!this.allChildrenHaveReportedTF()) { return; } - if (!this.allChildrenHaveReportedTF()) { + if (!this.requestedResolutionSinceLastUpdate) { + /* Each time we get an update we reset each subchannel's + * hasReportedTransientFailure flag, so the next time we get to this + * point after that, each subchannel has reported TRANSIENT_FAILURE + * at least once since then. That is the trigger for requesting + * reresolution, whether or not the LB policy is already in sticky TF + * mode. */ + this.requestReresolution(); + } + if (this.stickyTransientFailureMode) { return; } this.stickyTransientFailureMode = true; - this.channelControlHelper.requestReresolution(); for (const { subchannel } of this.children) { subchannel.startConnecting(); } @@ -256,7 +275,7 @@ export class PickFirstLoadBalancer implements LoadBalancer { if (newState !== ConnectivityState.READY) { this.removeCurrentPick(); this.calculateAndReportNewState(); - this.channelControlHelper.requestReresolution(); + this.requestReresolution(); } return; } @@ -283,7 +302,7 @@ export class PickFirstLoadBalancer implements LoadBalancer { private startNextSubchannelConnecting(startIndex: number) { clearTimeout(this.connectionDelayTimeout); - if (this.triedAllSubchannels || this.stickyTransientFailureMode) { + if (this.triedAllSubchannels) { return; } for (const [index, child] of this.children.entries()) { @@ -382,6 +401,7 @@ export class PickFirstLoadBalancer implements LoadBalancer { this.currentSubchannelIndex = 0; this.children = []; this.triedAllSubchannels = false; + this.requestedResolutionSinceLastUpdate = false; } updateAddressList( From d465f839d46d708ca93a06956181a7e27b4e7ba0 Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Thu, 19 Oct 2023 16:20:04 -0700 Subject: [PATCH 2/2] Add pick_first requestReresolution tests --- packages/grpc-js/test/test-pick-first.ts | 90 +++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/packages/grpc-js/test/test-pick-first.ts b/packages/grpc-js/test/test-pick-first.ts index e9e9e5601..a018e7a2d 100644 --- a/packages/grpc-js/test/test-pick-first.ts +++ b/packages/grpc-js/test/test-pick-first.ts @@ -41,7 +41,11 @@ function updateStateCallBackForExpectedStateSequence( ) { const actualStateSequence: ConnectivityState[] = []; let lastPicker: Picker | null = null; + let finished = false; return (connectivityState: ConnectivityState, picker: Picker) => { + if (finished) { + return; + } // Ignore duplicate state transitions if ( connectivityState === actualStateSequence[actualStateSequence.length - 1] @@ -60,6 +64,7 @@ function updateStateCallBackForExpectedStateSequence( if ( expectedStateSequence[actualStateSequence.length] !== connectivityState ) { + finished = true; done( new Error( `Unexpected state ${ @@ -69,10 +74,12 @@ function updateStateCallBackForExpectedStateSequence( )}]` ) ); + return; } actualStateSequence.push(connectivityState); lastPicker = picker; if (actualStateSequence.length === expectedStateSequence.length) { + finished = true; done(); } }; @@ -90,7 +97,7 @@ describe('Shuffler', () => { }); }); -describe('pick_first load balancing policy', () => { +describe.only('pick_first load balancing policy', () => { const config = new PickFirstLoadBalancingConfig(false); let subchannels: MockSubchannel[] = []; const baseChannelControlHelper: ChannelControlHelper = { @@ -462,6 +469,87 @@ describe('pick_first load balancing policy', () => { }); }); }); + it('Should request reresolution every time each child reports TF', done => { + let reresolutionRequestCount = 0; + const targetReresolutionRequestCount = 3; + const currentStartState = ConnectivityState.IDLE; + const channelControlHelper = createChildChannelControlHelper( + baseChannelControlHelper, + { + createSubchannel: (subchannelAddress, subchannelArgs) => { + const subchannel = new MockSubchannel( + subchannelAddressToString(subchannelAddress), + currentStartState + ); + subchannels.push(subchannel); + return subchannel; + }, + updateState: updateStateCallBackForExpectedStateSequence( + [ConnectivityState.CONNECTING, ConnectivityState.TRANSIENT_FAILURE], + err => setImmediate(() => { + assert.strictEqual(reresolutionRequestCount, targetReresolutionRequestCount); + done(err); + }) + ), + requestReresolution: () => { + reresolutionRequestCount += 1; + } + } + ); + const pickFirst = new PickFirstLoadBalancer(channelControlHelper); + pickFirst.updateAddressList([{ host: 'localhost', port: 1 }], config); + process.nextTick(() => { + subchannels[0].transitionToState(ConnectivityState.TRANSIENT_FAILURE); + process.nextTick(() => { + pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config); + process.nextTick(() => { + subchannels[1].transitionToState(ConnectivityState.TRANSIENT_FAILURE); + process.nextTick(() => { + pickFirst.updateAddressList([{ host: 'localhost', port: 3 }], config); + process.nextTick(() => { + subchannels[2].transitionToState(ConnectivityState.TRANSIENT_FAILURE); + }); + }); + }); + }); + }); + }); + it('Should request reresolution if the new subchannels are already in TF', done => { + let reresolutionRequestCount = 0; + const targetReresolutionRequestCount = 3; + const currentStartState = ConnectivityState.TRANSIENT_FAILURE; + const channelControlHelper = createChildChannelControlHelper( + baseChannelControlHelper, + { + createSubchannel: (subchannelAddress, subchannelArgs) => { + const subchannel = new MockSubchannel( + subchannelAddressToString(subchannelAddress), + currentStartState + ); + subchannels.push(subchannel); + return subchannel; + }, + updateState: updateStateCallBackForExpectedStateSequence( + [ConnectivityState.TRANSIENT_FAILURE], + err => setImmediate(() => { + assert.strictEqual(reresolutionRequestCount, targetReresolutionRequestCount); + done(err); + }) + ), + requestReresolution: () => { + reresolutionRequestCount += 1; + } + } + ); + const pickFirst = new PickFirstLoadBalancer(channelControlHelper); + pickFirst.updateAddressList([{ host: 'localhost', port: 1 }], config); + process.nextTick(() => { + pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config); + process.nextTick(() => { + pickFirst.updateAddressList([{ host: 'localhost', port: 2 }], config); + }); + }); + }); describe('Address list randomization', () => { const shuffleConfig = new PickFirstLoadBalancingConfig(true); it('Should pick different subchannels after multiple updates', done => {