Skip to content

Commit

Permalink
Fix retries that time out hanging forever. [Backport to 1.59.x] (grpc…
Browse files Browse the repository at this point in the history
…#10884)

* Fix retries that timeout hanging forever. (grpc#10855)

Fixes grpc#10336
  • Loading branch information
larry-safran committed Feb 7, 2024
1 parent 455c840 commit 7c6849d
Showing 1 changed file with 19 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public class RetryTest {
@SuppressWarnings("unchecked")
private ClientCall.Listener<Integer> mockCallListener =
mock(ClientCall.Listener.class, delegatesTo(testCallListener));
private java.util.concurrent.ScheduledFuture<?> activeFuture = null;

private CountDownLatch backoffLatch = new CountDownLatch(1);
private final EventLoopGroup group = new DefaultEventLoopGroup() {
Expand All @@ -118,7 +119,7 @@ public ScheduledFuture<?> schedule(
if (!command.getClass().getName().contains("RetryBackoffRunnable")) {
return super.schedule(command, delay, unit);
}
fakeClock.getScheduledExecutorService().schedule(
activeFuture = fakeClock.getScheduledExecutorService().schedule(
new Runnable() {
@Override
public void run() {
Expand Down Expand Up @@ -248,15 +249,22 @@ private void assertInboundWireSizeRecorded(long length) throws Exception {

/**
 * Convenience overload of {@link #assertRpcStatusRecorded(Status.Code, long, long, long)}
 * that requires the recorded roundtrip latency to match the expected value exactly
 * (zero tolerance).
 *
 * @param code expected client status code for the finished RPC
 * @param roundtripLatencyMs exact expected roundtrip latency in milliseconds
 * @param outboundMessages expected number of messages sent on the RPC
 */
private void assertRpcStatusRecorded(
    Status.Code code, long roundtripLatencyMs, long outboundMessages) throws Exception {
  assertRpcStatusRecorded(code, roundtripLatencyMs, /* toleranceMs= */ 0, outboundMessages);
}

/**
 * Polls the next client metrics record and asserts the RPC's recorded status code,
 * roundtrip latency (within {@code toleranceMs} of the expected value), and outbound
 * message count.
 *
 * @param code expected client status code for the finished RPC
 * @param roundtripLatencyMs expected roundtrip latency in milliseconds
 * @param toleranceMs allowed absolute deviation from {@code roundtripLatencyMs}
 * @param outboundMessages expected number of messages sent on the RPC
 * @throws Exception if polling the metrics record is interrupted or times out
 */
private void assertRpcStatusRecorded(
    Status.Code code, long roundtripLatencyMs, long toleranceMs, long outboundMessages)
    throws Exception {
  MetricsRecord record = clientStatsRecorder.pollRecord(7, SECONDS);
  assertNotNull(record);
  TagValue statusTag = record.tags.get(RpcMeasureConstants.GRPC_CLIENT_STATUS);
  assertNotNull(statusTag);
  assertThat(statusTag.asString()).isEqualTo(code.toString());
  assertThat(record.getMetricAsLongOrFail(DeprecatedCensusConstants.RPC_CLIENT_FINISHED_COUNT))
      .isEqualTo(1);
  // Test execution can advance the clock slightly past the exact expected latency, so
  // compare within a tolerance rather than asserting exact equality.
  long roundtripLatency =
      record.getMetricAsLongOrFail(RpcMeasureConstants.GRPC_CLIENT_ROUNDTRIP_LATENCY);
  assertThat(Math.abs(roundtripLatency - roundtripLatencyMs)).isAtMost(toleranceMs);
  assertThat(record.getMetricAsLongOrFail(RpcMeasureConstants.GRPC_CLIENT_SENT_MESSAGES_PER_RPC))
      .isEqualTo(outboundMessages);
}
Expand Down Expand Up @@ -303,10 +311,12 @@ public void retryUntilBufferLimitExceeded() throws Exception {
call.sendMessage(message);

// let attempt fail
testCallListener.clear();
testCallListener.reset();
serverCall.close(
Status.UNAVAILABLE.withDescription("2nd attempt failed"),
new Metadata());
fakeClock.forwardTime(1, SECONDS);
activeFuture.get(1, SECONDS); // Make sure the close is done.
// no more retry
testCallListener.verifyDescription("2nd attempt failed", 5000);
}
Expand Down Expand Up @@ -420,9 +430,12 @@ public void streamClosed(Status status) {
call.cancel("Cancelled before commit", null);
// Let the netty substream listener be closed.
streamClosedLatch.countDown();
assertNotNull("No activeFuture", activeFuture);
fakeClock.forwardTime(1, SECONDS);
activeFuture.get(1, SECONDS);
// The call listener is closed.
verify(mockCallListener, timeout(5000)).onClose(any(Status.class), any(Metadata.class));
assertRpcStatusRecorded(Code.CANCELLED, 17_000, 1);
assertRpcStatusRecorded(Code.CANCELLED, 18_000, 1);
assertRetryStatsRecorded(1, 0, 0);
}

Expand Down Expand Up @@ -551,7 +564,7 @@ public void onClose(Status status, Metadata trailers) {
closeLatch.countDown();
}

void clear() {
void reset() {
status = null;
closeLatch = new CountDownLatch(1);
}
Expand Down

0 comments on commit 7c6849d

Please sign in to comment.