Skip to content

Commit

Permalink
core: DEADLINE_EXCEEDED gives hints for slow resolver (grpc#10545)
Browse files Browse the repository at this point in the history
  • Loading branch information
YifeiZhuang authored Sep 8, 2023
1 parent 75af7ab commit 6335e0b
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 15 deletions.
8 changes: 4 additions & 4 deletions api/src/main/java/io/grpc/ClientStreamTracer.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@
@ThreadSafe
public abstract class ClientStreamTracer extends StreamTracer {
/**
* The call was delayed due to waiting for name resolution result.
* Indicates how long the call was delayed, in nanoseconds, due to waiting for name resolution
* result. If the call option is not set, the call did not experience name resolution delay.
*/
public static final CallOptions.Key<Boolean> NAME_RESOLUTION_DELAYED =
CallOptions.Key.createWithDefault("io.grpc.ClientStreamTracer.NAME_RESOLUTION_DELAYED",
false);
public static final CallOptions.Key<Long> NAME_RESOLUTION_DELAYED =
CallOptions.Key.create("io.grpc.ClientStreamTracer.NAME_RESOLUTION_DELAYED");

/**
* The stream is being created on a ready transport.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ public ClientStreamTracer newClientStreamTracer(
"previous-rpc-attempts", AttributeValue.longAttributeValue(info.getPreviousAttempts()));
attemptSpan.putAttribute(
"transparent-retry", AttributeValue.booleanAttributeValue(info.isTransparentRetry()));
if (info.getCallOptions().getOption(NAME_RESOLUTION_DELAYED)) {
if (info.getCallOptions().getOption(NAME_RESOLUTION_DELAYED) != null) {
span.addAnnotation("Delayed name resolution complete");
}
return new ClientTracer(attemptSpan, span, tracingHeader, isSampledToLocalTracing);
Expand Down
2 changes: 1 addition & 1 deletion census/src/test/java/io/grpc/census/CensusModulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public class CensusModulesTest {
CallOptions.DEFAULT.withOption(CUSTOM_OPTION, "customvalue");
private static final ClientStreamTracer.StreamInfo STREAM_INFO =
ClientStreamTracer.StreamInfo.newBuilder()
.setCallOptions(CallOptions.DEFAULT.withOption(NAME_RESOLUTION_DELAYED, true)).build();
.setCallOptions(CallOptions.DEFAULT.withOption(NAME_RESOLUTION_DELAYED, 10L)).build();

private static class StringInputStream extends InputStream {
final String string;
Expand Down
11 changes: 9 additions & 2 deletions core/src/main/java/io/grpc/internal/ClientCallImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static io.grpc.ClientStreamTracer.NAME_RESOLUTION_DELAYED;
import static io.grpc.Contexts.statusFromCancelled;
import static io.grpc.Status.DEADLINE_EXCEEDED;
import static io.grpc.internal.GrpcUtil.CONTENT_ACCEPT_ENCODING_KEY;
Expand Down Expand Up @@ -261,9 +262,12 @@ public void runInContext() {
GrpcUtil.getClientStreamTracers(callOptions, headers, 0, false);
String deadlineName =
isFirstMin(callOptions.getDeadline(), context.getDeadline()) ? "CallOptions" : "Context";
Long nameResolutionDelay = callOptions.getOption(NAME_RESOLUTION_DELAYED);
String description = String.format(
"ClientCall started after %s deadline was exceeded .9%f seconds ago", deadlineName,
effectiveDeadline.timeRemaining(TimeUnit.NANOSECONDS) / NANO_TO_SECS);
"ClientCall started after %s deadline was exceeded %.9f seconds ago. "
+ "Name resolution delay %.9f seconds.", deadlineName,
effectiveDeadline.timeRemaining(TimeUnit.NANOSECONDS) / NANO_TO_SECS,
nameResolutionDelay == null ? 0 : nameResolutionDelay / NANO_TO_SECS);
stream = new FailingClientStream(DEADLINE_EXCEEDED.withDescription(description), tracers);
}

Expand Down Expand Up @@ -404,6 +408,9 @@ public void run() {
buf.append(seconds);
buf.append(String.format(Locale.US, ".%09d", nanos));
buf.append("s. ");
Long nsDelay = callOptions.getOption(NAME_RESOLUTION_DELAYED);
buf.append(String.format(Locale.US, "Name resolution delay %.9f seconds. ",
nsDelay == null ? 0 : nsDelay / NANO_TO_SECS));
buf.append(insight);
stream.cancel(DEADLINE_EXCEEDED.augmentDescription(buf.toString()));
}
Expand Down
8 changes: 7 additions & 1 deletion core/src/main/java/io/grpc/internal/ManagedChannelImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import io.grpc.ConnectivityState;
import io.grpc.ConnectivityStateInfo;
import io.grpc.Context;
import io.grpc.Deadline;
import io.grpc.DecompressorRegistry;
import io.grpc.EquivalentAddressGroup;
import io.grpc.ForwardingChannelBuilder;
Expand Down Expand Up @@ -298,6 +299,8 @@ public void uncaughtException(Thread t, Throwable e) {
// Temporary false flag that can skip the retry code path.
private final boolean retryEnabled;

private final Deadline.Ticker ticker = Deadline.getSystemTicker();

// Called from syncContext
private final ManagedClientTransport.Listener delayedTransportListener =
new DelayedTransportListener();
Expand Down Expand Up @@ -1066,21 +1069,24 @@ private final class PendingCall<ReqT, RespT> extends DelayedClientCall<ReqT, Res
final Context context;
final MethodDescriptor<ReqT, RespT> method;
final CallOptions callOptions;
private final long callCreationTime;

PendingCall(
Context context, MethodDescriptor<ReqT, RespT> method, CallOptions callOptions) {
super(getCallExecutor(callOptions), scheduledExecutor, callOptions.getDeadline());
this.context = context;
this.method = method;
this.callOptions = callOptions;
this.callCreationTime = ticker.nanoTime();
}

/** Called when it's ready to create a real call and reprocess the pending call. */
void reprocess() {
ClientCall<ReqT, RespT> realCall;
Context previous = context.attach();
try {
CallOptions delayResolutionOption = callOptions.withOption(NAME_RESOLUTION_DELAYED, true);
CallOptions delayResolutionOption = callOptions.withOption(NAME_RESOLUTION_DELAYED,
ticker.nanoTime() - callCreationTime);
realCall = newClientCall(method, delayResolutionOption);
} finally {
context.detach(previous);
Expand Down
29 changes: 25 additions & 4 deletions core/src/test/java/io/grpc/internal/ClientCallImplTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package io.grpc.internal;

import static com.google.common.truth.Truth.assertThat;
import static io.grpc.ClientStreamTracer.NAME_RESOLUTION_DELAYED;
import static io.grpc.internal.ClientStreamListener.RpcProgress.PROCESSED;
import static io.grpc.internal.GrpcUtil.ACCEPT_ENCODING_SPLITTER;
import static java.util.concurrent.TimeUnit.SECONDS;
Expand Down Expand Up @@ -765,7 +766,18 @@ public void onClose(Status status, Metadata trailers) {

@Test
public void deadlineExceededBeforeCallStarted() {
CallOptions callOptions = baseCallOptions.withDeadlineAfter(0, TimeUnit.SECONDS);
deadlineExeedeed(baseCallOptions.withDeadlineAfter(0, TimeUnit.SECONDS),
"Name resolution delay 0.000000000 seconds.");
}

@Test
public void deadlineExceededBeforeCallStartedDelayed() {
deadlineExeedeed(baseCallOptions.withDeadlineAfter(0, TimeUnit.SECONDS)
.withOption(NAME_RESOLUTION_DELAYED, 1200000000L),
"Name resolution delay 1.200000000 seconds.");
}

private void deadlineExeedeed(CallOptions callOptions, String descriptionSuffix) {
fakeClock.forwardTime(System.nanoTime(), TimeUnit.NANOSECONDS);
ClientCallImpl<Void, Void> call = new ClientCallImpl<>(
method,
Expand All @@ -785,8 +797,10 @@ public void deadlineExceededBeforeCallStarted() {
any(Context.class));
verify(callListener, timeout(1000)).onClose(statusCaptor.capture(), any(Metadata.class));
assertEquals(Status.Code.DEADLINE_EXCEEDED, statusCaptor.getValue().getCode());
assertThat(statusCaptor.getValue().getDescription())
String deadlineExceedDescription = statusCaptor.getValue().getDescription();
assertThat(deadlineExceedDescription)
.startsWith("ClientCall started after CallOptions deadline was exceeded");
assertThat(deadlineExceedDescription).endsWith(descriptionSuffix);
verifyNoInteractions(clientStreamProvider);
}

Expand Down Expand Up @@ -843,7 +857,8 @@ public void contextDeadlineShouldNotOverrideSmallerCallOptionsDeadline() {
.withDeadlineAfter(2000, TimeUnit.MILLISECONDS, deadlineCancellationExecutor);
Context origContext = context.attach();

CallOptions callOpts = baseCallOptions.withDeadlineAfter(1000, TimeUnit.MILLISECONDS);
CallOptions callOpts = baseCallOptions.withDeadlineAfter(1000, TimeUnit.MILLISECONDS)
.withOption(NAME_RESOLUTION_DELAYED, 1200000000L);
ClientCallImpl<Void, Void> call = new ClientCallImpl<>(
method,
MoreExecutors.directExecutor(),
Expand All @@ -859,6 +874,11 @@ public void contextDeadlineShouldNotOverrideSmallerCallOptionsDeadline() {
verify(stream).setDeadline(deadlineCaptor.capture());

assertTimeoutBetween(deadlineCaptor.getValue().timeRemaining(TimeUnit.MILLISECONDS), 600, 1000);
fakeClock.forwardNanos(TimeUnit.MILLISECONDS.toNanos(1000));
verify(stream, timeout(1000)).cancel(statusCaptor.capture());
String deadlineExceedDescription = statusCaptor.getValue().getDescription();
assertThat(deadlineExceedDescription)
.contains("Name resolution delay 1.200000000 seconds.");
}

@Test
Expand Down Expand Up @@ -914,7 +934,8 @@ public void expiredDeadlineCancelsStream_CallOptions() {
verify(stream, times(1)).cancel(statusCaptor.capture());
assertEquals(Status.Code.DEADLINE_EXCEEDED, statusCaptor.getValue().getCode());
assertThat(statusCaptor.getValue().getDescription())
.matches("deadline exceeded after [0-9]+\\.[0-9]+s. \\[remote_addr=127\\.0\\.0\\.1:443\\]");
.matches("deadline exceeded after [0-9]+\\.[0-9]+s. "
+ "Name resolution delay 0.000000000 seconds. \\[remote_addr=127\\.0\\.0\\.1:443\\]");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,7 @@ public void loadBalancerThrowsInHandleResolvedAddresses() {
}

@Test
public void delayedNameResolution() {
public void delayedNameResolution() throws Exception {
ClientStream mockStream = mock(ClientStream.class);
final ClientStreamTracer tracer = new ClientStreamTracer() {};
ClientStreamTracer.Factory factory = new ClientStreamTracer.Factory() {
Expand All @@ -1096,6 +1096,7 @@ public ClientStreamTracer newClientStreamTracer(StreamInfo info, Metadata header
ClientCall<String, Integer> call = channel.newCall(method, callOptions);
call.start(mockCallListener, new Metadata());

Thread.sleep(500);
nsFactory.allResolved();
Subchannel subchannel =
createSubchannelSafely(helper, addressGroup, Attributes.EMPTY, subchannelStateListener);
Expand All @@ -1118,7 +1119,10 @@ public ClientStreamTracer newClientStreamTracer(StreamInfo info, Metadata header
same(method), any(Metadata.class), callOptionsCaptor.capture(),
tracersCaptor.capture());
assertThat(Arrays.asList(tracersCaptor.getValue()).contains(tracer)).isTrue();
assertThat(callOptionsCaptor.getValue().getOption(NAME_RESOLUTION_DELAYED)).isTrue();
Long realDelay = callOptionsCaptor.getValue().getOption(NAME_RESOLUTION_DELAYED);
assertThat(realDelay).isNotNull();
assertThat(realDelay).isAtLeast(
TimeUnit.MILLISECONDS.toNanos(400));//sleep not precise
}

@Test
Expand Down

0 comments on commit 6335e0b

Please sign in to comment.