Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Availability: Adds retry on 408s to gateway calls for metadata operations #3089

Merged
merged 4 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,17 +257,35 @@ private async Task<HttpResponseMessage> SendHttpHelperAsync(
datum.RecordHttpResponse(requestMessage, responseMessage, resourceType, requestStartTime);
}

return responseMessage;
if (responseMessage.StatusCode != HttpStatusCode.RequestTimeout)
{
return responseMessage;
}

if (!timeoutPolicy.IsSafeToRetry(requestMessage.Method))
{
return responseMessage;
}

// Limit retry on timeouts to only control plane hot path operations
if (!(timeoutPolicy is HttpTimeoutPolicyControlPlaneRetriableHotPath))
j82w marked this conversation as resolved.
Show resolved Hide resolved
{
return responseMessage;
}

bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator);
if (isOutOfRetries)
{
return responseMessage;
}
}
catch (Exception e)
{
if (clientSideRequestStatistics is ClientSideRequestStatisticsTraceDatum datum)
{
datum.RecordHttpException(requestMessage, e, resourceType, requestStartTime);
}

bool isOutOfRetries = (DateTime.UtcNow - startDateTimeUtc) > timeoutPolicy.MaximumRetryTimeLimit || // Maximum of time for all retries
!timeoutEnumerator.MoveNext(); // No more retries are configured
bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator);

switch (e)
{
Expand Down Expand Up @@ -308,7 +326,7 @@ private async Task<HttpResponseMessage> SendHttpHelperAsync(
throw;
}
}

}

if (delayForNextRequest != TimeSpan.Zero)
Expand All @@ -318,6 +336,16 @@ private async Task<HttpResponseMessage> SendHttpHelperAsync(
}
}

private static bool IsOutOfRetries(
HttpTimeoutPolicy timeoutPolicy,
DateTime startDateTimeUtc,
IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> timeoutEnumerator)
{
bool isOutOfRetries = (DateTime.UtcNow - startDateTimeUtc) > timeoutPolicy.MaximumRetryTimeLimit || // Maximum of time for all retries
j82w marked this conversation as resolved.
Show resolved Hide resolved
!timeoutEnumerator.MoveNext(); // No more retries are configured
return isOutOfRetries;
}

private async Task<HttpResponseMessage> ExecuteHttpHelperAsync(
HttpRequestMessage requestMessage,
ResourceType resourceType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,55 @@ public async Task VerifyPkRangeCacheRefreshOnThrottlesAsync()
Assert.AreEqual(0, ifNoneMatchValues.Count(x => string.IsNullOrEmpty(x)), "The cache is already init. It should never re-initialize the cache.");
}

[TestMethod]
public async Task VerifyPkRangeCacheRefreshOnTimeoutsAsync()
{
int pkRangeCalls = 0;
HttpClientHandlerHelper httpHandlerHelper = new();
List<string> ifNoneMatchValues = new();
httpHandlerHelper.RequestCallBack = (request, cancellationToken) =>
{
if (!request.RequestUri.ToString().EndsWith("pkranges"))
{
return null;
}

ifNoneMatchValues.Add(request.Headers.IfNoneMatch.ToString());

pkRangeCalls++;

// Cause timeout on the init
if (pkRangeCalls == 1)
{
return Task.FromResult(new HttpResponseMessage(HttpStatusCode.RequestTimeout));
}

return null;
};

CosmosClientOptions clientOptions = new CosmosClientOptions()
{
HttpClientFactory = () => new HttpClient(httpHandlerHelper),
};

CosmosClient resourceClient = TestCommon.CreateCosmosClient(clientOptions);

string dbName = Guid.NewGuid().ToString();
string containerName = nameof(PartitionKeyRangeCacheTests);

Database db = await resourceClient.CreateDatabaseIfNotExistsAsync(dbName);
Container container = await db.CreateContainerIfNotExistsAsync(
containerName,
"/pk",
400);

ToDoActivity toDoActivity = ToDoActivity.CreateRandomToDoActivity();
await container.CreateItemAsync<ToDoActivity>(toDoActivity);
Assert.AreEqual(3, pkRangeCalls);

Assert.AreEqual(2, ifNoneMatchValues.Count(x => string.IsNullOrEmpty(x)), "First call is a 408");
}

[TestMethod]
public async Task TestRidRefreshOnNotFoundAsync()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,82 @@ async Task<HttpResponseMessage> sendFunc(HttpRequestMessage request, Cancellatio
}
}

[TestMethod]
public async Task RetryTransient408sTestAsync()
{
int count = 0;
async Task<HttpResponseMessage> sendFunc(HttpRequestMessage request, CancellationToken cancellationToken)
{
count++;

if (count <= 2)
{
await Task.Delay(TimeSpan.FromMilliseconds(10));
return new HttpResponseMessage(HttpStatusCode.RequestTimeout);
}

if (count == 3)
{
await Task.Delay(TimeSpan.FromMilliseconds(10));
return new HttpResponseMessage(HttpStatusCode.OK);
}

throw new Exception("Should not return after the success");
}

DocumentClientEventSource eventSource = DocumentClientEventSource.Instance;
HttpMessageHandler messageHandler = new MockMessageHandler(sendFunc);
using CosmosHttpClient cosmoshttpClient = MockCosmosUtil.CreateCosmosHttpClient(() => new HttpClient(messageHandler));

HttpResponseMessage responseMessage = await cosmoshttpClient.SendHttpAsync(() =>
new ValueTask<HttpRequestMessage>(
result: new HttpRequestMessage(HttpMethod.Get, new Uri("http://localhost"))),
resourceType: ResourceType.Collection,
timeoutPolicy: HttpTimeoutPolicyControlPlaneRetriableHotPath.Instance,
clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow),
cancellationToken: default);

Assert.AreEqual(HttpStatusCode.OK, responseMessage.StatusCode);
}

[TestMethod]
public async Task DoesNotRetryTransient408sOnDefaultPolicyTestAsync()
{
int count = 0;
async Task<HttpResponseMessage> sendFunc(HttpRequestMessage request, CancellationToken cancellationToken)
{
count++;

if (count <= 2)
{
await Task.Delay(TimeSpan.FromMilliseconds(10));
return new HttpResponseMessage(HttpStatusCode.RequestTimeout);
}

if (count == 3)
{
await Task.Delay(TimeSpan.FromMilliseconds(10));
return new HttpResponseMessage(HttpStatusCode.OK);
}

throw new Exception("Should not return after the success");
}

DocumentClientEventSource eventSource = DocumentClientEventSource.Instance;
HttpMessageHandler messageHandler = new MockMessageHandler(sendFunc);
using CosmosHttpClient cosmoshttpClient = MockCosmosUtil.CreateCosmosHttpClient(() => new HttpClient(messageHandler));

HttpResponseMessage responseMessage = await cosmoshttpClient.SendHttpAsync(() =>
new ValueTask<HttpRequestMessage>(
result: new HttpRequestMessage(HttpMethod.Get, new Uri("http://localhost"))),
resourceType: ResourceType.Collection,
timeoutPolicy: HttpTimeoutPolicyControlPlaneRead.Instance,
clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow),
cancellationToken: default);

Assert.AreEqual(HttpStatusCode.RequestTimeout, responseMessage.StatusCode, "Should be a request timeout");
}

[TestMethod]
public async Task RetryTransientIssuesForQueryPlanTestAsync()
{
Expand Down