Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: treat IO exceptions as retryable in HTTP engines #979

Merged
merged 4 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/0645c929-bdfe-4d07-a131-b0b447422fff.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"id": "0645c929-bdfe-4d07-a131-b0b447422fff",
"type": "bugfix",
"description": "Treat all IO errors in OkHttp & CRT engines as retryable (e.g., socket errors, DNS lookup failures, etc.)"
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,11 @@ internal inline fun <T> mapCrtException(block: () -> T): T =
try {
block()
} catch (ex: CrtRuntimeException) {
throw HttpException(fmtCrtErrorMessage(ex.errorCode), errorCode = mapCrtErrorCode(ex.errorCode))
throw HttpException(
message = fmtCrtErrorMessage(ex.errorCode),
errorCode = mapCrtErrorCode(ex.errorName),
retryable = isRetryable(ex.errorName),
)
}

internal fun fmtCrtErrorMessage(errorCode: Int): String {
Expand All @@ -133,7 +137,7 @@ internal fun fmtCrtErrorMessage(errorCode: Int): String {
// See:
// IO Errors: https://github.com/awslabs/aws-c-io/blob/v0.13.19/include/aws/io/io.h#L89
// HTTP Errors: https://github.com/awslabs/aws-c-http/blob/v0.7.6/include/aws/http/http.h#L15
internal fun mapCrtErrorCode(code: Int): HttpErrorCode = when (CRT.errorName(code)) {
private fun mapCrtErrorCode(errorName: String?) = when (errorName) {
"AWS_IO_SOCKET_TIMEOUT" -> HttpErrorCode.SOCKET_TIMEOUT
"AWS_ERROR_HTTP_UNSUPPORTED_PROTOCOL" -> HttpErrorCode.PROTOCOL_NEGOTIATION_ERROR
"AWS_IO_SOCKET_NOT_CONNECTED" -> HttpErrorCode.CONNECT_TIMEOUT
Expand All @@ -143,6 +147,27 @@ internal fun mapCrtErrorCode(code: Int): HttpErrorCode = when (CRT.errorName(cod
else -> HttpErrorCode.SDK_UNKNOWN
}

internal fun mapCrtErrorCode(code: Int) = mapCrtErrorCode(CRT.errorName(code))

internal fun isRetryable(errorName: String?) = errorName?.let {
when {
// All IO errors are retryable
it.startsWith("AWS_IO_") || it.startsWith("AWS_ERROR_IO_") -> true

// Any connection closure is retryable
it in connectionClosedErrors -> true

// All proxy errors are retryable
it.startsWith("AWS_ERROR_HTTP_PROXY_") -> true

// Any connection manager issues are retryable
it.startsWith("AWS_ERROR_HTTP_CONNECTION_MANAGER_") -> true

// Any other errors are not retryable
else -> false
}
} ?: false // Unknown error codes are not retryable

private val tlsNegotiationErrors = setOf(
"AWS_IO_TLS_ERROR_NEGOTIATION_FAILURE",
"AWS_IO_TLS_ERROR_NOT_NEGOTIATED",
Expand All @@ -152,6 +177,7 @@ private val tlsNegotiationErrors = setOf(

private val connectionClosedErrors = setOf(
"AWS_ERROR_HTTP_CONNECTION_CLOSED",
"AWS_ERROR_HTTP_SERVER_CLOSED",
"AWS_IO_BROKEN_PIPE",
"AWS_IO_SOCKET_CLOSED",
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
package aws.smithy.kotlin.runtime.http.engine.crt

import kotlin.test.Test
import kotlin.test.assertFalse
import kotlin.test.assertTrue

class RequestUtilTest {
@Test
fun testExceptionRetryability() {
setOf(
// All IO errors are retryable
"AWS_IO_SOCKET_NETWORK_DOWN",
"AWS_ERROR_IO_OPERATION_CANCELLED",
"AWS_IO_DNS_NO_ADDRESS_FOR_HOST",

// Any connection closure is retryable
"AWS_ERROR_HTTP_CONNECTION_CLOSED",
"AWS_ERROR_HTTP_SERVER_CLOSED",
"AWS_IO_BROKEN_PIPE",

// All proxy errors are retryable
"AWS_ERROR_HTTP_PROXY_CONNECT_FAILED",
"AWS_ERROR_HTTP_PROXY_STRATEGY_NTLM_CHALLENGE_TOKEN_MISSING",
"AWS_ERROR_HTTP_PROXY_STRATEGY_TOKEN_RETRIEVAL_FAILURE",

// Any connection manager issues are retryable
"AWS_ERROR_HTTP_CONNECTION_MANAGER_INVALID_STATE_FOR_ACQUIRE",
"AWS_ERROR_HTTP_CONNECTION_MANAGER_VENDED_CONNECTION_UNDERFLOW",
"AWS_ERROR_HTTP_CONNECTION_MANAGER_SHUTTING_DOWN",
).forEach { name -> assertTrue(isRetryable(name), "Expected $name to be retryable!") }

setOf(
// Any other errors are not retryable
"AWS_ERROR_HTTP_INVALID_METHOD",
"AWS_ERROR_PKCS11_CKR_CANCEL",
"AWS_ERROR_PEM_MALFORMED",

// Unknown error codes are not retryable
null,
).forEach { name -> assertFalse(isRetryable(name), "Expected $name to not be retryable!") }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,9 @@ internal inline fun<T> mapOkHttpExceptions(block: () -> T): T =
try {
block()
} catch (ex: IOException) {
throw HttpException(ex, ex.errCode(), ex.isRetryable())
throw HttpException(ex, ex.errCode(), retryable = true) // All IOExceptions are retryable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question/Correctness: Is there any reason we wouldn't just make all IOExceptions retryable at the retry policy level?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few reasons came to mind:

  • IOException isn't in Kotlin stdlib—it's a JVM exception. Our retry policy is currently in common code. We could introduce an expect/actual function to dispatch down for platform-specific exceptions but it doesn't feel warranted for this use case.
  • IOException shouldn't be thrown by the CRT engine. If it is, then something is wrong that we likely don't understand and retrying may not be the best course of action.
  • Retry policies apply across the entirety of a request, not merely the HTTP call invocation. IOExceptions may occur during local IO errors such as streaming from a file, reading system properties, string encoding problems, etc. It seemed wise to scope these IOException retries to just the HTTP call itself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IOException isn't in Kotlin stdlib—it's a JVM exception. Our retry policy is currently in common code. We could introduce an expect/actual function to dispatch down for platform-specific exceptions but it doesn't feel warranted for this use case.

We already have an expect/actual for it FYI: https://github.com/awslabs/smithy-kotlin/blob/main/runtime/runtime-core/common/src/aws/smithy/kotlin/runtime/io/Exceptions.kt#L8

IOException shouldn't be thrown by the CRT engine. If it is, then something is wrong that we likely don't understand and retrying may not be the best course of action.

It entirely depends on how we wrap the exceptions. To your point though CRT JVM bindings throw generic exceptions with a CRT specific error code IIRC. I wasn't thinking of CRT though I was thinking of other JVM specific bindings that customers may writer or that we may provide in the future. Doing it at the retry policy level would capture all of them.

Retry policies apply across the entirety of a request, not merely the HTTP call invocation. IOExceptions may occur during local IO errors such as streaming from a file, reading system properties, string encoding problems, etc. It seemed wise to scope these IOException retries to just the HTTP call itself.

I think we are wrapping all of our HTTP related exceptions into an HttpException with an error code if we can map it. I suppose because of this handling it at the specific engine level makes sense. We just have to remember to do it for each engine.


suggestion : If we are handling this at the specific engine level I think we ought to probably make the CRT exceptions retryable as well if we can in this PR. It would require looking at specific error codes probably.

}

private fun Exception.isRetryable(): Boolean = isCauseOrSuppressed<ConnectException>() || isConnectionClosedException()
private fun Exception.errCode(): HttpErrorCode = when {
isConnectTimeoutException() -> HttpErrorCode.CONNECT_TIMEOUT
isConnectionClosedException() -> HttpErrorCode.CONNECTION_CLOSED
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package aws.smithy.kotlin.runtime.http.engine.okhttp

import aws.smithy.kotlin.runtime.http.HttpErrorCode
import aws.smithy.kotlin.runtime.http.HttpException
import org.junit.jupiter.api.Assertions.assertTrue
import java.io.EOFException
import java.io.IOException
import java.net.ConnectException
Expand All @@ -20,22 +21,21 @@ class OkHttpExceptionTest {
data class ExceptionTest(
val ex: Exception,
val expectedError: HttpErrorCode,
val expectedRetryable: Boolean = false,
)

@Test
fun testMapExceptions() {
val tests = listOf(
ExceptionTest(IOException("unknown"), expectedError = HttpErrorCode.SDK_UNKNOWN),
ExceptionTest(IOException(SocketTimeoutException("connect timeout")), expectedError = HttpErrorCode.CONNECT_TIMEOUT, true),
ExceptionTest(IOException().apply { addSuppressed(SocketTimeoutException("connect timeout")) }, expectedError = HttpErrorCode.CONNECT_TIMEOUT, true),
ExceptionTest(IOException(SocketTimeoutException("connect timeout")), expectedError = HttpErrorCode.CONNECT_TIMEOUT),
ExceptionTest(IOException().apply { addSuppressed(SocketTimeoutException("connect timeout")) }, expectedError = HttpErrorCode.CONNECT_TIMEOUT),
ExceptionTest(IOException(SocketTimeoutException("read timeout")), expectedError = HttpErrorCode.SOCKET_TIMEOUT),
ExceptionTest(IOException().apply { addSuppressed(SocketTimeoutException("read timeout")) }, expectedError = HttpErrorCode.SOCKET_TIMEOUT),
ExceptionTest(SocketTimeoutException("read timeout"), expectedError = HttpErrorCode.SOCKET_TIMEOUT),
ExceptionTest(IOException(SSLHandshakeException("negotiate error")), expectedError = HttpErrorCode.TLS_NEGOTIATION_ERROR),
ExceptionTest(ConnectException("test connect error"), expectedError = HttpErrorCode.SDK_UNKNOWN, true),
ExceptionTest(ConnectException("test connect error"), expectedError = HttpErrorCode.SDK_UNKNOWN),
// see https://github.com/awslabs/aws-sdk-kotlin/issues/905
ExceptionTest(IOException("unexpected end of stream on https://test.aws.com", EOFException("\\n not found: limit=0 content=...")), expectedError = HttpErrorCode.CONNECTION_CLOSED, expectedRetryable = true),
ExceptionTest(IOException("unexpected end of stream on https://test.aws.com", EOFException("\\n not found: limit=0 content=...")), expectedError = HttpErrorCode.CONNECTION_CLOSED),
)

for (test in tests) {
Expand All @@ -46,7 +46,7 @@ class OkHttpExceptionTest {
}

assertEquals(test.expectedError, ex.errorCode, "ex=$ex; ${ex.suppressedExceptions}; cause=${ex.cause}; causeSuppressed=${ex.cause?.suppressedExceptions}")
assertEquals(test.expectedRetryable, ex.sdkErrorMetadata.isRetryable)
assertTrue(ex.sdkErrorMetadata.isRetryable)
}
}
}
Loading