From 2e1a5fdcc7b9cd978660e6e9f030e03075f6fdc5 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Fri, 13 Dec 2024 18:18:02 -0500 Subject: [PATCH] [Bug] Use bigquery default retryable exceptions (#1431) * replace our custom list of retryable exceptions with BigQuery's defaults * remove BadRequest as a retryable error --- .../unreleased/Fixes-20241211-144752.yaml | 6 ++++++ dbt/adapters/bigquery/retry.py | 19 ++----------------- .../unit/test_bigquery_connection_manager.py | 6 ++++-- 3 files changed, 12 insertions(+), 19 deletions(-) create mode 100644 .changes/unreleased/Fixes-20241211-144752.yaml diff --git a/.changes/unreleased/Fixes-20241211-144752.yaml b/.changes/unreleased/Fixes-20241211-144752.yaml new file mode 100644 index 000000000..e666d5c31 --- /dev/null +++ b/.changes/unreleased/Fixes-20241211-144752.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix retry scenarios so that dbt always retries when BigQuery recommends a retry +time: 2024-12-11T14:47:52.36905-05:00 +custom: + Author: mikealfare + Issue: "263" diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py index 391c00e46..2cbdaa245 100644 --- a/dbt/adapters/bigquery/retry.py +++ b/dbt/adapters/bigquery/retry.py @@ -1,10 +1,8 @@ from typing import Callable, Optional -from google.api_core.exceptions import Forbidden from google.api_core.future.polling import DEFAULT_POLLING from google.api_core.retry import Retry -from google.cloud.bigquery.retry import DEFAULT_RETRY -from google.cloud.exceptions import BadGateway, BadRequest, ServerError +from google.cloud.bigquery.retry import DEFAULT_RETRY, _job_should_retry from requests.exceptions import ConnectionError from dbt.adapters.contracts.connection import Connection, ConnectionState @@ -83,7 +81,7 @@ def __call__(self, error: Exception) -> bool: self._error_count += 1 # if the error is retryable, and we haven't breached the threshold, log and continue - if _is_retryable(error) and self._error_count <= self._retries: + if _job_should_retry(error) and self._error_count <= self._retries: _logger.debug( f"Retry attempt {self._error_count} of {self._retries} after error: {repr(error)}" ) @@ -113,16 +111,3 @@ def on_error(error: Exception): raise FailedToConnectError(str(e)) return on_error - - -def _is_retryable(error: Exception) -> bool: - """Return true for errors that are unlikely to occur again if retried.""" - if isinstance( - error, (BadGateway, BadRequest, ConnectionError, ConnectionResetError, ServerError) - ): - return True - elif isinstance(error, Forbidden) and any( - e["reason"] == "rateLimitExceeded" for e in error.errors - ): - return True - return False diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index d4c95792e..e7afd692f 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -53,7 +53,7 @@ def generate_connection_reset_error(): assert new_mock_client is not self.mock_client def test_is_retryable(self): - _is_retryable = dbt.adapters.bigquery.retry._is_retryable + _is_retryable = google.cloud.bigquery.retry._job_should_retry exceptions = dbt.adapters.bigquery.impl.google.cloud.exceptions internal_server_error = exceptions.InternalServerError("code broke") bad_request_error = exceptions.BadRequest("code broke") @@ -65,7 +65,9 @@ def test_is_retryable(self): service_unavailable_error = exceptions.ServiceUnavailable("service is unavailable") self.assertTrue(_is_retryable(internal_server_error)) - self.assertTrue(_is_retryable(bad_request_error)) + self.assertFalse( + _is_retryable(bad_request_error) + ) # this was removed after initially being included self.assertTrue(_is_retryable(connection_error)) self.assertFalse(_is_retryable(client_error)) self.assertTrue(_is_retryable(rate_limit_error))