From 8868d08745c4fb760bb82d5a2fc2e3a60da67afb Mon Sep 17 00:00:00 2001 From: Pierre Gayvallet Date: Tue, 31 Oct 2023 13:55:03 +0100 Subject: [PATCH] make `isInlineScriptingEnabled` resilient to ES errors (#170208) ## Summary Fix https://github.com/elastic/kibana/issues/163787 Change the way `isInlineScriptingEnabled` function to retry retryable errors from ES (similar to how the valid connection or migration ES calls do) --- .../index.ts | 1 + .../src/is_scripting_enabled.test.mocks.ts | 15 ++++ .../src/is_scripting_enabled.test.ts | 55 ++++++++++++++ .../src/is_scripting_enabled.ts | 47 ++++++++---- .../src/retryable_es_client_errors.test.ts | 73 +++++++++++++++++++ .../src/retryable_es_client_errors.ts | 40 ++++++++++ .../catch_retryable_es_client_errors.ts | 23 +----- 7 files changed, 220 insertions(+), 34 deletions(-) create mode 100644 packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.mocks.ts create mode 100644 packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.test.ts create mode 100644 packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.ts diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/index.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/index.ts index 3d81cebf9dc85..983be835a8dbb 100644 --- a/packages/core/elasticsearch/core-elasticsearch-server-internal/index.ts +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/index.ts @@ -30,4 +30,5 @@ export { CoreElasticsearchRouteHandlerContext } from './src/elasticsearch_route_ export { retryCallCluster, migrationRetryCallCluster } from './src/retry_call_cluster'; export { isInlineScriptingEnabled } from './src/is_scripting_enabled'; export { getCapabilitiesFromClient } from './src/get_capabilities'; +export { isRetryableEsClientError } from './src/retryable_es_client_errors'; export type { ClusterInfo } from './src/get_cluster_info'; diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.mocks.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.mocks.ts new file mode 100644 index 0000000000000..99485dca9a581 --- /dev/null +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.mocks.ts @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +export const isRetryableEsClientErrorMock = jest.fn(); + +jest.doMock('./retryable_es_client_errors', () => { + return { + isRetryableEsClientError: isRetryableEsClientErrorMock, + }; +}); diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.ts index d2922c0161c6f..57d40936b8242 100644 --- a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.ts +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.test.ts @@ -6,6 +6,7 @@ * Side Public License, v 1. */ +import { isRetryableEsClientErrorMock } from './is_scripting_enabled.test.mocks'; import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks'; import { isInlineScriptingEnabled } from './is_scripting_enabled'; @@ -94,4 +95,58 @@ describe('isInlineScriptingEnabled', () => { expect(await isInlineScriptingEnabled({ client })).toEqual(false); }); + + describe('resiliency', () => { + beforeEach(() => { + isRetryableEsClientErrorMock.mockReset(); + }); + + const mockSuccessOnce = () => { + client.cluster.getSettings.mockResolvedValueOnce({ + transient: {}, + persistent: {}, + defaults: {}, + }); + }; + const mockErrorOnce = () => { + client.cluster.getSettings.mockResponseImplementationOnce(() => { + throw Error('ERR CON REFUSED'); + }); + }; + + it('retries the ES api call in case of retryable error', async () => { + isRetryableEsClientErrorMock.mockReturnValue(true); + + mockErrorOnce(); + mockSuccessOnce(); + + await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 1 })).resolves.toEqual(true); + expect(client.cluster.getSettings).toHaveBeenCalledTimes(2); + }); + + it('throws in case of non-retryable error', async () => { + isRetryableEsClientErrorMock.mockReturnValue(false); + + mockErrorOnce(); + mockSuccessOnce(); + + await expect(isInlineScriptingEnabled({ client, maxRetryDelay: 0.1 })).rejects.toThrowError( + 'ERR CON REFUSED' + ); + }); + + it('retries up to `maxRetries` times', async () => { + isRetryableEsClientErrorMock.mockReturnValue(true); + + mockErrorOnce(); + mockErrorOnce(); + mockErrorOnce(); + mockSuccessOnce(); + + await expect( + isInlineScriptingEnabled({ client, maxRetryDelay: 0.1, maxRetries: 2 }) + ).rejects.toThrowError('ERR CON REFUSED'); + expect(client.cluster.getSettings).toHaveBeenCalledTimes(3); + }); + }); }); diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.ts index 6a3900229c0d4..ca3ca5b5c59aa 100644 --- a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.ts +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/is_scripting_enabled.ts @@ -6,27 +6,48 @@ * Side Public License, v 1. */ +import { defer, map, retry, timer, firstValueFrom, throwError } from 'rxjs'; import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server'; +import { isRetryableEsClientError } from './retryable_es_client_errors'; const scriptAllowedTypesKey = 'script.allowed_types'; export const isInlineScriptingEnabled = async ({ client, + maxRetries = 20, + maxRetryDelay = 64, }: { client: ElasticsearchClient; + maxRetries?: number; + maxRetryDelay?: number; }): Promise => { - const settings = await client.cluster.getSettings({ - include_defaults: true, - flat_settings: true, - }); + return firstValueFrom( + defer(() => { + return client.cluster.getSettings({ + include_defaults: true, + flat_settings: true, + }); + }).pipe( + retry({ + count: maxRetries, + delay: (error, retryIndex) => { + if (isRetryableEsClientError(error)) { + const retryDelay = 1000 * Math.min(Math.pow(2, retryIndex), maxRetryDelay); // 2s, 4s, 8s, 16s, 32s, 64s, 64s, 64s ... + return timer(retryDelay); + } else { + return throwError(error); + } + }, + }), + map((settings) => { + const scriptAllowedTypes: string[] = + settings.transient[scriptAllowedTypesKey] ?? + settings.persistent[scriptAllowedTypesKey] ?? + settings.defaults![scriptAllowedTypesKey] ?? + []; - // priority: transient -> persistent -> default - const scriptAllowedTypes: string[] = - settings.transient[scriptAllowedTypesKey] ?? - settings.persistent[scriptAllowedTypesKey] ?? - settings.defaults![scriptAllowedTypesKey] ?? - []; - - // when unspecified, the setting as a default `[]` value that means that both scriptings are allowed. - return scriptAllowedTypes.length === 0 || scriptAllowedTypes.includes('inline'); + return scriptAllowedTypes.length === 0 || scriptAllowedTypes.includes('inline'); + }) + ) + ); }; diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.test.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.test.ts new file mode 100644 index 0000000000000..45015cece5b43 --- /dev/null +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.test.ts @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { errors as esErrors } from '@elastic/elasticsearch'; +import { elasticsearchClientMock } from '@kbn/core-elasticsearch-client-server-mocks'; +import { isRetryableEsClientError } from './retryable_es_client_errors'; + +describe('isRetryableEsClientError', () => { + describe('returns `false` for', () => { + test('non-retryable response errors', async () => { + const error = new esErrors.ResponseError( + elasticsearchClientMock.createApiResponse({ + body: { error: { type: 'cluster_block_exception' } }, + statusCode: 400, + }) + ); + + expect(isRetryableEsClientError(error)).toEqual(false); + }); + }); + + describe('returns `true` for', () => { + it('NoLivingConnectionsError', () => { + const error = new esErrors.NoLivingConnectionsError( + 'reason', + elasticsearchClientMock.createApiResponse() + ); + + expect(isRetryableEsClientError(error)).toEqual(true); + }); + + it('ConnectionError', () => { + const error = new esErrors.ConnectionError( + 'reason', + elasticsearchClientMock.createApiResponse() + ); + expect(isRetryableEsClientError(error)).toEqual(true); + }); + + it('TimeoutError', () => { + const error = new esErrors.TimeoutError( + 'reason', + elasticsearchClientMock.createApiResponse() + ); + expect(isRetryableEsClientError(error)).toEqual(true); + }); + + it('ResponseError of type snapshot_in_progress_exception', () => { + const error = new esErrors.ResponseError( + elasticsearchClientMock.createApiResponse({ + body: { error: { type: 'snapshot_in_progress_exception' } }, + }) + ); + expect(isRetryableEsClientError(error)).toEqual(true); + }); + + it.each([503, 401, 403, 408, 410, 429])('ResponseError with %p status code', (statusCode) => { + const error = new esErrors.ResponseError( + elasticsearchClientMock.createApiResponse({ + statusCode, + body: { error: { type: 'reason' } }, + }) + ); + + expect(isRetryableEsClientError(error)).toEqual(true); + }); + }); +}); diff --git a/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.ts b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.ts new file mode 100644 index 0000000000000..2ba0ff20a2732 --- /dev/null +++ b/packages/core/elasticsearch/core-elasticsearch-server-internal/src/retryable_es_client_errors.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { errors as EsErrors } from '@elastic/elasticsearch'; + +const retryResponseStatuses = [ + 503, // ServiceUnavailable + 401, // AuthorizationException + 403, // AuthenticationException + 408, // RequestTimeout + 410, // Gone + 429, // TooManyRequests -> ES circuit breaker +]; + +/** + * Returns true if the given elasticsearch error should be retried + * by retry-based resiliency systems such as the SO migration, false otherwise. + */ +export const isRetryableEsClientError = (e: EsErrors.ElasticsearchClientError): boolean => { + if ( + e instanceof EsErrors.NoLivingConnectionsError || + e instanceof EsErrors.ConnectionError || + e instanceof EsErrors.TimeoutError || + (e instanceof EsErrors.ResponseError && + (retryResponseStatuses.includes(e?.statusCode!) || + // ES returns a 400 Bad Request when trying to close or delete an + // index while snapshots are in progress. This should have been a 503 + // so once https://github.com/elastic/elasticsearch/issues/65883 is + // fixed we can remove this. + e?.body?.error?.type === 'snapshot_in_progress_exception')) + ) { + return true; + } + return false; +}; diff --git a/packages/core/saved-objects/core-saved-objects-migration-server-internal/src/actions/catch_retryable_es_client_errors.ts b/packages/core/saved-objects/core-saved-objects-migration-server-internal/src/actions/catch_retryable_es_client_errors.ts index 74877c9386422..965742a2abb8f 100644 --- a/packages/core/saved-objects/core-saved-objects-migration-server-internal/src/actions/catch_retryable_es_client_errors.ts +++ b/packages/core/saved-objects/core-saved-objects-migration-server-internal/src/actions/catch_retryable_es_client_errors.ts @@ -8,15 +8,7 @@ import * as Either from 'fp-ts/lib/Either'; import { errors as EsErrors } from '@elastic/elasticsearch'; - -const retryResponseStatuses = [ - 503, // ServiceUnavailable - 401, // AuthorizationException - 403, // AuthenticationException - 408, // RequestTimeout - 410, // Gone - 429, // TooManyRequests -> ES circuit breaker -]; +import { isRetryableEsClientError } from '@kbn/core-elasticsearch-server-internal'; export interface RetryableEsClientError { type: 'retryable_es_client_error'; @@ -27,18 +19,7 @@ export interface RetryableEsClientError { export const catchRetryableEsClientErrors = ( e: EsErrors.ElasticsearchClientError ): Either.Either => { - if ( - e instanceof EsErrors.NoLivingConnectionsError || - e instanceof EsErrors.ConnectionError || - e instanceof EsErrors.TimeoutError || - (e instanceof EsErrors.ResponseError && - (retryResponseStatuses.includes(e?.statusCode!) || - // ES returns a 400 Bad Request when trying to close or delete an - // index while snapshots are in progress. This should have been a 503 - // so once https://github.com/elastic/elasticsearch/issues/65883 is - // fixed we can remove this. - e?.body?.error?.type === 'snapshot_in_progress_exception')) - ) { + if (isRetryableEsClientError(e)) { return Either.left({ type: 'retryable_es_client_error' as const, message: e?.message,