From c5e8173f2a89dd3f9f709ecd1c385572acae23d0 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 3 Jul 2024 14:14:49 +0100 Subject: [PATCH] Stricter failure handling in `TransportGetSnapshotsAction` (#107191) Today if there's a failure during a multi-repo get-snapshots request then we record a per-repository failure but allow the rest of the request to proceed. This is trappy for clients, it means that they must always remember to check the `failures` response field or else risk missing some results. It's also a pain for the implementation because it means we have to collect the per-repository results separately first before adding them to the final results set just in case the last one triggers a failure. This commit drops this leniency and bubbles all failures straight up to the top level. --- docs/changelog/107191.yaml | 17 +++++++++ .../snapshots/GetSnapshotsIT.java | 18 ++++++---- .../snapshots/SnapshotStatusApisIT.java | 10 ++---- .../snapshots/get/GetSnapshotsRequest.java | 8 ----- .../snapshots/get/GetSnapshotsResponse.java | 2 ++ .../get/TransportGetSnapshotsAction.java | 36 ++++++++----------- 6 files changed, 47 insertions(+), 44 deletions(-) create mode 100644 docs/changelog/107191.yaml diff --git a/docs/changelog/107191.yaml b/docs/changelog/107191.yaml new file mode 100644 index 0000000000000..5ef6297c0f3f1 --- /dev/null +++ b/docs/changelog/107191.yaml @@ -0,0 +1,17 @@ +pr: 107191 +summary: Stricter failure handling in multi-repo get-snapshots request handling +area: Snapshot/Restore +type: bug +issues: [] +highlight: + title: Stricter failure handling in multi-repo get-snapshots request handling + body: | + If a multi-repo get-snapshots request encounters a failure in one of the + targeted repositories then earlier versions of Elasticsearch would proceed + as if the faulty repository did not exist, except for a per-repository + failure report in a separate section of the response body. This makes it + impossible to paginate the results properly in the presence of failures. In + versions 8.15.0 and later this API's failure handling behaviour has been + made stricter, reporting an overall failure if any targeted repository's + contents cannot be listed. + notable: true diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java index 7c5f38fee02a9..1130ddaa74f38 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java @@ -31,10 +31,8 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.empty; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.in; -import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; public class GetSnapshotsIT extends AbstractSnapshotIntegTestCase { @@ -314,6 +312,7 @@ public void testExcludePatterns() throws Exception { assertThat( clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, matchAllPattern()) .setSnapshots("non-existing*", otherPrefixSnapshot1, "-o*") + .setIgnoreUnavailable(true) .get() .getSnapshots(), empty() @@ -586,12 +585,17 @@ public void testRetrievingSnapshotsWhenRepositoryIsMissing() throws Exception { final List snapshotNames = createNSnapshots(repoName, randomIntBetween(1, 10)); snapshotNames.sort(String::compareTo); - final GetSnapshotsResponse response = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName) + final var oneRepoFuture = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName) .setSort(SnapshotSortKey.NAME) - .get(); - assertThat(response.getSnapshots().stream().map(info -> info.snapshotId().getName()).toList(), equalTo(snapshotNames)); - assertTrue(response.getFailures().containsKey(missingRepoName)); - assertThat(response.getFailures().get(missingRepoName), instanceOf(RepositoryMissingException.class)); + .setIgnoreUnavailable(randomBoolean()) + .execute(); + expectThrows(RepositoryMissingException.class, oneRepoFuture::actionGet); + + final var multiRepoFuture = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName) + .setSort(SnapshotSortKey.NAME) + .setIgnoreUnavailable(randomBoolean()) + .execute(); + expectThrows(RepositoryMissingException.class, multiRepoFuture::actionGet); } // Create a snapshot that is guaranteed to have a unique start time and duration for tests around ordering by either. diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStatusApisIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStatusApisIT.java index 600a3953d9bda..b155ef73783eb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStatusApisIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotStatusApisIT.java @@ -52,7 +52,6 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.oneOf; @@ -395,16 +394,13 @@ public void testGetSnapshotsMultipleRepos() throws Exception { } logger.info("--> specify all snapshot names with ignoreUnavailable=false"); - GetSnapshotsResponse getSnapshotsResponse2 = client.admin() + final var failingFuture = client.admin() .cluster() .prepareGetSnapshots(TEST_REQUEST_TIMEOUT, randomFrom("_all", "repo*")) .setIgnoreUnavailable(false) .setSnapshots(snapshotList.toArray(new String[0])) - .get(); - - for (String repo : repoList) { - assertThat(getSnapshotsResponse2.getFailures().get(repo), instanceOf(SnapshotMissingException.class)); - } + .execute(); + expectThrows(SnapshotMissingException.class, failingFuture::actionGet); logger.info("--> specify all snapshot names with ignoreUnavailable=true"); GetSnapshotsResponse getSnapshotsResponse3 = client.admin() diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsRequest.java index 8ef828d07d8b0..7c797444fc458 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsRequest.java @@ -15,7 +15,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.search.sort.SortOrder; @@ -220,13 +219,6 @@ public String[] policies() { return policies; } - public boolean isSingleRepositoryRequest() { - return repositories.length == 1 - && repositories[0] != null - && "_all".equals(repositories[0]) == false - && Regex.isSimpleMatchPattern(repositories[0]) == false; - } - /** * Returns the names of the snapshots. * diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsResponse.java index 85c2ff2806ace..f7dedc21f93b6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/GetSnapshotsResponse.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ChunkedToXContentObject; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.xcontent.ToXContent; @@ -33,6 +34,7 @@ public class GetSnapshotsResponse extends ActionResponse implements ChunkedToXCo private final List snapshots; + @UpdateForV9 // always empty, can be dropped private final Map failures; @Nullable diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index dd08746236fed..ff5fdbaa787fe 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -8,7 +8,6 @@ package org.elasticsearch.action.admin.cluster.snapshots.get; -import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.support.ActionFilters; @@ -120,10 +119,14 @@ protected void masterOperation( ) { assert task instanceof CancellableTask : task + " not cancellable"; + final var resolvedRepositories = ResolvedRepositories.resolve(state, request.repositories()); + if (resolvedRepositories.hasMissingRepositories()) { + throw new RepositoryMissingException(String.join(", ", resolvedRepositories.missing())); + } + new GetSnapshotsOperation( (CancellableTask) task, - ResolvedRepositories.resolve(state, request.repositories()), - request.isSingleRepositoryRequest() == false, + resolvedRepositories.repositoryMetadata(), request.snapshots(), request.ignoreUnavailable(), request.policies(), @@ -151,7 +154,6 @@ private class GetSnapshotsOperation { // repositories private final List repositories; - private final boolean isMultiRepoRequest; // snapshots selection private final SnapshotNamePredicate snapshotNamePredicate; @@ -179,7 +181,6 @@ private class GetSnapshotsOperation { private final GetSnapshotInfoExecutor getSnapshotInfoExecutor; // results - private final Map failuresByRepository = ConcurrentCollections.newConcurrentMap(); private final Queue> allSnapshotInfos = ConcurrentCollections.newQueue(); /** @@ -195,8 +196,7 @@ private class GetSnapshotsOperation { GetSnapshotsOperation( CancellableTask cancellableTask, - ResolvedRepositories resolvedRepositories, - boolean isMultiRepoRequest, + List repositories, String[] snapshots, boolean ignoreUnavailable, String[] policies, @@ -211,8 +211,7 @@ private class GetSnapshotsOperation { boolean indices ) { this.cancellableTask = cancellableTask; - this.repositories = resolvedRepositories.repositoryMetadata(); - this.isMultiRepoRequest = isMultiRepoRequest; + this.repositories = repositories; this.ignoreUnavailable = ignoreUnavailable; this.sortBy = sortBy; this.order = order; @@ -232,10 +231,6 @@ private class GetSnapshotsOperation { threadPool.info(ThreadPool.Names.SNAPSHOT_META).getMax(), cancellableTask::isCancelled ); - - for (final var missingRepo : resolvedRepositories.missing()) { - failuresByRepository.put(missingRepo, new RepositoryMissingException(missingRepo)); - } } void getMultipleReposSnapshotInfo(ActionListener listener) { @@ -249,6 +244,10 @@ void getMultipleReposSnapshotInfo(ActionListener listener) continue; } + if (listeners.isFailing()) { + return; + } + SubscribableListener .newForked(repositoryDataListener -> { @@ -261,14 +260,7 @@ void getMultipleReposSnapshotInfo(ActionListener listener) .andThen((l, repositoryData) -> loadSnapshotInfos(repoName, repositoryData, l)) - .addListener(listeners.acquire().delegateResponse((l, e) -> { - if (isMultiRepoRequest && e instanceof ElasticsearchException elasticsearchException) { - failuresByRepository.put(repoName, elasticsearchException); - l.onResponse(null); - } else { - l.onFailure(e); - } - })); + .addListener(listeners.acquire()); } } }) @@ -503,7 +495,7 @@ private GetSnapshotsResponse buildResponse() { } return new GetSnapshotsResponse( snapshotInfos, - failuresByRepository, + null, remaining > 0 ? sortBy.encodeAfterQueryParam(snapshotInfos.get(snapshotInfos.size() - 1)) : null, totalCount.get(), remaining