From 87d721e1a963912d17af7189c9728c190407617d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 3 Aug 2020 12:09:20 +0200 Subject: [PATCH 01/94] bck --- .../org/elasticsearch/repositories/Repository.java | 13 +++++++++++++ .../elasticsearch/snapshots/SnapshotsService.java | 2 ++ 2 files changed, 15 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index ec0fb5b561ad1..fc83ee9636940 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -262,6 +262,19 @@ void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId s void executeConsistentStateUpdate(Function createUpdateTask, String source, Consumer onFailure); + /** + * Clones a shard snapshot. + * + * @param source source snapshot + * @param target target snapshot + * @param index index for shard + * @param shardId shard id + * @param listener listener to complete with new shard generation once clone has completed + */ + default void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, ActionListener listener) { + throw new AssertionError("not implemented yet"); + } + /** * Hook that allows a repository to filter the user supplied snapshot metadata in {@link SnapshotsInProgress.Entry#userMetadata()} * during snapshot initialization. diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 75d7e6a447f92..4476a9fcfcaa1 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -119,6 +119,8 @@ */ public class SnapshotsService extends AbstractLifecycleComponent implements ClusterStateApplier { + public static final Version CLONE_SNAPSHOT_VERSION = Version.V_8_0_0; + public static final Version FULL_CONCURRENCY_VERSION = Version.V_7_9_0; public static final Version SHARD_GEN_IN_REPO_DATA_VERSION = Version.V_7_6_0; From d84264d479a95a53abf89e5601aa3d5fb3599975 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 3 Aug 2020 18:41:12 +0200 Subject: [PATCH 02/94] Clone snapshot API start --- .../cluster/ClusterStateDiffIT.java | 2 +- .../snapshots/CloneSnapshotIT.java | 42 ++++++++ .../elasticsearch/action/ActionModule.java | 3 + .../snapshots/clone/CloneSnapshotAction.java | 33 +++++++ .../snapshots/clone/CloneSnapshotRequest.java | 99 +++++++++++++++++++ .../clone/CloneSnapshotRequestBuilder.java | 47 +++++++++ .../clone/TransportCloneSnapshotAction.java | 74 ++++++++++++++ .../client/ClusterAdminClient.java | 8 ++ .../client/support/AbstractClient.java | 18 ++++ .../cluster/SnapshotsInProgress.java | 38 +++++-- .../repositories/Repository.java | 8 ++ .../snapshots/SnapshotsService.java | 23 +++++ .../MetadataDeleteIndexServiceTests.java | 2 +- .../MetadataIndexStateServiceTests.java | 2 +- ...SnapshotsInProgressSerializationTests.java | 3 +- .../DeleteDataStreamTransportActionTests.java | 1 + .../xpack/slm/SnapshotRetentionTaskTests.java | 2 +- 17 files changed, 393 insertions(+), 12 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotAction.java create mode 100644 
server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequestBuilder.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java index c04ad12ecd955..ec344ba931499 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java @@ -725,7 +725,7 @@ public ClusterState.Custom randomCreate(String name) { ImmutableOpenMap.of(), null, SnapshotInfoTests.randomUserMetadata(), - randomVersion(random())))); + randomVersion(random()), null))); case 1: return new RestoreInProgress.Builder().add( new RestoreInProgress.Entry( diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java new file mode 100644 index 0000000000000..8929b1d6e2bb3 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -0,0 +1,42 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.elasticsearch.snapshots;
+
+import org.elasticsearch.test.ESIntegTestCase;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+
+@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
+
+    public void testCloneSnapshot() throws Exception {
+        internalCluster().startMasterOnlyNode();
+        internalCluster().startDataOnlyNode();
+        final String repoName = "repo-name";
+        createRepository(repoName, "fs");
+
+        final String indexName = "index-1";
+        createIndexWithRandomDocs(indexName, randomIntBetween(10, 100));
+        final String sourceSnapshot = "source-snapshot";
+        createFullSnapshot(repoName, sourceSnapshot);
+
+        final String targetSnapshot = "target-snapshot";
+        assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get());
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java
index 055457edb0345..5dff69a0a7164 100644
--- a/server/src/main/java/org/elasticsearch/action/ActionModule.java
+++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java
@@ -63,6 +63,8 @@
 import org.elasticsearch.action.admin.cluster.settings.TransportClusterUpdateSettingsAction;
 import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsAction;
 import org.elasticsearch.action.admin.cluster.shards.TransportClusterSearchShardsAction;
+import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotAction;
+import org.elasticsearch.action.admin.cluster.snapshots.clone.TransportCloneSnapshotAction;
 import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotAction;
 import org.elasticsearch.action.admin.cluster.snapshots.create.TransportCreateSnapshotAction;
 import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotAction;
@@ -507,6 +509,7 @@ public void reg
         actions.register(GetSnapshotsAction.INSTANCE, TransportGetSnapshotsAction.class);
         actions.register(DeleteSnapshotAction.INSTANCE, TransportDeleteSnapshotAction.class);
         actions.register(CreateSnapshotAction.INSTANCE, TransportCreateSnapshotAction.class);
+        actions.register(CloneSnapshotAction.INSTANCE, TransportCloneSnapshotAction.class);
         actions.register(RestoreSnapshotAction.INSTANCE, TransportRestoreSnapshotAction.class);
         actions.register(SnapshotsStatusAction.INSTANCE, TransportSnapshotsStatusAction.class);
 
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotAction.java
new file mode 100644
index 0000000000000..e995469759bf0
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotAction.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.admin.cluster.snapshots.clone;
+
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
+
+public final class CloneSnapshotAction extends ActionType<AcknowledgedResponse> {
+
+    public static final CloneSnapshotAction INSTANCE = new CloneSnapshotAction();
+    public static final String NAME = "cluster:admin/snapshot/clone";
+
+    private CloneSnapshotAction() {
+        super(NAME, AcknowledgedResponse::new);
+    }
+}
\ No newline at end of file
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java
new file mode 100644
index 0000000000000..91a2f052d8a34
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.admin.cluster.snapshots.clone;
+
+import org.elasticsearch.action.ActionRequestValidationException;
+import org.elasticsearch.action.IndicesRequest;
+import org.elasticsearch.action.support.IndicesOptions;
+import org.elasticsearch.action.support.master.MasterNodeRequest;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.settings.Settings;
+
+import java.io.IOException;
+
+public class CloneSnapshotRequest extends MasterNodeRequest<CloneSnapshotRequest> implements IndicesRequest.Replaceable {
+
+    private final String repository;
+
+    private final String source;
+
+    private final String target;
+
+    private String[] indices;
+
+    private IndicesOptions indicesOptions = IndicesOptions.strictExpandHidden();
+
+    private final String[] excludedSettings;
+
+    private final Settings updatedSettings;
+
+    public CloneSnapshotRequest(StreamInput in) throws IOException {
+        repository = in.readString();
+        source = in.readString();
+        target = in.readString();
+        indices = in.readStringArray();
+        excludedSettings = in.readStringArray();
+        updatedSettings = Settings.readSettingsFromStream(in);
+    }
+
+    public CloneSnapshotRequest(String repository, String source, String target, String[] indices, String[] excludedSettings,
+                                Settings updatedSettings) {
+        this.repository = repository;
+        this.source = source;
+        this.target = target;
+        this.indices = indices;
+        this.excludedSettings = excludedSettings;
+        this.updatedSettings = updatedSettings;
+    }
+
+    @Override
+    public ActionRequestValidationException validate() {
+        return null;
+    }
+
+    @Override
+    public String[] indices() {
+        return this.indices;
+    }
+
+    @Override
+    public IndicesOptions indicesOptions() {
+        return indicesOptions;
+    }
+
+    public CloneSnapshotRequest indicesOptions(IndicesOptions indicesOptions) {
+        this.indicesOptions = indicesOptions;
+        return this;
+    }
+
+    @Override
+    public CloneSnapshotRequest indices(String... indices) {
+        this.indices = indices;
+        return this;
+    }
+
+    public String repository() {
+        return this.repository;
+    }
+
+    public String target() {
+        return this.target;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequestBuilder.java
new file mode 100644
index 0000000000000..4cb2bd06a0671
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequestBuilder.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.admin.cluster.snapshots.clone;
+
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
+import org.elasticsearch.action.support.master.MasterNodeOperationRequestBuilder;
+import org.elasticsearch.client.ElasticsearchClient;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.settings.Settings;
+
+public class CloneSnapshotRequestBuilder extends MasterNodeOperationRequestBuilder<CloneSnapshotRequest,
+    AcknowledgedResponse, CloneSnapshotRequestBuilder> {
+
+    protected CloneSnapshotRequestBuilder(ElasticsearchClient client, ActionType<AcknowledgedResponse> action,
+                                          CloneSnapshotRequest request) {
+        super(client, action, request);
+    }
+
+    public CloneSnapshotRequestBuilder(ElasticsearchClient client, ActionType<AcknowledgedResponse> action,
+                                       String repository, String source, String target) {
+        this(client, action,
+            new CloneSnapshotRequest(repository, source, target, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, Settings.EMPTY));
+    }
+
+    public CloneSnapshotRequestBuilder setIndices(String... indices) {
+        request.indices(indices);
+        return this;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java
new file mode 100644
index 0000000000000..fdf3c7ce4359a
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.elasticsearch.action.admin.cluster.snapshots.clone; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.snapshots.SnapshotsService; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.io.IOException; + +public class TransportCloneSnapshotAction extends TransportMasterNodeAction { + + private final SnapshotsService snapshotsService; + + @Inject + public TransportCloneSnapshotAction(TransportService transportService, ClusterService clusterService, + ThreadPool threadPool, SnapshotsService snapshotsService, ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver) { + super(CloneSnapshotAction.NAME, transportService, clusterService, threadPool, actionFilters, + CloneSnapshotRequest::new, indexNameExpressionResolver); + this.snapshotsService = snapshotsService; + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected AcknowledgedResponse read(StreamInput in) throws IOException { + return new AcknowledgedResponse(in); + } + + @Override + protected ClusterBlockException checkBlock(CloneSnapshotRequest request, ClusterState state) { + // Cluster is not affected but we look up repositories in metadata + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } + + @Override + protected void masterOperation(Task task, final CloneSnapshotRequest request, ClusterState state, + final ActionListener listener) { + snapshotsService.cloneSnapshot(request, listener); + } +} \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java index 0093eee3204fb..32e9f27dd44a3 100644 --- a/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java +++ b/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java @@ -71,6 +71,8 @@ import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequestBuilder; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequestBuilder; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; @@ -505,6 +507,12 @@ public interface ClusterAdminClient extends ElasticsearchClient { */ CreateSnapshotRequestBuilder prepareCreateSnapshot(String repository, String name); + CloneSnapshotRequestBuilder 
prepareCloneSnapshot(String repository, String source, String target); + + ActionFuture cloneSnapshot(CloneSnapshotRequest request); + + void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener); + /** * Get snapshots. */ diff --git a/server/src/main/java/org/elasticsearch/client/support/AbstractClient.java b/server/src/main/java/org/elasticsearch/client/support/AbstractClient.java index 2601a9af69690..763e5ec88bbc7 100644 --- a/server/src/main/java/org/elasticsearch/client/support/AbstractClient.java +++ b/server/src/main/java/org/elasticsearch/client/support/AbstractClient.java @@ -94,6 +94,9 @@ import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequest; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsRequestBuilder; import org.elasticsearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotAction; +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequestBuilder; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotAction; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; @@ -935,6 +938,21 @@ public CreateSnapshotRequestBuilder prepareCreateSnapshot(String repository, Str return new CreateSnapshotRequestBuilder(this, CreateSnapshotAction.INSTANCE, repository, name); } + @Override + public CloneSnapshotRequestBuilder prepareCloneSnapshot(String repository, String source, String target) { + return new CloneSnapshotRequestBuilder(this, CloneSnapshotAction.INSTANCE, repository, source, target); + } + + @Override + public ActionFuture cloneSnapshot(CloneSnapshotRequest request) { + return execute(CloneSnapshotAction.INSTANCE, request); + } + + @Override + public void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener) { + execute(CloneSnapshotAction.INSTANCE, request, listener); + } + @Override public ActionFuture getSnapshots(GetSnapshotsRequest request) { return execute(GetSnapshotsAction.INSTANCE, request); diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index c43b336992252..ee524bc5d07b3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -37,6 +37,8 @@ import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.RepositoryOperation; import org.elasticsearch.snapshots.Snapshot; +import org.elasticsearch.snapshots.SnapshotId; +import org.elasticsearch.snapshots.SnapshotsService; import java.io.IOException; import java.util.Collections; @@ -93,7 +95,14 @@ public static Entry startedEntry(Snapshot snapshot, boolean includeGlobalState, Version version) { return new SnapshotsInProgress.Entry(snapshot, includeGlobalState, partial, completed(shards.values()) ? 
State.SUCCESS : State.STARTED, - indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version); + indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version, null); + } + + public static Entry startClone(Snapshot snapshot, SnapshotId source, boolean includeGlobalState, List indices, long startTime, + long repositoryStateId, ImmutableOpenMap shards, Version version) { + return new SnapshotsInProgress.Entry(snapshot, includeGlobalState, false, + completed(shards.values()) ? State.SUCCESS : State.STARTED, indices, Collections.emptyList(), startTime, repositoryStateId, + shards, null, Collections.emptyMap(), version, source); } public static class Entry implements Writeable, ToXContent, RepositoryOperation { @@ -106,8 +115,14 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation private final List dataStreams; private final long startTime; private final long repositoryStateId; - // see #useShardGenerations private final Version version; + + /** + * Source snapshot if this is a clone operation or {@code null} if this is a normal snapshot. + */ + @Nullable + private final SnapshotId source; + @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -115,7 +130,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, State state, List indices, List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, - Version version) { + Version version, @Nullable SnapshotId source) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -129,6 +144,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.failure = failure; this.userMetadata = userMetadata; this.version = version; + this.source = source; } private Entry(StreamInput in) throws IOException { @@ -148,6 +164,11 @@ private Entry(StreamInput in) throws IOException { } else { dataStreams = Collections.emptyList(); } + if (in.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { + source = in.readOptionalWriteable(SnapshotId::new); + } else { + source = null; + } } private static boolean assertShardsConsistent(State state, List indices, @@ -170,7 +191,7 @@ public Entry withRepoGen(long newRepoGen) { assert newRepoGen > repositoryStateId : "Updated repository generation [" + newRepoGen + "] must be higher than current generation [" + repositoryStateId + "]"; return new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, startTime, newRepoGen, shards, failure, - userMetadata, version); + userMetadata, version, source); } /** @@ -199,7 +220,7 @@ public Entry abort() { public Entry fail(ImmutableOpenMap shards, State state, String failure) { return new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, startTime, repositoryStateId, shards, - failure, userMetadata, version); + failure, userMetadata, version, source); } /** @@ -213,7 +234,7 @@ public Entry fail(ImmutableOpenMap shards, State s public Entry withShardStates(ImmutableOpenMap shards) { if (completed(shards.values())) { return new Entry(snapshot, includeGlobalState, partial, State.SUCCESS, indices, dataStreams, startTime, repositoryStateId, - shards, failure, userMetadata, version); + shards, failure, userMetadata, version, source); } return withStartedShards(shards); } @@ -224,7 +245,7 @@ 
public Entry withShardStates(ImmutableOpenMap shar */ public Entry withStartedShards(ImmutableOpenMap shards) { final SnapshotsInProgress.Entry updated = new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, - startTime, repositoryStateId, shards, failure, userMetadata, version); + startTime, repositoryStateId, shards, failure, userMetadata, version, source); assert updated.state().completed() == false && completed(updated.shards().values()) == false : "Only running snapshots allowed but saw [" + updated + "]"; return updated; @@ -381,6 +402,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(DATA_STREAMS_IN_SNAPSHOT)) { out.writeStringCollection(dataStreams); } + if (out.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { + out.writeOptionalWriteable(source); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index fc83ee9636940..7f9116206311a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -37,9 +37,11 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.locationtech.jts.util.AssertionFailedException; import java.io.IOException; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; @@ -275,6 +277,12 @@ default void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId in throw new AssertionError("not implemented yet"); } + default void finalizeSnapshotClone(SnapshotId source, ShardGenerations shardGenerations, long repositoryStateId, + SnapshotInfo snapshotInfo, Version repositoryMetaVersion, + Function stateTransformer, ActionListener listener) { + throw new AssertionFailedException("not implemented yet"); + } + /** * Hook that allows a repository to filter the user supplied snapshot metadata in {@link SnapshotsInProgress.Entry#userMetadata()} * during snapshot initialization. 
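Illustration only, not part of the patch series: the two repository hooks added so far (cloneShardSnapshot in PATCH 01 and finalizeSnapshotClone above) are still placeholders, but the javadoc implies the intended call pattern: clone every shard of an index inside the repository, collect the new shard generations, then finalize. A minimal driver sketch under that reading; the helper name cloneAllShards, the use of GroupedActionListener, and the String shard-generation listener type are assumptions rather than anything defined by these patches.

import java.util.Collection;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.GroupedActionListener;
import org.elasticsearch.repositories.IndexId;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.snapshots.SnapshotId;

public final class CloneDriverSketch {

    // Fans out one cloneShardSnapshot() call per shard and completes the outer listener with the
    // collected shard generations; assumes the per-shard listener carries the new generation as a
    // String, as described in the cloneShardSnapshot javadoc of PATCH 01.
    public static void cloneAllShards(Repository repository, SnapshotId source, SnapshotId target, IndexId index,
                                      int shardCount, ActionListener<Collection<String>> onAllShardsCloned) {
        final GroupedActionListener<String> shardListener = new GroupedActionListener<>(onAllShardsCloned, shardCount);
        for (int shardId = 0; shardId < shardCount; shardId++) {
            repository.cloneShardSnapshot(source, target, index, shardId, shardListener);
        }
    }
}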
diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
index 4c9230884944b..40ec96a593ebc 100644
--- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
+++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
@@ -27,9 +27,12 @@
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest;
 import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest;
 import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest;
 import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.master.AcknowledgedRequest;
+import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.action.support.master.TransportMasterNodeAction;
 import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterState;
@@ -328,6 +331,26 @@ public TimeValue timeout() {
         }, "create_snapshot [" + snapshotName + ']', listener::onFailure);
     }
 
+    public void cloneSnapshot(CloneSnapshotRequest request, ActionListener<AcknowledgedResponse> listener) {
+        final String repositoryName = request.repository();
+        final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target());
+        validate(repositoryName, snapshotName);
+        Repository repository = repositoriesService.repository(request.repository());
+        if (repository.isReadOnly()) {
+            listener.onFailure(
+                new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository"));
+            return;
+        }
+        final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID());
+        // TODO: do not allow delete of in-progress-clone
+        // TODO: handle index metadata via metadata generations only (no physical file writing) ... implement this in repo
+        // TODO: just copy global metadata blob? Or just make it empty with the index metadata in it? (latter might be better with DS)
+        // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...)
+        // TODO: SnapshotInfo build as always
+        // TODO: shards are snapshot shard-by-shard on the master node, no need for coordination here
+        // TODO: what about snapshot metadata?
+ } + private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, SnapshotDeletionsInProgress deletionsInProgress) { final int inProgressOperations = snapshotsInProgress.entries().size() + deletionsInProgress.getEntries().size(); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java index ae90621f1420d..e12fbe78c2e76 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java @@ -82,7 +82,7 @@ public void testDeleteSnapshotting() { SnapshotsInProgress snaps = SnapshotsInProgress.of(List.of(new SnapshotsInProgress.Entry(snapshot, true, false, SnapshotsInProgress.State.INIT, singletonList(new IndexId(index, "doesn't matter")), Collections.emptyList(), System.currentTimeMillis(), (long) randomIntBetween(0, 1000), ImmutableOpenMap.of(), null, - SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random())))); + SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random()), null))); ClusterState state = ClusterState.builder(clusterState(index)) .putCustom(SnapshotsInProgress.TYPE, snaps) .build(); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexStateServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexStateServiceTests.java index 8e982c01cdb59..e426b3851e46e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexStateServiceTests.java @@ -461,7 +461,7 @@ private static ClusterState addSnapshotIndex(final String index, final int numSh final SnapshotsInProgress.Entry entry = new SnapshotsInProgress.Entry(snapshot, randomBoolean(), false, SnapshotsInProgress.State.INIT, Collections.singletonList(new IndexId(index, index)), Collections.emptyList(), randomNonNegativeLong(), randomLong(), - shardsBuilder.build(), null, SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random())); + shardsBuilder.build(), null, SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random()), null); return ClusterState.builder(newState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(List.of(entry))).build(); } diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java index e2a144ab62ba3..91b53998e995d 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java @@ -79,7 +79,8 @@ private Entry randomSnapshot() { } ImmutableOpenMap shards = builder.build(); return new Entry(snapshot, includeGlobalState, partial, randomState(shards), indices, dataStreams, - startTime, repositoryStateId, shards, null, SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random())); + startTime, repositoryStateId, shards, null, SnapshotInfoTests.randomUserMetadata(), VersionUtils.randomVersion(random()), + null); } @Override diff --git 
a/x-pack/plugin/data-streams/src/test/java/org/elasticsearch/xpack/datastreams/action/DeleteDataStreamTransportActionTests.java b/x-pack/plugin/data-streams/src/test/java/org/elasticsearch/xpack/datastreams/action/DeleteDataStreamTransportActionTests.java index 0d5747c680006..23de4a1a2bb11 100644 --- a/x-pack/plugin/data-streams/src/test/java/org/elasticsearch/xpack/datastreams/action/DeleteDataStreamTransportActionTests.java +++ b/x-pack/plugin/data-streams/src/test/java/org/elasticsearch/xpack/datastreams/action/DeleteDataStreamTransportActionTests.java @@ -113,6 +113,7 @@ private SnapshotsInProgress.Entry createEntry(String dataStreamName, String repo ImmutableOpenMap.of(), null, null, + null, null ); } diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java index 908a795f82677..2e1ac25ec23de 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java @@ -346,7 +346,7 @@ public void testOkToDeleteSnapshots() { snapshot, true, false, SnapshotsInProgress.State.INIT, Collections.singletonList(new IndexId("name", "id")), Collections.emptyList(), 0, 0, ImmutableOpenMap.builder().build(), null, Collections.emptyMap(), - VersionUtils.randomVersion(random())))); + VersionUtils.randomVersion(random()), null))); ClusterState state = ClusterState.builder(new ClusterName("cluster")) .putCustom(SnapshotsInProgress.TYPE, inProgress) .build(); From 38da49aee9b2aa375f87766791eb86e45e9fee97 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 3 Aug 2020 20:10:03 +0200 Subject: [PATCH 03/94] works --- .../src/main/java/org/elasticsearch/repositories/Repository.java | 1 - .../main/java/org/elasticsearch/snapshots/SnapshotsService.java | 1 - 2 files changed, 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 7f9116206311a..3eb57518422ea 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -41,7 +41,6 @@ import java.io.IOException; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 40ec96a593ebc..f90ef2efabe4d 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -31,7 +31,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.master.AcknowledgedRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.cluster.ClusterChangedEvent; From b4be1f6f0dec850b4a28533a10492561568dc026 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 3 Aug 2020 20:33:22 +0200 Subject: [PATCH 04/94] r/w --- 
.../snapshots/clone/CloneSnapshotRequest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index 91a2f052d8a34..51e8071c457d8 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java @@ -24,6 +24,7 @@ import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.action.support.master.MasterNodeRequest; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; import java.io.IOException; @@ -49,6 +50,7 @@ public CloneSnapshotRequest(StreamInput in) throws IOException { source = in.readString(); target = in.readString(); indices = in.readStringArray(); + indicesOptions = IndicesOptions.readIndicesOptions(in); excludedSettings = in.readStringArray(); updatedSettings = Settings.readSettingsFromStream(in); } @@ -63,6 +65,17 @@ public CloneSnapshotRequest(String repository, String source, String target, Str this.updatedSettings = updatedSettings; } + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(repository); + out.writeString(source); + out.writeString(target); + indicesOptions.writeIndicesOptions(out); + out.writeStringArray(excludedSettings); + Settings.writeSettingsToStream(updatedSettings, out); + } + @Override public ActionRequestValidationException validate() { return null; From 2e3fbccea3b3da0270077c2d4668766e9bf3234a Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 3 Aug 2020 21:30:30 +0200 Subject: [PATCH 05/94] bck --- .../snapshots/CloneSnapshotIT.java | 17 +++- .../snapshots/clone/CloneSnapshotRequest.java | 5 + .../clone/TransportCloneSnapshotAction.java | 2 +- .../cluster/SnapshotsInProgress.java | 4 +- .../snapshots/SnapshotsService.java | 96 ++++++++++++++++++- 5 files changed, 117 insertions(+), 7 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 8929b1d6e2bb3..b599aad29a823 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -18,9 +18,14 @@ */ package org.elasticsearch.snapshots; +import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; +import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.test.ESIntegTestCase; +import java.util.List; + import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.hasSize; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase { @@ -32,11 +37,21 @@ public void testCloneSnapshot() throws Exception { createRepository(repoName, "fs"); final String indexName = "index-1"; - createIndexWithRandomDocs(indexName, randomIntBetween(10, 100)); + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); final String sourceSnapshot = "source-snapshot"; 
createFullSnapshot(repoName, sourceSnapshot); + indexRandomDocs(indexName, randomIntBetween(20, 100)); + final String targetSnapshot = "target-snapshot"; assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get()); + + final List status = client().admin().cluster().prepareSnapshotStatus(repoName) + .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); + assertThat(status, hasSize(2)); + final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); + final SnapshotIndexStatus status2 = status.get(1).getIndices().get(indexName); + assertEquals(status1.getStats().getTotalFileCount(), status2.getStats().getTotalFileCount()); + assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index 51e8071c457d8..411172b2686fc 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java @@ -71,6 +71,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(repository); out.writeString(source); out.writeString(target); + out.writeStringArray(indices); indicesOptions.writeIndicesOptions(out); out.writeStringArray(excludedSettings); Settings.writeSettingsToStream(updatedSettings, out); @@ -109,4 +110,8 @@ public String repository() { public String target() { return this.target; } + + public String source() { + return this.source; + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java index fdf3c7ce4359a..fc157504972fd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java @@ -69,6 +69,6 @@ protected ClusterBlockException checkBlock(CloneSnapshotRequest request, Cluster @Override protected void masterOperation(Task task, final CloneSnapshotRequest request, ClusterState state, final ActionListener listener) { - snapshotsService.cloneSnapshot(request, listener); + snapshotsService.cloneSnapshot(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); } } \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index ee524bc5d07b3..5be0286194e47 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -98,9 +98,9 @@ public static Entry startedEntry(Snapshot snapshot, boolean includeGlobalState, indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version, null); } - public static Entry startClone(Snapshot snapshot, SnapshotId source, boolean includeGlobalState, List indices, long startTime, + public static Entry startClone(Snapshot snapshot, SnapshotId source, List indices, long startTime, long repositoryStateId, ImmutableOpenMap shards, 
Version version) { - return new SnapshotsInProgress.Entry(snapshot, includeGlobalState, false, + return new SnapshotsInProgress.Entry(snapshot, false, false, completed(shards.values()) ? State.SUCCESS : State.STARTED, indices, Collections.emptyList(), startTime, repositoryStateId, shards, null, Collections.emptyMap(), version, source); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index f90ef2efabe4d..72a5dead1aaf4 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -31,7 +31,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterState; @@ -330,7 +329,7 @@ public TimeValue timeout() { }, "create_snapshot [" + snapshotName + ']', listener::onFailure); } - public void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener) { + public void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener) { final String repositoryName = request.repository(); final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target()); validate(repositoryName, snapshotName); @@ -341,13 +340,104 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener new ClusterStateUpdateTask() { + + private SnapshotsInProgress.Entry newEntry; + + @Override + public ClusterState execute(ClusterState currentState) { + // check if the snapshot name already exists in the repository + if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) { + throw new InvalidSnapshotNameException( + repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists"); + } + final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List runningSnapshots = snapshots.entries(); + if (runningSnapshots.stream().anyMatch(s -> { + final Snapshot running = s.snapshot(); + return running.getRepository().equals(repositoryName) && running.getSnapshotId().getName().equals(snapshotName); + })) { + throw new InvalidSnapshotNameException( + repository.getMetadata().name(), snapshotName, "snapshot with the same name is already in-progress"); + } + validate(repositoryName, snapshotName, currentState); + final boolean concurrentOperationsAllowed = currentState.nodes().getMinNodeVersion().onOrAfter(FULL_CONCURRENCY_VERSION); + final SnapshotDeletionsInProgress deletionsInProgress = + currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY); + // TODO: check that no delete for the source snapshot is running + final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds() + .stream() + .filter(src -> src.getName().equals(request.source())) + .findAny() + .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); + if (deletionsInProgress.hasDeletionsInProgress() && concurrentOperationsAllowed == false) { + throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, + 
"cannot snapshot while a snapshot deletion is in-progress in [" + deletionsInProgress + "]"); + } + final RepositoryCleanupInProgress repositoryCleanupInProgress = + currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); + if (repositoryCleanupInProgress.hasCleanupInProgress()) { + throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, + "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + } + // Fail if there are any concurrently running snapshots. The only exception to this being a snapshot in INIT state from a + // previous master that we can simply ignore and remove from the cluster state because we would clean it up from the + // cluster state anyway in #applyClusterState. + if (concurrentOperationsAllowed == false && runningSnapshots.stream().anyMatch(entry -> entry.state() != State.INIT)) { + throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running"); + } + ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); + // TODO: figure out indices by pattern matching + List indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request)); + + logger.trace("[{}][{}] creating snapshot clone for indices [{}]", repositoryName, snapshotName, indices); + + final List indexIds = repositoryData.resolveNewIndices( + indices, runningSnapshots.stream().filter(entry -> entry.repository().equals(repositoryName)) + .flatMap(entry -> entry.indices().stream()).distinct() + .collect(Collectors.toMap(IndexId::getName, Function.identity()))); + final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); + ImmutableOpenMap shards = shards(snapshots, deletionsInProgress, currentState.metadata(), + currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); + newEntry = SnapshotsInProgress.startClone(snapshot, sourceSnapshotId, + indexIds, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), shards, version); + final List newEntries = new ArrayList<>(runningSnapshots); + newEntries.add(newEntry); + return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, + SnapshotsInProgress.of(List.copyOf(newEntries))).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { + try { + logger.info("snapshot clone [{}] started", snapshot); + addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); + } finally { + if (newEntry.state().completed()) { + endSnapshot(newEntry, newState.metadata(), repositoryData); + } + } + } + + @Override + public TimeValue timeout() { + return request.masterNodeTimeout(); + } + }, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure); } private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, From cf5d19915c2d4e349f78d118edfabb5507fe67e4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 5 Aug 2020 10:37:06 +0200 Subject: [PATCH 06/94] fix --- .../admin/cluster/snapshots/clone/CloneSnapshotRequest.java | 1 + 1 file changed, 1 insertion(+) 
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index 411172b2686fc..c70ea2e6fb674 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java @@ -46,6 +46,7 @@ public class CloneSnapshotRequest extends MasterNodeRequest Date: Wed, 5 Aug 2020 14:31:36 +0200 Subject: [PATCH 07/94] progress --- .../cluster/SnapshotsInProgress.java | 15 ++++++++++++--- .../snapshots/SnapshotsService.java | 19 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 5be0286194e47..5486761eacdbc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -139,12 +139,12 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.dataStreams = dataStreams; this.startTime = startTime; this.shards = shards; - assert assertShardsConsistent(state, indices, shards); this.repositoryStateId = repositoryStateId; this.failure = failure; this.userMetadata = userMetadata; this.version = version; this.source = source; + assert assertShardsConsistent(source, state, indices, shards); } private Entry(StreamInput in) throws IOException { @@ -171,14 +171,18 @@ private Entry(StreamInput in) throws IOException { } } - private static boolean assertShardsConsistent(State state, List indices, + private static boolean assertShardsConsistent(SnapshotId source, State state, List indices, ImmutableOpenMap shards) { if ((state == State.INIT || state == State.ABORTED) && shards.isEmpty()) { return true; } final Set indexNames = indices.stream().map(IndexId::getName).collect(Collectors.toSet()); final Set indexNamesInShards = new HashSet<>(); - shards.keysIt().forEachRemaining(s -> indexNamesInShards.add(s.getIndexName())); + shards.iterator().forEachRemaining(s -> { + indexNamesInShards.add(s.key.getIndexName()); + assert source == null || s.value.nodeId == null : + "Shard snapshot must not be assigned to data node when copying from snapshot [" + source + "]"; + }); assert indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; final boolean shardsCompleted = completed(shards.values()); @@ -308,6 +312,11 @@ public Version version() { return version; } + @Nullable + public SnapshotId source() { + return source; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 72a5dead1aaf4..4352fc6caf1bf 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -27,6 +27,8 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; +import org.elasticsearch.action.StepListener; import 
org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest; @@ -992,9 +994,17 @@ private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata met entry.startTime(), failure, threadPool.absoluteTimeInMillis(), entry.partial() ? shardGenerations.totalShards() : entry.shards().size(), shardFailures, entry.includeGlobalState(), entry.userMetadata()); - repositoriesService.repository(snapshot.getRepository()).finalizeSnapshot( + final StepListener metadataListener = new StepListener<>(); + final Repository repo = repositoriesService.repository(snapshot.getRepository()); + if (entry.source() == null) { + metadataListener.onResponse(metadata); + } else { + threadPool.executor(ThreadPool.Names.SNAPSHOT).execute( + ActionRunnable.supply(metadataListener, () -> repo.getSnapshotGlobalMetadata(entry.source()))); + } + metadataListener.whenComplete(meta -> repo.finalizeSnapshot( shardGenerations, - repositoryData.getGenId(), + repositoryData.getGenId(), metadataForSnapshot(entry, metadata), snapshotInfo, entry.version(), @@ -1002,10 +1012,11 @@ private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata met ActionListener.wrap(newRepoData -> { endingSnapshots.remove(snapshot); completeListenersIgnoringException( - snapshotCompletionListeners.remove(snapshot), Tuple.tuple(newRepoData, snapshotInfo)); + snapshotCompletionListeners.remove(snapshot), Tuple.tuple(newRepoData, snapshotInfo)); logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state()); runNextQueuedOperation(newRepoData, repository, true); - }, e -> handleFinalizationFailure(e, entry, repositoryData))); + }, e -> handleFinalizationFailure(e, entry, repositoryData))), + e -> handleFinalizationFailure(e, entry, repositoryData)); } catch (Exception e) { assert false : new AssertionError(e); handleFinalizationFailure(e, entry, repositoryData); From 789a07df787cc3da80c418e3359bdcadbb640114 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 5 Aug 2020 18:46:09 +0200 Subject: [PATCH 08/94] bck --- .../snapshots/SnapshotShardsService.java | 3 ++ .../snapshots/SnapshotsService.java | 28 +++++++------------ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java index a9ca775388a6d..bb2e031d41bf0 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java @@ -200,6 +200,9 @@ private void startNewSnapshots(SnapshotsInProgress snapshotsInProgress) { final String localNodeId = clusterService.localNode().getId(); for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { final State entryState = entry.state(); + if (entry.source() != null) { + // This is a snapshot clone, it will be executed on the current master + } if (entryState == State.STARTED) { Map startedShards = null; final Snapshot snapshot = entry.snapshot(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 4352fc6caf1bf..7781cbf2277c9 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ 
b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -75,6 +75,7 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.repositories.IndexId; @@ -397,18 +398,14 @@ public ClusterState execute(ClusterState currentState) { throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running"); } ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); - // TODO: figure out indices by pattern matching - List indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request)); - - logger.trace("[{}][{}] creating snapshot clone for indices [{}]", repositoryName, snapshotName, indices); - final List indexIds = repositoryData.resolveNewIndices( - indices, runningSnapshots.stream().filter(entry -> entry.repository().equals(repositoryName)) - .flatMap(entry -> entry.indices().stream()).distinct() - .collect(Collectors.toMap(IndexId::getName, Function.identity()))); + final List indexIds = repositoryData.getIndices().values().stream().filter(indexId -> + repositoryData.getSnapshots(indexId).contains(sourceSnapshotId) && + Regex.simpleMatch(request.indices(), indexId.getName())).collect(Collectors.toList()); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); - ImmutableOpenMap shards = shards(snapshots, deletionsInProgress, currentState.metadata(), - currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); + // TODO: load all index metadata blobs for the indices to copy and then assign shard clones + // TODO: tricky here ... 
index uuid in shard-id does not matter + ImmutableOpenMap shards = ImmutableOpenMap.of(); newEntry = SnapshotsInProgress.startClone(snapshot, sourceSnapshotId, indexIds, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), shards, version); final List newEntries = new ArrayList<>(runningSnapshots); @@ -425,14 +422,9 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { - try { - logger.info("snapshot clone [{}] started", snapshot); - addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); - } finally { - if (newEntry.state().completed()) { - endSnapshot(newEntry, newState.metadata(), repositoryData); - } - } + logger.info("snapshot clone [{}] started", snapshot); + addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); + } @Override From 9647e44e5c73b7b427ff2cc8df66775e540d46ad Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 6 Aug 2020 14:19:05 +0200 Subject: [PATCH 09/94] fix --- .../main/java/org/elasticsearch/snapshots/SnapshotsService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 7781cbf2277c9..22e4d13da75f0 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -75,7 +75,6 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.repositories.IndexId; From e95a9e2a2df34231c56495b1ac7fbdf9115bf0f1 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 11 Aug 2020 15:27:11 +0200 Subject: [PATCH 10/94] bck --- .../snapshots/CloneSnapshotIT.java | 26 +++++++++++++++++++ .../cluster/SnapshotsInProgress.java | 3 ++- .../snapshots/SnapshotsService.java | 1 + 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index b599aad29a823..366cfe5e75f9f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -54,4 +54,30 @@ public void testCloneSnapshot() throws Exception { assertEquals(status1.getStats().getTotalFileCount(), status2.getStats().getTotalFileCount()); assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); } + + public void testCloneSnapshotIndexMissing() throws Exception { + internalCluster().startMasterOnlyNode(); + internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + createRepository(repoName, "fs"); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + indexRandomDocs(indexName, randomIntBetween(20, 100)); + + final String targetSnapshot = "target-snapshot"; + assertAcked(client().admin().indices().prepareDelete(indexName)); + 
assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get()); + + final List status = client().admin().cluster().prepareSnapshotStatus(repoName) + .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); + assertThat(status, hasSize(2)); + final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); + final SnapshotIndexStatus status2 = status.get(1).getIndices().get(indexName); + assertEquals(status1.getStats().getTotalFileCount(), status2.getStats().getTotalFileCount()); + assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 5486761eacdbc..fba4ef81dd8da 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -183,7 +183,8 @@ private static boolean assertShardsConsistent(SnapshotId source, State state, Li assert source == null || s.value.nodeId == null : "Shard snapshot must not be assigned to data node when copying from snapshot [" + source + "]"; }); - assert indexNames.equals(indexNamesInShards) + assert source == null || indexNames.isEmpty() == false : "No empty snapshot clones allowed"; + assert source != null || indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; final boolean shardsCompleted = completed(shards.values()); assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 22e4d13da75f0..05c56d0d548b9 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -349,6 +349,7 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...) // TODO: SnapshotInfo build as always // TODO: shards are snapshot shard-by-shard on the master node, no need for coordination here + // TODO: throw when no indices match // TODO: what about snapshot metadata? 
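        // --- Editor's illustration, not part of the original patch ---
        // One way the "throw when no indices match" TODO above could be acted on once `indexIds`
        // has been resolved against `repositoryData` inside the cluster-state update below.
        // SnapshotException is the existing exception type; the message text is made up for this sketch:
        //
        //     if (indexIds.isEmpty()) {
        //         throw new SnapshotException(repositoryName, snapshotName,
        //                 "no indices in source snapshot [" + sourceSnapshotId + "] matched requested pattern "
        //                         + Arrays.toString(request.indices()));
        //     }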
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { From 1c135969ee83f223cf1b8018ecd88a870e36713e Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 11 Aug 2020 21:23:45 +0200 Subject: [PATCH 11/94] bck --- .../org/elasticsearch/cluster/SnapshotsInProgress.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index fba4ef81dd8da..ceef23495376f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.ImmutableOpenMap; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -123,6 +124,8 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final SnapshotId source; + private final ImmutableOpenMap> clones; + @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -130,7 +133,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, State state, List indices, List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, - Version version, @Nullable SnapshotId source) { + Version version, @Nullable SnapshotId source, @Nullable ImmutableOpenMap> clones) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -144,6 +147,11 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.userMetadata = userMetadata; this.version = version; this.source = source; + if (source == null) { + this.clones = ImmutableOpenMap.of(); + } else { + this.clones = clones; + } assert assertShardsConsistent(source, state, indices, shards); } From e01a58e3af38ac579885aca3487f8324350010b7 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 12 Aug 2020 16:20:57 +0200 Subject: [PATCH 12/94] bck --- .../cluster/SnapshotsInProgress.java | 43 +++++++++++++------ .../snapshots/SnapshotsService.java | 42 +++++++++++++----- 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index ceef23495376f..ef44f1b3292b5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -19,6 +19,7 @@ package org.elasticsearch.cluster; +import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.ObjectContainer; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; @@ -96,14 +97,25 @@ public static Entry startedEntry(Snapshot snapshot, boolean includeGlobalState, Version version) { return new SnapshotsInProgress.Entry(snapshot, includeGlobalState, partial, completed(shards.values()) ? 
State.SUCCESS : State.STARTED, - indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version, null); + indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version, null, null); } + /** + * Creates the initial snapshot clone entry + * + * @param snapshot snapshot to clone into + * @param source snapshot to clone from + * @param indices indices to clone + * @param startTime start time + * @param repositoryStateId repository state id that this clone is based on + * @param version repository metadata version to write + * @return snapshot clone entry + */ public static Entry startClone(Snapshot snapshot, SnapshotId source, List indices, long startTime, - long repositoryStateId, ImmutableOpenMap shards, Version version) { - return new SnapshotsInProgress.Entry(snapshot, false, false, - completed(shards.values()) ? State.SUCCESS : State.STARTED, indices, Collections.emptyList(), startTime, repositoryStateId, - shards, null, Collections.emptyMap(), version, source); + long repositoryStateId, Version version) { + return new SnapshotsInProgress.Entry(snapshot, false, false, State.STARTED, indices, Collections.emptyList(), + startTime, repositoryStateId, ImmutableOpenMap.of(), null, Collections.emptyMap(), version, source, + ImmutableOpenMap.of()); } public static class Entry implements Writeable, ToXContent, RepositoryOperation { @@ -124,7 +136,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final SnapshotId source; - private final ImmutableOpenMap> clones; + private final @Nullable ImmutableOpenMap> clones; @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -133,7 +145,8 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, State state, List indices, List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, - Version version, @Nullable SnapshotId source, @Nullable ImmutableOpenMap> clones) { + Version version, @Nullable SnapshotId source, + @Nullable ImmutableOpenMap> clones) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -148,6 +161,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.version = version; this.source = source; if (source == null) { + assert clones == null : "Provided [" + clones + "] but no source"; this.clones = ImmutableOpenMap.of(); } else { this.clones = clones; @@ -174,8 +188,10 @@ private Entry(StreamInput in) throws IOException { } if (in.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { source = in.readOptionalWriteable(SnapshotId::new); + clones = in.readImmutableMap(StreamInput::readString, inpt -> inpt.readList(ShardState::readFrom)); } else { source = null; + clones = ImmutableOpenMap.of(); } } @@ -204,7 +220,7 @@ public Entry withRepoGen(long newRepoGen) { assert newRepoGen > repositoryStateId : "Updated repository generation [" + newRepoGen + "] must be higher than current generation [" + repositoryStateId + "]"; return new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, startTime, newRepoGen, shards, failure, - userMetadata, version, source); + userMetadata, version, source, clones); } /** @@ -233,7 +249,7 @@ public Entry abort() { public Entry fail(ImmutableOpenMap shards, State state, String failure) { return new Entry(snapshot, 
includeGlobalState, partial, state, indices, dataStreams, startTime, repositoryStateId, shards, - failure, userMetadata, version, source); + failure, userMetadata, version, source, clones); } /** @@ -247,7 +263,7 @@ public Entry fail(ImmutableOpenMap shards, State s public Entry withShardStates(ImmutableOpenMap shards) { if (completed(shards.values())) { return new Entry(snapshot, includeGlobalState, partial, State.SUCCESS, indices, dataStreams, startTime, repositoryStateId, - shards, failure, userMetadata, version, source); + shards, failure, userMetadata, version, source, clones); } return withStartedShards(shards); } @@ -258,7 +274,7 @@ public Entry withShardStates(ImmutableOpenMap shar */ public Entry withStartedShards(ImmutableOpenMap shards) { final SnapshotsInProgress.Entry updated = new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, - startTime, repositoryStateId, shards, failure, userMetadata, version, source); + startTime, repositoryStateId, shards, failure, userMetadata, version, source, clones); assert updated.state().completed() == false && completed(updated.shards().values()) == false : "Only running snapshots allowed but saw [" + updated + "]"; return updated; @@ -490,7 +506,7 @@ private boolean assertConsistent() { public ShardSnapshotStatus(StreamInput in) throws IOException { nodeId = in.readOptionalString(); - state = ShardState.fromValue(in.readByte()); + state = ShardState.readFrom(in); generation = in.readOptionalString(); reason = in.readOptionalString(); } @@ -722,7 +738,8 @@ public boolean failed() { return failed; } - public static ShardState fromValue(byte value) { + public static ShardState readFrom(StreamInput in) throws IOException { + final byte value = in.readByte(); switch (value) { case 0: return INIT; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 05c56d0d548b9..ba109f3f24463 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -33,6 +33,7 @@ import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.GroupedActionListener; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterState; @@ -106,6 +107,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.Executor; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -403,11 +405,8 @@ public ClusterState execute(ClusterState currentState) { repositoryData.getSnapshots(indexId).contains(sourceSnapshotId) && Regex.simpleMatch(request.indices(), indexId.getName())).collect(Collectors.toList()); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); - // TODO: load all index metadata blobs for the indices to copy and then assign shard clones - // TODO: tricky here ... 
index uuid in shard-id does not matter - ImmutableOpenMap shards = ImmutableOpenMap.of(); - newEntry = SnapshotsInProgress.startClone(snapshot, sourceSnapshotId, - indexIds, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), shards, version); + newEntry = SnapshotsInProgress.startClone( + snapshot, sourceSnapshotId, indexIds, threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), version); final List newEntries = new ArrayList<>(runningSnapshots); newEntries.add(newEntry); return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, @@ -424,7 +423,7 @@ public void onFailure(String source, Exception e) { public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { logger.info("snapshot clone [{}] started", snapshot); addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); - + startCloning(repositoryData, newEntry); } @Override @@ -434,6 +433,25 @@ public TimeValue timeout() { }, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure); } + /** + * Determine the number of shards in each index of a clone operation and update the cluster state accordingly. + * + * @param repositoryData repository data at the time the clone operation was started + * @param cloneEntry clone operation in the cluster state + */ + private void startCloning(RepositoryData repositoryData, SnapshotsInProgress.Entry cloneEntry) { + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + final List indices = cloneEntry.indices(); + final GroupedActionListener> shardCountListener = new GroupedActionListener<>( + ActionListener.wrap(counts -> { + final ImmutableOpenMap.Builder> builder = ImmutableOpenMap.builder(); + for (Tuple count : counts) { + // TODO: fill with either init or started then submit cs update and actually run the clones that are possible + // to execute + } + }, e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, repositoryData)), indices.size()); + } + private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, SnapshotDeletionsInProgress deletionsInProgress) { final int inProgressOperations = snapshotsInProgress.entries().size() + deletionsInProgress.getEntries().size(); @@ -1176,10 +1194,12 @@ private static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sn * used when the snapshot fails for some reason. During normal operation the snapshot repository will remove the * {@link SnapshotsInProgress.Entry} from the cluster state once it's done finalizing the snapshot. 
* - * @param snapshot snapshot that failed - * @param failure exception that failed the snapshot + * @param snapshot snapshot that failed + * @param failure exception that failed the snapshot + * @param repositoryData repository data if the next finalization operation on the repository should be attempted or {@code null} if + * no further actions should be executed */ - private void removeFailedSnapshotFromClusterState(Snapshot snapshot, Exception failure, RepositoryData repositoryData) { + private void removeFailedSnapshotFromClusterState(Snapshot snapshot, Exception failure, @Nullable RepositoryData repositoryData) { assert failure != null : "Failure must be supplied"; clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() { @@ -1210,7 +1230,9 @@ public void onNoLongerMaster(String source) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { failSnapshotCompletionListeners(snapshot, failure); - runNextQueuedOperation(repositoryData, snapshot.getRepository(), true); + if (repositoryData != null) { + runNextQueuedOperation(repositoryData, snapshot.getRepository(), true); + } } }); } From 4c754d127fe0330669ba30c13093f6258ce31107 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Sun, 30 Aug 2020 18:59:59 +0200 Subject: [PATCH 13/94] bck --- .../cluster/SnapshotsInProgress.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 1287c4ed2f024..c1673d4c6d347 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -159,7 +159,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.version = version; this.source = source; if (source == null) { - assert clones == null : "Provided [" + clones + "] but no source"; + assert clones == null || clones.isEmpty() : "Provided [" + clones + "] but no source"; this.clones = ImmutableOpenMap.of(); } else { this.clones = clones; @@ -209,8 +209,12 @@ private static boolean assertShardsConsistent(SnapshotId source, State state, Li assert source != null || indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; final boolean shardsCompleted = completed(shards.values()); - assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) - : "Completed state must imply all shards completed but saw state [" + state + "] and shards " + shards; + if (source == null) { + assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) + : "Completed state must imply all shards completed but saw state [" + state + "] and shards " + shards; + } else { + // TODO: assert things about clones + } return true; } @@ -436,6 +440,7 @@ public void writeTo(StreamOutput out) throws IOException { } if (out.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { out.writeOptionalWriteable(source); + out.writeMap(clones, StreamOutput::writeString, StreamOutput::writeList); } } @@ -539,7 +544,7 @@ public boolean isActive() { @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(nodeId); - out.writeByte(state.value); + 
state.writeTo(out); out.writeOptionalString(generation); out.writeOptionalString(reason); } @@ -701,7 +706,7 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par return builder; } - public enum ShardState { + public enum ShardState implements Writeable { INIT((byte) 0, false, false), SUCCESS((byte) 2, true, false), FAILED((byte) 3, true, true), @@ -757,5 +762,10 @@ public static ShardState readFrom(StreamInput in) throws IOException { throw new IllegalArgumentException("No shard snapshot state for value [" + value + "]"); } } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeByte(value); + } } } From 617ee9bbbbb48fcb6f3ccc92284e45c421b28cfd Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 14:55:25 +0200 Subject: [PATCH 14/94] bck --- .../cluster/SnapshotsInProgress.java | 49 +++- .../repositories/FilterRepository.java | 6 + .../repositories/Repository.java | 16 +- .../blobstore/BlobStoreRepository.java | 17 ++ .../snapshots/SnapshotsService.java | 220 ++++++++++++++---- .../RepositoriesServiceTests.java | 6 + .../index/shard/RestoreOnlyRepository.java | 6 + 7 files changed, 265 insertions(+), 55 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index c1673d4c6d347..e4122a2a293b2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -134,7 +134,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final SnapshotId source; - private final @Nullable ImmutableOpenMap> clones; + private final ImmutableOpenMap> clones; @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -144,7 +144,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, Version version, @Nullable SnapshotId source, - @Nullable ImmutableOpenMap> clones) { + @Nullable ImmutableOpenMap> clones) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -186,7 +186,7 @@ private Entry(StreamInput in) throws IOException { } if (in.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { source = in.readOptionalWriteable(SnapshotId::new); - clones = in.readImmutableMap(StreamInput::readString, inpt -> inpt.readList(ShardState::readFrom)); + clones = in.readImmutableMap(StreamInput::readString, inpt -> inpt.readList(ShardSnapshotStatus::new)); } else { source = null; clones = ImmutableOpenMap.of(); @@ -225,6 +225,12 @@ public Entry withRepoGen(long newRepoGen) { userMetadata, version, source, clones); } + public Entry initiateClones(ImmutableOpenMap> updatedClones) { + assert clones.isEmpty() : "Can't initiate clones, already have running clones " + clones; + return new Entry(snapshot, includeGlobalState, partial, state, this.indices, dataStreams, startTime, repositoryStateId, shards, + failure, userMetadata, version, source, updatedClones); + } + /** * Create a new instance by aborting this instance. Moving all in-progress shards to {@link ShardState#ABORTED} if assigned to a * data node or to {@link ShardState#FAILED} if not assigned to any data node. 
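Editor's note on the serialization change above: ShardState now implements Writeable, so the byte written by writeTo(StreamOutput) is read back by the new readFrom(StreamInput). Below is a minimal round-trip sketch in the style of an in-tree unit test, not part of the patch; it assumes the usual org.elasticsearch.common.io.stream.BytesStreamOutput/StreamInput helpers and a JUnit-style assertSame, with class and import scaffolding omitted for brevity.

    public void testShardStateWireRoundTrip() throws IOException {
        for (SnapshotsInProgress.ShardState state : SnapshotsInProgress.ShardState.values()) {
            try (BytesStreamOutput out = new BytesStreamOutput()) {
                state.writeTo(out);                                   // writes the single byte value
                try (StreamInput in = out.bytes().streamInput()) {
                    assertSame(state, SnapshotsInProgress.ShardState.readFrom(in)); // reads the same constant back
                }
            }
        }
    }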
@@ -344,6 +350,10 @@ public SnapshotId source() { return source; } + public ImmutableOpenMap> clones() { + return clones; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -706,6 +716,39 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par return builder; } + public static RepoShardId repoShardId(IndexId indexId, int shard) { + return new RepoShardId(indexId, shard); + } + + public static final class RepoShardId { + + private final IndexId index; + + private final int shard; + + private RepoShardId(IndexId index, int shard) { + this.index = index; + this.shard = shard; + } + + @Override + public int hashCode() { + return Objects.hash(index, shard); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj instanceof RepoShardId == false) { + return false; + } + final RepoShardId that = (RepoShardId) obj; + return that.index.equals(index) && that.shard == shard; + } + } + public enum ShardState implements Writeable { INIT((byte) 0, false, false), SUCCESS((byte) 2, true, false), diff --git a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java index a27301724ac87..d67d75d89f777 100644 --- a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java @@ -149,6 +149,12 @@ public void executeConsistentStateUpdate(Function listener) { + in.cloneShardSnapshot(source, target, index, shardId, shardGeneration, listener); + } + @Override public Lifecycle.State lifecycleState() { return in.lifecycleState(); diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 3eb57518422ea..23aa16fe0dc54 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -266,15 +266,15 @@ void executeConsistentStateUpdate(Function listener) { - throw new AssertionError("not implemented yet"); - } + void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, @Nullable String shardGeneration, + ActionListener listener); default void finalizeSnapshotClone(SnapshotId source, ShardGenerations shardGenerations, long repositoryStateId, SnapshotInfo snapshotInfo, Version repositoryMetaVersion, diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 4daf2014535c1..eb730cc71cc0e 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -397,6 +397,23 @@ public TimeValue timeout() { }, onFailure)); } + @Override + public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, @Nullable String shardGeneration, + ActionListener listener) { + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + executor.execute(ActionRunnable.supply(listener, () -> { + final BlobContainer shardContainer = shardContainer(index, shardId); + final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(shardContainer, source); + final String newGen; + if (shardGeneration == null) { + throw new 
AssertionError("Not implemented yet"); + } else { + newGen = UUIDs.randomBase64UUID(); + } + return newGen; + })); + } + // Inspects all cluster state elements that contain a hint about what the current repository generation is and updates // #latestKnownRepoGen if a newer than currently known generation is found @Override diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 1b3c60daf056a..80510c3eaba75 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -47,6 +47,7 @@ import org.elasticsearch.cluster.RestoreInProgress; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress; +import org.elasticsearch.cluster.SnapshotsInProgress.RepoShardId; import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.SnapshotsInProgress.State; @@ -399,7 +400,7 @@ public void onFailure(String source, Exception e) { public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { logger.info("snapshot clone [{}] started", snapshot); addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); - startCloning(repositoryData, newEntry); + startCloning(repository, repositoryData, newEntry); } @Override @@ -412,20 +413,108 @@ public TimeValue timeout() { /** * Determine the number of shards in each index of a clone operation and update the cluster state accordingly. * + * @param repository repository to run operation on * @param repositoryData repository data at the time the clone operation was started * @param cloneEntry clone operation in the cluster state */ - private void startCloning(RepositoryData repositoryData, SnapshotsInProgress.Entry cloneEntry) { + private void startCloning(Repository repository, RepositoryData repositoryData, SnapshotsInProgress.Entry cloneEntry) { final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); final List indices = cloneEntry.indices(); final GroupedActionListener> shardCountListener = new GroupedActionListener<>( ActionListener.wrap(counts -> { - final ImmutableOpenMap.Builder> builder = ImmutableOpenMap.builder(); - for (Tuple count : counts) { - // TODO: fill with either init or started then submit cs update and actually run the clones that are possible - // to execute - } + clusterService.submitStateUpdateTask("start snapshot clone", new ClusterStateUpdateTask() { + + private SnapshotsInProgress.Entry updatedEntry; + + @Override + public ClusterState execute(ClusterState currentState) { + final SnapshotsInProgress snapshotsInProgress = + currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); + boolean changed = false; + for (int i = 0; i < updatedEntries.size(); i++) { + if (cloneEntry.equals(updatedEntries.get(i))) { + final ImmutableOpenMap.Builder> clonesBuilder = + ImmutableOpenMap.builder(); + final Set busyShards = + busyShardsForRepo(repository.getMetadata().name(), snapshotsInProgress); + final Set busyShardsInRepo = busyShards.stream().map(shardId -> + SnapshotsInProgress.repoShardId( + repositoryData.resolveIndexId(shardId.getIndexName()), shardId.getId())) + .collect(Collectors.toSet()); + 
for (Tuple count : counts) { + final List shardSnapshotStatuses = new ArrayList<>(count.v2()); + for (int shardId = 0; shardId < count.v2(); shardId++) { + if (busyShardsInRepo.contains(SnapshotsInProgress.repoShardId(count.v1(), shardId))) { + shardSnapshotStatuses.add(ShardSnapshotStatus.UNASSIGNED_QUEUED); + } else { + // TODO: proper state + generation + shardSnapshotStatuses.add(new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); + } + } + clonesBuilder.put(count.v1().getName(), shardSnapshotStatuses); + } + updatedEntry = cloneEntry.initiateClones(clonesBuilder.build()); + updatedEntries.set(i, updatedEntry); + changed = true; + break; + } + } + return updateWithSnapshots( + currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + } + + @Override + public void onFailure(String source, Exception e) { + logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); + failAllListenersOnMasterFailOver(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + if (updatedEntry != null) { + final SnapshotId sourceSnapshot = updatedEntry.source(); + final SnapshotId targetSnapshot = updatedEntry.snapshot().getSnapshotId(); + for (ObjectObjectCursor> indexClone : updatedEntry.clones()) { + final IndexId indexId = repositoryData.resolveIndexId(indexClone.key); + for (int i = 0; i < indexClone.value.size(); i++) { + final int shardId = i; + final ShardSnapshotStatus shardStatusBefore = indexClone.value.get(shardId); + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, shardId, + shardStatusBefore.generation(), ActionListener.wrap( + generation -> innerUpdateSnapshotState( + new ShardSnapshotUpdate(updatedEntry.snapshot(), + SnapshotsInProgress.repoShardId(indexId, shardId), null, + new ShardSnapshotStatus(clusterService.localNode().getId(), + ShardState.SUCCESS, generation)), + ActionListener.wrap( + v -> { + + }, e -> { + //TODO: Error handling + throw new AssertionError(e); + } + + )), e -> { + //TODO: Error handling + throw new AssertionError(e); + })); + } + } + } else { + // TODO: this is broken, we should error somehow maybe + } + } + }); }, e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, repositoryData)), indices.size()); + final SnapshotId sourceSnapshot = cloneEntry.source(); + for (IndexId index : cloneEntry.indices()) { + executor.execute(ActionRunnable.supply(shardCountListener, () -> { + final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); + return Tuple.tuple(index, metadata.getNumberOfShards()); + })); + } } private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, @@ -1997,11 +2086,48 @@ public boolean assertAllListenersResolved() { return true; } - private static class SnapshotStateExecutor implements ClusterStateTaskExecutor { + private static final class ShardSnapshotUpdate { + + private final Snapshot snapshot; + + @Nullable + private final RepoShardId repoShardId; + + @Nullable + private final ShardId shardId; + + private final SnapshotsInProgress.ShardSnapshotStatus status; + + private ShardSnapshotUpdate(Snapshot snapshot, @Nullable RepoShardId repoShardId, @Nullable ShardId shardId, + ShardSnapshotStatus status) { + this.snapshot = snapshot; + this.repoShardId = repoShardId; + this.shardId = shardId; + this.status = 
status; + } + + public boolean isClone() { + return repoShardId != null; + } + + public ShardId shardId() { + return shardId; + } + + public Snapshot snapshot() { + return snapshot; + } + + public ShardSnapshotStatus status() { + return status; + } + } + + private static class SnapshotStateExecutor implements ClusterStateTaskExecutor { @Override - public ClusterTasksResult - execute(ClusterState currentState, List tasks) { + public ClusterTasksResult + execute(ClusterState currentState, List tasks) { int changedCount = 0; final List entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); @@ -2009,36 +2135,41 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor shards = ImmutableOpenMap.builder(); boolean updated = false; - for (UpdateIndexShardSnapshotStatusRequest updateSnapshotState : tasks) { - final ShardId finishedShardId = updateSnapshotState.shardId(); - if (entry.snapshot().equals(updateSnapshotState.snapshot())) { - logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(), - finishedShardId, updateSnapshotState.status().state()); - if (updated == false) { - shards.putAll(entry.shards()); - updated = true; - } - shards.put(finishedShardId, updateSnapshotState.status()); - changedCount++; + for (ShardSnapshotUpdate updateSnapshotState : tasks) { + if (updateSnapshotState.isClone()) { + throw new AssertionError("TODO"); } else { - final String updatedRepository = updateSnapshotState.snapshot().getRepository(); - final Set reusedShardIds = reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); - if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false - && entry.shards().keys().contains(finishedShardId)) { - final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); - if (existingStatus.state() != ShardState.QUEUED) { - continue; - } + final ShardId finishedShardId = updateSnapshotState.shardId(); + if (entry.snapshot().equals(updateSnapshotState.snapshot())) { + logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(), + finishedShardId, updateSnapshotState.status().state()); if (updated == false) { shards.putAll(entry.shards()); updated = true; } - final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); - logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, - finishedStatus.nodeId(), finishedStatus.generation()); - shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); - reusedShardIds.add(finishedShardId); + shards.put(finishedShardId, updateSnapshotState.status()); + changedCount++; + } else { + final String updatedRepository = updateSnapshotState.snapshot().getRepository(); + final Set reusedShardIds = + reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); + if (entry.repository().equals(updatedRepository) && + entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false + && entry.shards().keys().contains(finishedShardId)) { + final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", 
finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); + reusedShardIds.add(finishedShardId); + } } } } @@ -2051,25 +2182,25 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor 0) { logger.trace("changed cluster state triggered by {} snapshot state updates", changedCount); - return ClusterTasksResult.builder().successes(tasks) + return ClusterTasksResult.builder().successes(tasks) .build(ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries))).build()); } - return ClusterTasksResult.builder().successes(tasks).build(currentState); + return ClusterTasksResult.builder().successes(tasks).build(currentState); } } /** * Updates the shard status on master node * - * @param request update shard status request + * @param update update shard status request */ - private void innerUpdateSnapshotState(final UpdateIndexShardSnapshotStatusRequest request, + private void innerUpdateSnapshotState(final ShardSnapshotUpdate update, ActionListener listener) { - logger.trace("received updated snapshot restore state [{}]", request); + logger.trace("received updated snapshot restore state [{}]", update); clusterService.submitStateUpdateTask( "update snapshot state", - request, + update, ClusterStateTaskConfig.build(Priority.NORMAL), snapshotStateExecutor, new ClusterStateTaskListener() { @@ -2085,9 +2216,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } finally { // Maybe this state update completed the snapshot. If we are not already ending it because of a concurrent // state update we check if its state is completed and end it if it is. 
- if (endingSnapshots.contains(request.snapshot()) == false) { + if (endingSnapshots.contains(update.snapshot()) == false) { final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); - final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(request.snapshot()); + final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(update.snapshot()); // If the entry is still in the cluster state and is completed, try finalizing the snapshot in the repo if (updatedEntry != null && updatedEntry.state().completed()) { endSnapshot(updatedEntry, newState.metadata(), null); @@ -2121,7 +2252,8 @@ protected UpdateIndexShardSnapshotStatusResponse read(StreamInput in) throws IOE @Override protected void masterOperation(Task task, UpdateIndexShardSnapshotStatusRequest request, ClusterState state, ActionListener listener) { - innerUpdateSnapshotState(request, listener); + innerUpdateSnapshotState( + new ShardSnapshotUpdate(request.snapshot(), null, request.shardId(), request.status()), listener); } @Override diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index 6a7738c1c32e4..65f3699441383 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -227,6 +227,12 @@ public void executeConsistentStateUpdate(Function onFailure) { } + @Override + public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, String shardGeneration, + ActionListener listener) { + + } + @Override public Lifecycle.State lifecycleState() { return null; diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java index 6d1168f419b12..a321e734300fb 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java @@ -157,4 +157,10 @@ public void executeConsistentStateUpdate(Function onFailure) { throw new UnsupportedOperationException("Unsupported for restore-only repository"); } + + @Override + public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, String shardGeneration, + ActionListener listener) { + throw new AssertionError("not supported"); + } } From c1d9e9350ac8fd5d125832948ab7cc2bc0343dd6 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 15:54:44 +0200 Subject: [PATCH 15/94] bck --- .../cluster/SnapshotsInProgress.java | 11 +++++-- .../snapshots/SnapshotsService.java | 29 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index e4122a2a293b2..152d158092ea1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -225,8 +225,7 @@ public Entry withRepoGen(long newRepoGen) { userMetadata, version, source, clones); } - public Entry initiateClones(ImmutableOpenMap> updatedClones) { - assert clones.isEmpty() : "Can't initiate clones, already have running clones " + clones; + public Entry 
withClones(ImmutableOpenMap> updatedClones) { return new Entry(snapshot, includeGlobalState, partial, state, this.indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, updatedClones); } @@ -731,6 +730,14 @@ private RepoShardId(IndexId index, int shard) { this.shard = shard; } + public String indexName() { + return index.getName(); + } + + public int shardId() { + return shard; + } + @Override public int hashCode() { return Objects.hash(index, shard); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 80510c3eaba75..0ef16d811f60a 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -455,7 +455,7 @@ public ClusterState execute(ClusterState currentState) { } clonesBuilder.put(count.v1().getName(), shardSnapshotStatuses); } - updatedEntry = cloneEntry.initiateClones(clonesBuilder.build()); + updatedEntry = cloneEntry.withClones(clonesBuilder.build()); updatedEntries.set(i, updatedEntry); changed = true; break; @@ -2114,6 +2114,10 @@ public ShardId shardId() { return shardId; } + public RepoShardId repoShardId() { + return repoShardId; + } + public Snapshot snapshot() { return snapshot; } @@ -2132,12 +2136,27 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) { - ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); boolean updated = false; - + final ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); + final ImmutableOpenMap.Builder> clones = ImmutableOpenMap.builder(); for (ShardSnapshotUpdate updateSnapshotState : tasks) { if (updateSnapshotState.isClone()) { - throw new AssertionError("TODO"); + final RepoShardId finishedShardId = updateSnapshotState.repoShardId(); + if (entry.snapshot().equals(updateSnapshotState.snapshot())) { + logger.trace("[{}] Updating shard clone [{}] with status [{}]", updateSnapshotState.snapshot(), + finishedShardId, updateSnapshotState.status().state()); + if (updated == false) { + clones.putAll(entry.clones()); + updated = true; + } + final List indexStatuses = new ArrayList<>(clones.get(finishedShardId.indexName())); + indexStatuses.set(finishedShardId.shardId(), updateSnapshotState.status); + changedCount++; + clones.put(finishedShardId.indexName(), indexStatuses); + } else { + throw new AssertionError("TODO"); + // TODO: start next snapshot + } } else { final ShardId finishedShardId = updateSnapshotState.shardId(); if (entry.snapshot().equals(updateSnapshotState.snapshot())) { @@ -2175,7 +2194,7 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor Date: Mon, 31 Aug 2020 16:09:09 +0200 Subject: [PATCH 16/94] bck --- .../elasticsearch/xpack/ccr/repository/CcrRepository.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java index 6bd8e02da356e..6382f8726c463 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java +++ 
b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java @@ -442,6 +442,12 @@ public void executeConsistentStateUpdate(Function listener) { + throw new UnsupportedOperationException("Unsupported for repository of type: " + TYPE); + } + private void updateMappings(Client leaderClient, Index leaderIndex, long leaderMappingVersion, Client followerClient, Index followerIndex) { final PlainActionFuture indexMetadataFuture = new PlainActionFuture<>(); From 214901f666a38c7f79015f894064d9b82ab087d9 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 17:16:54 +0200 Subject: [PATCH 17/94] worksish --- .../BlobStoreIndexShardSnapshot.java | 5 +++ .../BlobStoreIndexShardSnapshots.java | 16 ++++++++ .../snapshots/blobstore/SnapshotFiles.java | 4 ++ .../repositories/Repository.java | 7 ---- .../blobstore/BlobStoreRepository.java | 6 +++ .../snapshots/SnapshotsService.java | 41 +++++++++++++------ 6 files changed, 59 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java index c53ad31080e4d..457dbc9df6d3f 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java @@ -385,6 +385,11 @@ public BlobStoreIndexShardSnapshot(String snapshot, long indexVersion, List files, List updated = new ArrayList<>(shardSnapshots); + updated.add(sourceFiles.clone(target)); + return new BlobStoreIndexShardSnapshots(updated); + } + /** * Returns list of snapshots * diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SnapshotFiles.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SnapshotFiles.java index 039ea9405a158..2252bdda0f347 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SnapshotFiles.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/SnapshotFiles.java @@ -59,6 +59,10 @@ public SnapshotFiles(String snapshot, List indexFiles, @Nullable Strin this.shardStateIdentifier = shardStateIdentifier; } + public SnapshotFiles clone(String targetName) { + return new SnapshotFiles(targetName, indexFiles, shardStateIdentifier); + } + /** * Returns an identifier for the shard state that can be used to check whether a shard has changed between * snapshots or not. 
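Editor's note on the two helpers added above (SnapshotFiles#clone and BlobStoreIndexShardSnapshots#withClone): a shard-level clone only re-registers the source snapshot's file list under the target snapshot's name; no file data is copied. A minimal sketch of how they compose, shown as a bare fragment with an empty file list for brevity:

    import java.util.Collections;

    import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots;
    import org.elasticsearch.index.snapshots.blobstore.SnapshotFiles;

    // A shard-level listing that so far only contains the source snapshot.
    SnapshotFiles source = new SnapshotFiles("source-snapshot", Collections.emptyList(), null);
    BlobStoreIndexShardSnapshots listing = new BlobStoreIndexShardSnapshots(Collections.singletonList(source));

    // withClone appends source.clone("target-snapshot"), i.e. the same index files under the new name.
    BlobStoreIndexShardSnapshots withTarget = listing.withClone("source-snapshot", "target-snapshot");
    // withTarget now lists both "source-snapshot" and "target-snapshot" while sharing the file metadata.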
diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 23aa16fe0dc54..a6f9065cbfa4b 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -37,7 +37,6 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; -import org.locationtech.jts.util.AssertionFailedException; import java.io.IOException; import java.util.Collection; @@ -276,12 +275,6 @@ void executeConsistentStateUpdate(Function listener); - default void finalizeSnapshotClone(SnapshotId source, ShardGenerations shardGenerations, long repositoryStateId, - SnapshotInfo snapshotInfo, Version repositoryMetaVersion, - Function stateTransformer, ActionListener listener) { - throw new AssertionFailedException("not implemented yet"); - } - /** * Hook that allows a repository to filter the user supplied snapshot metadata in {@link SnapshotsInProgress.Entry#userMetadata()} * during snapshot initialization. diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index eb730cc71cc0e..79d3e543a5447 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -409,6 +409,12 @@ public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId ind throw new AssertionError("Not implemented yet"); } else { newGen = UUIDs.randomBase64UUID(); + logger.trace("[{}] [{}] writing shard snapshot file for clone", shardId, target); + INDEX_SHARD_SNAPSHOT_FORMAT.write(sourceMeta.clone(target.getName()), shardContainer, target.getUUID(), compress); + final BlobStoreIndexShardSnapshots existingSnapshots = + buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); + INDEX_SHARD_SNAPSHOTS_FORMAT.write(existingSnapshots.withClone(source.getName(), target.getName()), shardContainer, newGen, + compress); } return newGen; })); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 0ef16d811f60a..d51e4bf12fe85 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -573,17 +573,26 @@ private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snaps ShardGenerations.Builder builder = ShardGenerations.builder(); final Map indexLookup = new HashMap<>(); snapshot.indices().forEach(idx -> indexLookup.put(idx.getName(), idx)); - snapshot.shards().forEach(c -> { - if (metadata.index(c.key.getIndex()) == null) { - assert snapshot.partial() : - "Index [" + c.key.getIndex() + "] was deleted during a snapshot but snapshot was not partial."; - return; - } - final IndexId indexId = indexLookup.get(c.key.getIndexName()); - if (indexId != null) { - builder.put(indexId, c.key.id(), c.value.generation()); - } - }); + if (snapshot.source() == null) { + snapshot.shards().forEach(c -> { + if (metadata.index(c.key.getIndex()) == null) { + assert snapshot.partial() : + "Index [" + c.key.getIndex() + "] was deleted during a snapshot but snapshot was not 
partial."; + return; + } + final IndexId indexId = indexLookup.get(c.key.getIndexName()); + if (indexId != null) { + builder.put(indexId, c.key.id(), c.value.generation()); + } + }); + } else { + snapshot.clones().forEach(c -> { + final IndexId indexId = indexLookup.get(c.key); + for (int i = 0; i < c.value.size(); i++) { + builder.put(indexId, i, c.value.get(i).generation()); + } + }); + } return builder.build(); } @@ -1075,12 +1084,18 @@ private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata met metadataListener.onResponse(metadata); } else { threadPool.executor(ThreadPool.Names.SNAPSHOT).execute( - ActionRunnable.supply(metadataListener, () -> repo.getSnapshotGlobalMetadata(entry.source()))); + ActionRunnable.supply(metadataListener, () -> { + final Metadata.Builder metaBuilder = Metadata.builder(repo.getSnapshotGlobalMetadata(entry.source())); + for (IndexId index : entry.indices()) { + metaBuilder.put(repo.getSnapshotIndexMetaData(repositoryData, entry.source(), index), false); + } + return metaBuilder.build(); + })); } metadataListener.whenComplete(meta -> repo.finalizeSnapshot( shardGenerations, repositoryData.getGenId(), - metadataForSnapshot(entry, metadata), + metadataForSnapshot(entry, meta), snapshotInfo, entry.version(), state -> stateWithoutSnapshot(state, snapshot), From 5b14bd42c4ebd2b63bd0bc20af8d7541407d0f94 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 18:04:24 +0200 Subject: [PATCH 18/94] bck --- .../snapshots/CloneSnapshotIT.java | 2 ++ .../snapshots/ConcurrentSnapshotsIT.java | 25 +++++++++++++++++++ .../snapshots/SnapshotsService.java | 6 +++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 366cfe5e75f9f..c8438d2a6ae8c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -18,8 +18,10 @@ */ package org.elasticsearch.snapshots; +import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.test.ESIntegTestCase; import java.util.List; diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index 4c031b4b4c1c8..8d6a6278f3005 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -1188,6 +1188,31 @@ public void testQueuedDeleteAfterFinalizationFailure() throws Exception { assertThat(sne.getCause().getMessage(), containsString("exception after block")); } + public void testConcurrentCloneAndSnapshot() throws Exception { + internalCluster().startMasterOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + createRepository(repoName, "mock"); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + final String sourceSnapshot = "source-snapshot"; + 
createFullSnapshot(repoName, sourceSnapshot); + + indexRandomDocs(indexName, randomIntBetween(20, 100)); + + final String targetSnapshot = "target-snapshot"; + final ActionFuture snapshot2Future = + startFullSnapshotBlockedOnDataNode("snapshot-2", repoName, dataNode); + waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); + final ActionFuture cloneFuture = + client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); + awaitNSnapshotsInProgress(2); + unblockNode(repoName, dataNode); + assertAcked(cloneFuture.get()); + assertSuccessful(snapshot2Future); + } + private static String startDataNodeWithLargeSnapshotPool() { return internalCluster().startDataOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index d51e4bf12fe85..2d0fde0ff88e8 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -481,6 +481,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS for (int i = 0; i < indexClone.value.size(); i++) { final int shardId = i; final ShardSnapshotStatus shardStatusBefore = indexClone.value.get(shardId); + if (shardStatusBefore.state() != ShardState.INIT) { + continue; + } repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, shardId, shardStatusBefore.generation(), ActionListener.wrap( generation -> innerUpdateSnapshotState( @@ -2169,8 +2172,7 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor Date: Mon, 31 Aug 2020 18:12:01 +0200 Subject: [PATCH 19/94] bck --- .../java/org/elasticsearch/snapshots/SnapshotsService.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 2d0fde0ff88e8..7173bf8707ad0 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -334,13 +334,9 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); // TODO: do not allow delete of source of in-progress-clone - // TODO: handle index metadata via metadata generations only (no physical file writing) ... implement this in repo - // TODO: just copy global metadata blob? Or just make it empty with the index metadata in it? (latter might be better with DS) // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...) - // TODO: SnapshotInfo build as always // TODO: shards are snapshot shard-by-shard on the master node, no need for coordination here // TODO: throw when no indices match - // TODO: what about snapshot metadata? 
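// [Editorial aside, not part of the patch] The ConcurrentSnapshotsIT test above drives the new
// clone API through the transport client. Below is a minimal, hedged sketch of that caller-side
// path, assuming the CloneSnapshotRequestBuilder wired up earlier in this series; the repository,
// snapshot and index names are placeholders borrowed from the test, and error handling is omitted:
static void cloneSnapshotExample(org.elasticsearch.client.Client client) {
    final org.elasticsearch.action.support.master.AcknowledgedResponse response = client.admin().cluster()
        .prepareCloneSnapshot("repo-name", "source-snapshot", "target-snapshot")
        .setIndices("index-1")
        .get();
    assert response.isAcknowledged() : "clone snapshot request was not acknowledged";
}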
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { private SnapshotsInProgress.Entry newEntry; From a56e8262c63c70d864f4e76ace893c7ba7044def Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 18:13:43 +0200 Subject: [PATCH 20/94] bck --- .../main/java/org/elasticsearch/snapshots/SnapshotsService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 7173bf8707ad0..180ff86332307 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -444,7 +444,6 @@ public ClusterState execute(ClusterState currentState) { if (busyShardsInRepo.contains(SnapshotsInProgress.repoShardId(count.v1(), shardId))) { shardSnapshotStatuses.add(ShardSnapshotStatus.UNASSIGNED_QUEUED); } else { - // TODO: proper state + generation shardSnapshotStatuses.add(new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); } From bdac60a9797c393092ef7ddf2b86e2b183ebf9e5 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 19:09:33 +0200 Subject: [PATCH 21/94] bck --- .../cluster/SnapshotsInProgress.java | 42 ++++++--- .../snapshots/SnapshotsService.java | 88 ++++++++++++------- ...SnapshotsInProgressSerializationTests.java | 2 +- 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 152d158092ea1..4d78b7a741d26 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.RepositoryOperation; @@ -94,7 +95,7 @@ public static Entry startedEntry(Snapshot snapshot, boolean includeGlobalState, ImmutableOpenMap shards, Map userMetadata, Version version) { return new SnapshotsInProgress.Entry(snapshot, includeGlobalState, partial, - completed(shards.values()) ? State.SUCCESS : State.STARTED, + completed(shards.values(), ImmutableOpenMap.of()) ? 
State.SUCCESS : State.STARTED, indices, dataStreams, startTime, repositoryStateId, shards, null, userMetadata, version, null, null); } @@ -134,7 +135,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final SnapshotId source; - private final ImmutableOpenMap> clones; + private final ImmutableOpenMap clones; @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -144,7 +145,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, Version version, @Nullable SnapshotId source, - @Nullable ImmutableOpenMap> clones) { + @Nullable ImmutableOpenMap clones) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -186,7 +187,7 @@ private Entry(StreamInput in) throws IOException { } if (in.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { source = in.readOptionalWriteable(SnapshotId::new); - clones = in.readImmutableMap(StreamInput::readString, inpt -> inpt.readList(ShardSnapshotStatus::new)); + clones = in.readImmutableMap(RepoShardId::new, ShardSnapshotStatus::new); } else { source = null; clones = ImmutableOpenMap.of(); @@ -208,7 +209,7 @@ private static boolean assertShardsConsistent(SnapshotId source, State state, Li assert source == null || indexNames.isEmpty() == false : "No empty snapshot clones allowed"; assert source != null || indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; - final boolean shardsCompleted = completed(shards.values()); + final boolean shardsCompleted = completed(shards.values(), ImmutableOpenMap.of()); if (source == null) { assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) : "Completed state must imply all shards completed but saw state [" + state + "] and shards " + shards; @@ -225,7 +226,7 @@ public Entry withRepoGen(long newRepoGen) { userMetadata, version, source, clones); } - public Entry withClones(ImmutableOpenMap> updatedClones) { + public Entry withClones(ImmutableOpenMap updatedClones) { return new Entry(snapshot, includeGlobalState, partial, state, this.indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, updatedClones); } @@ -268,7 +269,7 @@ public Entry fail(ImmutableOpenMap shards, State s * @return new snapshot entry */ public Entry withShardStates(ImmutableOpenMap shards) { - if (completed(shards.values())) { + if (completed(shards.values(), clones)) { return new Entry(snapshot, includeGlobalState, partial, State.SUCCESS, indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, clones); } @@ -282,7 +283,7 @@ public Entry withShardStates(ImmutableOpenMap shar public Entry withStartedShards(ImmutableOpenMap shards) { final SnapshotsInProgress.Entry updated = new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, clones); - assert updated.state().completed() == false && completed(updated.shards().values()) == false + assert updated.state().completed() == false && completed(updated.shards().values(), clones) == false : "Only running snapshots allowed but saw [" + updated + "]"; return updated; } @@ -349,7 +350,7 @@ public 
SnapshotId source() { return source; } - public ImmutableOpenMap> clones() { + public ImmutableOpenMap clones() { return clones; } @@ -449,7 +450,7 @@ public void writeTo(StreamOutput out) throws IOException { } if (out.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { out.writeOptionalWriteable(source); - out.writeMap(clones, StreamOutput::writeString, StreamOutput::writeList); + out.writeMap(clones); } } @@ -463,14 +464,21 @@ public boolean isFragment() { * Checks if all shards in the list have completed * * @param shards list of shard statuses + * @param clones shard clone statuses * @return true if all shards have completed (either successfully or failed), false otherwise */ - public static boolean completed(ObjectContainer shards) { + public static boolean completed(ObjectContainer shards, + ImmutableOpenMap clones) { for (ObjectCursor status : shards) { if (status.value.state().completed == false) { return false; } } + for (ObjectCursor status : clones.values()) { + if (status.value.state().completed == false) { + return false; + } + } return true; } @@ -719,7 +727,7 @@ public static RepoShardId repoShardId(IndexId indexId, int shard) { return new RepoShardId(indexId, shard); } - public static final class RepoShardId { + public static final class RepoShardId implements Writeable { private final IndexId index; @@ -730,6 +738,10 @@ private RepoShardId(IndexId index, int shard) { this.shard = shard; } + private RepoShardId(StreamInput in) throws IOException { + this(new IndexId(in), in.readVInt()); + } + public String indexName() { return index.getName(); } @@ -754,6 +766,12 @@ public boolean equals(Object obj) { final RepoShardId that = (RepoShardId) obj; return that.index.equals(index) && that.shard == shard; } + + @Override + public void writeTo(StreamOutput out) throws IOException { + index.writeTo(out); + out.writeVInt(shard); + } } public enum ShardState implements Writeable { diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 180ff86332307..9afb2aef59ef0 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -430,25 +430,25 @@ public ClusterState execute(ClusterState currentState) { boolean changed = false; for (int i = 0; i < updatedEntries.size(); i++) { if (cloneEntry.equals(updatedEntries.get(i))) { - final ImmutableOpenMap.Builder> clonesBuilder = + final ImmutableOpenMap.Builder clonesBuilder = ImmutableOpenMap.builder(); - final Set busyShards = - busyShardsForRepo(repository.getMetadata().name(), snapshotsInProgress); + final Set busyShards = busyShardsForRepo( + repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); final Set busyShardsInRepo = busyShards.stream().map(shardId -> SnapshotsInProgress.repoShardId( repositoryData.resolveIndexId(shardId.getIndexName()), shardId.getId())) .collect(Collectors.toSet()); for (Tuple count : counts) { - final List shardSnapshotStatuses = new ArrayList<>(count.v2()); for (int shardId = 0; shardId < count.v2(); shardId++) { - if (busyShardsInRepo.contains(SnapshotsInProgress.repoShardId(count.v1(), shardId))) { - shardSnapshotStatuses.add(ShardSnapshotStatus.UNASSIGNED_QUEUED); + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); + if (busyShardsInRepo.contains(repoShardId)) { + clonesBuilder.put(repoShardId, 
ShardSnapshotStatus.UNASSIGNED_QUEUED); } else { - shardSnapshotStatuses.add(new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), - repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); + clonesBuilder.put(repoShardId, + new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); } } - clonesBuilder.put(count.v1().getName(), shardSnapshotStatuses); } updatedEntry = cloneEntry.withClones(clonesBuilder.build()); updatedEntries.set(i, updatedEntry); @@ -471,19 +471,17 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS if (updatedEntry != null) { final SnapshotId sourceSnapshot = updatedEntry.source(); final SnapshotId targetSnapshot = updatedEntry.snapshot().getSnapshotId(); - for (ObjectObjectCursor> indexClone : updatedEntry.clones()) { - final IndexId indexId = repositoryData.resolveIndexId(indexClone.key); - for (int i = 0; i < indexClone.value.size(); i++) { - final int shardId = i; - final ShardSnapshotStatus shardStatusBefore = indexClone.value.get(shardId); + for (ObjectObjectCursor indexClone : updatedEntry.clones()) { + final IndexId indexId = repositoryData.resolveIndexId(indexClone.key.indexName()); + final ShardSnapshotStatus shardStatusBefore = indexClone.value; if (shardStatusBefore.state() != ShardState.INIT) { continue; } - repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, shardId, + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, indexClone.key.shardId(), shardStatusBefore.generation(), ActionListener.wrap( generation -> innerUpdateSnapshotState( new ShardSnapshotUpdate(updatedEntry.snapshot(), - SnapshotsInProgress.repoShardId(indexId, shardId), null, + indexClone.key, null, new ShardSnapshotStatus(clusterService.localNode().getId(), ShardState.SUCCESS, generation)), ActionListener.wrap( @@ -498,7 +496,6 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS //TODO: Error handling throw new AssertionError(e); })); - } } } else { // TODO: this is broken, we should error somehow maybe @@ -585,10 +582,8 @@ private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snaps }); } else { snapshot.clones().forEach(c -> { - final IndexId indexId = indexLookup.get(c.key); - for (int i = 0; i < c.value.size(); i++) { - builder.put(indexId, i, c.value.get(i).generation()); - } + final IndexId indexId = indexLookup.get(c.key.indexName()); + builder.put(indexId, c.key.shardId(), c.value.generation()); }); } return builder.build(); @@ -824,7 +819,7 @@ public ClusterState execute(ClusterState currentState) { changed = true; logger.debug("[{}] was found in dangling INIT or ABORTED state", snapshot); } else { - if (snapshot.state().completed() || completed(snapshot.shards().values())) { + if (snapshot.state().completed() || completed(snapshot.shards().values(), snapshot.clones())) { finishedSnapshots.add(snapshot); } updatedSnapshotEntries.add(snapshot); @@ -1936,7 +1931,7 @@ private static ImmutableOpenMap builder = ImmutableOpenMap.builder(); final ShardGenerations shardGenerations = repositoryData.shardGenerations(); - final Set inProgressShards = busyShardsForRepo(repoName, snapshotsInProgress); + final Set inProgressShards = busyShardsForRepo(repoName, snapshotsInProgress, metadata); final boolean readyToExecute = deletionsInProgress == null || deletionsInProgress.getEntries().stream() .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == 
SnapshotDeletionsInProgress.State.STARTED); for (IndexId index : indices) { @@ -1997,16 +1992,27 @@ private static ImmutableOpenMap busyShardsForRepo(String repoName, @Nullable SnapshotsInProgress snapshots) { + private static Set busyShardsForRepo(String repoName, @Nullable SnapshotsInProgress snapshots, Metadata metadata) { final List runningSnapshots = snapshots == null ? List.of() : snapshots.entries(); final Set inProgressShards = new HashSet<>(); for (SnapshotsInProgress.Entry runningSnapshot : runningSnapshots) { if (runningSnapshot.repository().equals(repoName) == false) { continue; } - for (ObjectObjectCursor shard : runningSnapshot.shards()) { - if (shard.value.isActive()) { - inProgressShards.add(shard.key); + if (runningSnapshot.source() == null) { + for (ObjectObjectCursor shard : runningSnapshot.shards()) { + if (shard.value.isActive()) { + inProgressShards.add(shard.key); + } + } + } else { + for (ObjectObjectCursor clone : runningSnapshot.clones()) { + final ShardSnapshotStatus shardState = clone.value; + final IndexMetadata index = metadata.index(clone.key.indexName()); + if (shardState.isActive()) { + // TODO: there is some weirdness here on index delete that needs to be accounted for + inProgressShards.add(new ShardId(index.getIndex(), clone.key.shardId())); + } } } } @@ -2148,10 +2154,11 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); + final Map> reusedRepoShardIdsByRepo = new HashMap<>(); for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) { boolean updated = false; final ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); - final ImmutableOpenMap.Builder> clones = ImmutableOpenMap.builder(); + final ImmutableOpenMap.Builder clones = ImmutableOpenMap.builder(); for (ShardSnapshotUpdate updateSnapshotState : tasks) { if (updateSnapshotState.isClone()) { final RepoShardId finishedShardId = updateSnapshotState.repoShardId(); @@ -2162,12 +2169,29 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor indexStatuses = new ArrayList<>(clones.get(finishedShardId.indexName())); - indexStatuses.set(finishedShardId.shardId(), updateSnapshotState.status); changedCount++; - clones.put(finishedShardId.indexName(), indexStatuses); + clones.put(finishedShardId, updateSnapshotState.status); } else { - // TODO: update on changes + final String updatedRepository = updateSnapshotState.snapshot().getRepository(); + final Set reusedShardIds = + reusedRepoShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); + if (entry.repository().equals(updatedRepository) && + entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false + && entry.clones().keys().contains(finishedShardId)) { + final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + clones.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); + reusedShardIds.add(finishedShardId); + } } } else { final ShardId finishedShardId = 
updateSnapshotState.shardId(); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java index 9ad7756fa440f..bd1f9fc67d7b2 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotsInProgressSerializationTests.java @@ -139,7 +139,7 @@ protected Custom mutateInstance(Custom instance) { } public static State randomState(ImmutableOpenMap shards) { - return SnapshotsInProgress.completed(shards.values()) + return SnapshotsInProgress.completed(shards.values(), ImmutableOpenMap.of()) ? randomFrom(State.SUCCESS, State.FAILED) : randomFrom(State.STARTED, State.INIT, State.ABORTED); } } From 82808e560f1e0a84ebeaaca654027eae7a2608de Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 20:15:37 +0200 Subject: [PATCH 22/94] better --- .../org/elasticsearch/cluster/SnapshotsInProgress.java | 5 +++-- .../org/elasticsearch/snapshots/SnapshotsService.java | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 4d78b7a741d26..d707a9cf51b91 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -227,8 +227,9 @@ public Entry withRepoGen(long newRepoGen) { } public Entry withClones(ImmutableOpenMap updatedClones) { - return new Entry(snapshot, includeGlobalState, partial, state, this.indices, dataStreams, startTime, repositoryStateId, shards, - failure, userMetadata, version, source, updatedClones); + return new Entry(snapshot, includeGlobalState, partial, + completed(shards.values(), updatedClones) ? 
State.SUCCESS : state, + indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, updatedClones); } /** diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 9afb2aef59ef0..1449bf68fadfc 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -469,19 +469,21 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { if (updatedEntry != null) { + final Snapshot target = updatedEntry.snapshot(); final SnapshotId sourceSnapshot = updatedEntry.source(); - final SnapshotId targetSnapshot = updatedEntry.snapshot().getSnapshotId(); + final SnapshotId targetSnapshot = target.getSnapshotId(); for (ObjectObjectCursor indexClone : updatedEntry.clones()) { final IndexId indexId = repositoryData.resolveIndexId(indexClone.key.indexName()); final ShardSnapshotStatus shardStatusBefore = indexClone.value; if (shardStatusBefore.state() != ShardState.INIT) { continue; } - repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, indexClone.key.shardId(), + final RepoShardId repoShardId = indexClone.key; + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, repoShardId.shardId(), shardStatusBefore.generation(), ActionListener.wrap( generation -> innerUpdateSnapshotState( - new ShardSnapshotUpdate(updatedEntry.snapshot(), - indexClone.key, null, + new ShardSnapshotUpdate(target, + repoShardId, null, new ShardSnapshotStatus(clusterService.localNode().getId(), ShardState.SUCCESS, generation)), ActionListener.wrap( From 95638b21548a038ef7206d58e81d483500956047 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 20:58:45 +0200 Subject: [PATCH 23/94] bck --- .../snapshots/CloneSnapshotIT.java | 2 - .../cluster/SnapshotsInProgress.java | 5 +- .../blobstore/BlobStoreRepository.java | 7 +- .../snapshots/SnapshotsService.java | 192 +++++++++++------- 4 files changed, 127 insertions(+), 79 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index c8438d2a6ae8c..366cfe5e75f9f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -18,10 +18,8 @@ */ package org.elasticsearch.snapshots; -import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; -import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.test.ESIntegTestCase; import java.util.List; diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index d707a9cf51b91..1d6c326398a35 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -33,7 +33,6 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.ToXContent; import 
org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.RepositoryOperation; @@ -743,6 +742,10 @@ private RepoShardId(StreamInput in) throws IOException { this(new IndexId(in), in.readVInt()); } + public IndexId index() { + return index; + } + public String indexName() { return index.getName(); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 79d3e543a5447..a0c01750cb1bd 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -403,6 +403,11 @@ public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId ind final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); executor.execute(ActionRunnable.supply(listener, () -> { final BlobContainer shardContainer = shardContainer(index, shardId); + final BlobStoreIndexShardSnapshots existingSnapshots = + buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); + if (existingSnapshots.snapshots().stream().anyMatch(snapshotFiles -> snapshotFiles.snapshot().equals(target.getName()))) { + return shardGeneration; + } final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(shardContainer, source); final String newGen; if (shardGeneration == null) { @@ -411,8 +416,6 @@ public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId ind newGen = UUIDs.randomBase64UUID(); logger.trace("[{}] [{}] writing shard snapshot file for clone", shardId, target); INDEX_SHARD_SNAPSHOT_FORMAT.write(sourceMeta.clone(target.getName()), shardContainer, target.getUUID(), compress); - final BlobStoreIndexShardSnapshots existingSnapshots = - buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); INDEX_SHARD_SNAPSHOTS_FORMAT.write(existingSnapshots.withClone(source.getName(), target.getName()), shardContainer, newGen, compress); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 1449bf68fadfc..281a55474bcea 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -146,7 +146,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus private final ThreadPool threadPool; private final Map>>> snapshotCompletionListeners = - new ConcurrentHashMap<>(); + new ConcurrentHashMap<>(); /** * Listeners for snapshot deletion keyed by delete uuid as returned from {@link SnapshotDeletionsInProgress.Entry#uuid()} @@ -172,7 +172,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus * {@link SnapshotsInProgress#entries()} and {@link SnapshotDeletionsInProgress#getEntries()}. 
*/ public static final Setting MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING = - Setting.intSetting("snapshot.max_concurrent_operations", 1000, 1, Setting.Property.NodeScope, Setting.Property.Dynamic); + Setting.intSetting("snapshot.max_concurrent_operations", 1000, 1, Setting.Property.NodeScope, Setting.Property.Dynamic); private volatile int maxConcurrentOperations; @@ -192,7 +192,7 @@ public SnapshotsService(Settings settings, ClusterService clusterService, IndexN clusterService.addLowPriorityApplier(this); maxConcurrentOperations = MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING.get(settings); clusterService.getClusterSettings().addSettingsUpdateConsumer(MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING, - i -> maxConcurrentOperations = i); + i -> maxConcurrentOperations = i); } } @@ -200,12 +200,12 @@ public SnapshotsService(Settings settings, ClusterService clusterService, IndexN * Same as {@link #createSnapshot(CreateSnapshotRequest, ActionListener)} but invokes its callback on completion of * the snapshot. * - * @param request snapshot request + * @param request snapshot request * @param listener snapshot completion listener */ public void executeSnapshot(final CreateSnapshotRequest request, final ActionListener listener) { createSnapshot(request, - ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)); + ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)); } /** @@ -257,7 +257,7 @@ public ClusterState execute(ClusterState currentState) { currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); if (repositoryCleanupInProgress.hasCleanupInProgress()) { throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, - "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); } ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); // Store newSnapshot here to be processed in clusterStateProcessed @@ -274,7 +274,7 @@ public ClusterState execute(ClusterState currentState) { .collect(Collectors.toMap(IndexId::getName, Function.identity()))); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); ImmutableOpenMap shards = shards(snapshots, deletionsInProgress, currentState.metadata(), - currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); + currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); if (request.partial() == false) { Set missing = new HashSet<>(); for (ObjectObjectCursor entry : shards) { @@ -409,9 +409,9 @@ public TimeValue timeout() { /** * Determine the number of shards in each index of a clone operation and update the cluster state accordingly. 
* - * @param repository repository to run operation on - * @param repositoryData repository data at the time the clone operation was started - * @param cloneEntry clone operation in the cluster state + * @param repository repository to run operation on + * @param repositoryData repository data at the time the clone operation was started + * @param cloneEntry clone operation in the cluster state */ private void startCloning(Repository repository, RepositoryData repositoryData, SnapshotsInProgress.Entry cloneEntry) { final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); @@ -471,33 +471,14 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS if (updatedEntry != null) { final Snapshot target = updatedEntry.snapshot(); final SnapshotId sourceSnapshot = updatedEntry.source(); - final SnapshotId targetSnapshot = target.getSnapshotId(); for (ObjectObjectCursor indexClone : updatedEntry.clones()) { final IndexId indexId = repositoryData.resolveIndexId(indexClone.key.indexName()); - final ShardSnapshotStatus shardStatusBefore = indexClone.value; - if (shardStatusBefore.state() != ShardState.INIT) { - continue; - } - final RepoShardId repoShardId = indexClone.key; - repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, indexId, repoShardId.shardId(), - shardStatusBefore.generation(), ActionListener.wrap( - generation -> innerUpdateSnapshotState( - new ShardSnapshotUpdate(target, - repoShardId, null, - new ShardSnapshotStatus(clusterService.localNode().getId(), - ShardState.SUCCESS, generation)), - ActionListener.wrap( - v -> { - - }, e -> { - //TODO: Error handling - throw new AssertionError(e); - } - - )), e -> { - //TODO: Error handling - throw new AssertionError(e); - })); + final ShardSnapshotStatus shardStatusBefore = indexClone.value; + if (shardStatusBefore.state() != ShardState.INIT) { + continue; + } + final RepoShardId repoShardId = indexClone.key; + runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } } else { // TODO: this is broken, we should error somehow maybe @@ -514,6 +495,31 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } } + private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, + ShardSnapshotStatus shardStatusBefore, RepoShardId repoShardId, Repository repository) { + SnapshotId targetSnapshot = target.getSnapshotId(); + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId.index(), repoShardId.shardId(), + shardStatusBefore.generation(), ActionListener.wrap( + generation -> innerUpdateSnapshotState( + new ShardSnapshotUpdate(target, + repoShardId, null, + new ShardSnapshotStatus(clusterService.localNode().getId(), + ShardState.SUCCESS, generation)), + ActionListener.wrap( + v -> logger.trace( + "Marked [{}] as successfully cloned from [{}] to [{}]", + repoShardId, sourceSnapshot, targetSnapshot), + e -> { + //TODO: Error handling + throw new AssertionError(e); + } + + )), e -> { + //TODO: Error handling + throw new AssertionError(e); + })); + } + private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, SnapshotDeletionsInProgress deletionsInProgress) { final int inProgressOperations = snapshotsInProgress.entries().size() + deletionsInProgress.getEntries().size(); @@ -529,8 +535,8 @@ private void ensureBelowConcurrencyLimit(String repository, String name, Snapsho * Validates snapshot request * * @param repositoryName repository name - * @param snapshotName 
snapshot name - * @param state current cluster state + * @param snapshotName snapshot name + * @param state current cluster state */ private static void validate(String repositoryName, String snapshotName, ClusterState state) { RepositoriesMetadata repositoriesMetadata = state.getMetadata().custom(RepositoriesMetadata.TYPE); @@ -1125,7 +1131,7 @@ private void handleFinalizationFailure(Exception e, SnapshotsInProgress.Entry en // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> new ParameterizedMessage( - "[{}] failed to update cluster state during snapshot finalization", snapshot), e); + "[{}] failed to update cluster state during snapshot finalization", snapshot), e); failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, "Failed to update cluster state during snapshot finalization", e)); failAllListenersOnMasterFailOver(e); @@ -1319,7 +1325,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS * @param deletions snapshot deletions to update * @param snapshotIds snapshot ids to remove * @param repository repository that the snapshot ids belong to - * @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged + * @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged */ @Nullable private static SnapshotDeletionsInProgress deletionsWithoutSnapshots(SnapshotDeletionsInProgress deletions, @@ -1592,7 +1598,8 @@ public static boolean useIndexGenerations(Version repositoryMetaVersion) { return repositoryMetaVersion.onOrAfter(INDEX_GEN_IN_REPO_DATA_VERSION); } - /** Deletes snapshot from repository + /** + * Deletes snapshot from repository * * @param deleteEntry delete entry in cluster state * @param minNodeVersion minimum node version in the cluster @@ -1616,7 +1623,8 @@ public void onFailure(Exception e) { }); } - /** Deletes snapshot from repository + /** + * Deletes snapshot from repository * * @param deleteEntry delete entry in cluster state * @param repositoryData the {@link RepositoryData} of the repository to delete from @@ -1935,7 +1943,7 @@ private static ImmutableOpenMap inProgressShards = busyShardsForRepo(repoName, snapshotsInProgress, metadata); final boolean readyToExecute = deletionsInProgress == null || deletionsInProgress.getEntries().stream() - .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED); + .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED); for (IndexId index : indices) { final String indexName = index.getName(); final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false; @@ -1943,7 +1951,7 @@ private static ImmutableOpenMap busyShardsForRepo(String repoName, @Nullable SnapshotsInProgress snapshots, Metadata metadata) { @@ -2095,15 +2103,15 @@ protected void doClose() { public boolean assertAllListenersResolved() { final DiscoveryNode localNode = clusterService.localNode(); assert endingSnapshots.isEmpty() : "Found leaked ending snapshots " + endingSnapshots - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert snapshotCompletionListeners.isEmpty() : "Found leaked snapshot completion listeners " + snapshotCompletionListeners - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert currentlyFinalizing.isEmpty() : "Found leaked finalizations " + currentlyFinalizing - + " on 
[" + localNode + "]"; + + " on [" + localNode + "]"; assert snapshotDeletionListeners.isEmpty() : "Found leaked snapshot delete listeners " + snapshotDeletionListeners - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert repositoryOperations.isEmpty() : "Found leaked snapshots to finalize " + repositoryOperations - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; return true; } @@ -2151,8 +2159,7 @@ public ShardSnapshotStatus status() { private static class SnapshotStateExecutor implements ClusterStateTaskExecutor { @Override - public ClusterTasksResult - execute(ClusterState currentState, List tasks) { + public ClusterTasksResult execute(ClusterState currentState, List tasks) { int changedCount = 0; final List entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); @@ -2211,21 +2218,48 @@ private static class SnapshotStateExecutor implements ClusterStateTaskExecutor reusedShardIds = reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false - && entry.shards().keys().contains(finishedShardId)) { - final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); - if (existingStatus.state() != ShardState.QUEUED) { - continue; - } - if (updated == false) { - shards.putAll(entry.shards()); - updated = true; + entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false) { + if (entry.source() == null) { + if (entry.shards().keys().contains(finishedShardId)) { + final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); + reusedShardIds.add(finishedShardId); + } + } else { + // TODO: horribly inefficient obv. 
+ final Map indicesLookup = + entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); + if (indicesLookup.containsKey(finishedShardId.getIndexName())) { + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); + if (entry.clones().containsKey(repoShardId)) { + final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + clones.put(repoShardId, new ShardSnapshotStatus( + currentState.nodes().getLocalNodeId(), finishedStatus.generation())); + reusedShardIds.add(finishedShardId); + } + } } - final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); - logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, - finishedStatus.nodeId(), finishedStatus.generation()); - shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); - reusedShardIds.add(finishedShardId); } } } @@ -2277,8 +2311,18 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(update.snapshot()); // If the entry is still in the cluster state and is completed, try finalizing the snapshot in the repo - if (updatedEntry != null && updatedEntry.state().completed()) { - endSnapshot(updatedEntry, newState.metadata(), null); + if (updatedEntry != null) { + if (updatedEntry.state().completed()) { + endSnapshot(updatedEntry, newState.metadata(), null); + } else if (updatedEntry.source() != null) { + // this is a clone, see if new work is ready + for (ObjectObjectCursor clone : updatedEntry.clones()) { + if (clone.value.state() == ShardState.INIT) { + runReadyClone(updatedEntry.snapshot(), updatedEntry.source(), clone.value, clone.key, + repositoriesService.repository(updatedEntry.repository())); + } + } + } } } } From 862e8ca5bab7eaec7aec64fbd1f6fd7ff76dc34f Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 31 Aug 2020 23:17:59 +0200 Subject: [PATCH 24/94] green --- .../cluster/SnapshotsInProgress.java | 3 + .../blobstore/BlobStoreRepository.java | 3 - .../snapshots/SnapshotsService.java | 77 +++++++++++-------- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 1d6c326398a35..ef769220def24 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -226,6 +226,9 @@ public Entry withRepoGen(long newRepoGen) { } public Entry withClones(ImmutableOpenMap updatedClones) { + if (updatedClones.equals(clones)) { + return this; + } return new Entry(snapshot, includeGlobalState, partial, completed(shards.values(), updatedClones) ? 
State.SUCCESS : state, indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, updatedClones); diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index a0c01750cb1bd..4a1110bb30aa9 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -405,9 +405,6 @@ public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId ind final BlobContainer shardContainer = shardContainer(index, shardId); final BlobStoreIndexShardSnapshots existingSnapshots = buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); - if (existingSnapshots.snapshots().stream().anyMatch(snapshotFiles -> snapshotFiles.snapshot().equals(target.getName()))) { - return shardGeneration; - } final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(shardContainer, source); final String newGen; if (shardGeneration == null) { diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 281a55474bcea..ffcafcf6a4165 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -495,29 +495,38 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } } + private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); + private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, ShardSnapshotStatus shardStatusBefore, RepoShardId repoShardId, Repository repository) { SnapshotId targetSnapshot = target.getSnapshotId(); - repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId.index(), repoShardId.shardId(), - shardStatusBefore.generation(), ActionListener.wrap( - generation -> innerUpdateSnapshotState( - new ShardSnapshotUpdate(target, - repoShardId, null, - new ShardSnapshotStatus(clusterService.localNode().getId(), - ShardState.SUCCESS, generation)), - ActionListener.wrap( - v -> logger.trace( - "Marked [{}] as successfully cloned from [{}] to [{}]", - repoShardId, sourceSnapshot, targetSnapshot), - e -> { - //TODO: Error handling - throw new AssertionError(e); - } + if (currentlyCloning.add(repoShardId)) { + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId.index(), repoShardId.shardId(), + shardStatusBefore.generation(), ActionListener.wrap( + generation -> innerUpdateSnapshotState( + new ShardSnapshotUpdate(target, + repoShardId, null, + new ShardSnapshotStatus(clusterService.localNode().getId(), + ShardState.SUCCESS, generation)), + ActionListener.wrap( + v -> { + currentlyCloning.remove(repoShardId); + logger.trace( + "Marked [{}] as successfully cloned from [{}] to [{}]", + repoShardId, sourceSnapshot, targetSnapshot); + }, + e -> { + currentlyCloning.remove(repoShardId); + //TODO: Error handling + throw new AssertionError(e); + } - )), e -> { - //TODO: Error handling - throw new AssertionError(e); - })); + )), e -> { + currentlyCloning.remove(repoShardId); + //TODO: Error handling + throw new AssertionError(e); + })); + } } private void ensureBelowConcurrencyLimit(String repository, String name, SnapshotsInProgress snapshotsInProgress, @@ -2232,7 +2241,8 @@ 
public ClusterTasksResult execute(ClusterState currentState final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, finishedStatus.nodeId(), finishedStatus.generation()); - shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); + shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), + finishedStatus.generation())); reusedShardIds.add(finishedShardId); } } else { @@ -2248,7 +2258,7 @@ public ClusterTasksResult execute(ClusterState currentState continue; } if (updated == false) { - shards.putAll(entry.shards()); + clones.putAll(entry.clones()); updated = true; } final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); @@ -2307,21 +2317,22 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } finally { // Maybe this state update completed the snapshot. If we are not already ending it because of a concurrent // state update we check if its state is completed and end it if it is. + final SnapshotsInProgress snapshotsInProgress = + newState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); if (endingSnapshots.contains(update.snapshot()) == false) { - final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(update.snapshot()); // If the entry is still in the cluster state and is completed, try finalizing the snapshot in the repo - if (updatedEntry != null) { - if (updatedEntry.state().completed()) { - endSnapshot(updatedEntry, newState.metadata(), null); - } else if (updatedEntry.source() != null) { - // this is a clone, see if new work is ready - for (ObjectObjectCursor clone : updatedEntry.clones()) { - if (clone.value.state() == ShardState.INIT) { - runReadyClone(updatedEntry.snapshot(), updatedEntry.source(), clone.value, clone.key, - repositoriesService.repository(updatedEntry.repository())); - } - } + if (updatedEntry != null && updatedEntry.state().completed()) { + endSnapshot(updatedEntry, newState.metadata(), null); + } + } + // TODO: this is horrifically expensive, find a way of more efficiently transporting the state here + for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { + // this is a clone, see if new work is ready + for (ObjectObjectCursor clone : entry.clones()) { + if (clone.value.state() == ShardState.INIT) { + runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, + repositoriesService.repository(entry.repository())); } } } From ecd821d438cca3b36bdc407a4abff2f45813d81c Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 1 Sep 2020 13:03:04 +0200 Subject: [PATCH 25/94] bck --- .../cluster/RestCloneSnapshotAction.java | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java new file mode 100644 index 0000000000000..e89659fbcd1bd --- /dev/null +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java @@ -0,0 +1,69 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.rest.action.admin.cluster; + +import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; +import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.client.Requests.createSnapshotRequest; +import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.elasticsearch.rest.RestRequest.Method.PUT; + +/** + * Clones indices from one snapshot into another + */ +public class RestCloneSnapshotAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of( + new Route(PUT, "/_snapshot/{repository}/{source_snapshot}/_clone/{target_snapshot}")); + } + + @Override + public String getName() { + return "clone_snapshot_action"; + } + + @Override + public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { + Map body = request.contentParser().mapOrdered(); + final CloneSnapshotRequest cloneSnapshotRequest = new CloneSnapshotRequest( + request.param("repository"), request.param("source_snapshot"), request.param("target_snapshot"), + XContentMapValues.nodeStringArrayValue(body.getOrDefault("indices", Collections.emptyList())), + XContentMapValues.nodeStringArrayValue(body.getOrDefault("excluded_settings", Collections.emptyList())), + Settings.EMPTY + ); + cloneSnapshotRequest.masterNodeTimeout(request.paramAsTime("master_timeout", cloneSnapshotRequest.masterNodeTimeout())); + return channel -> client.admin().cluster().cloneSnapshot(cloneSnapshotRequest, new RestToXContentListener<>(channel)); + } +} From 8a74150b34159c59da76c16ec347a9250fb392ed Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 1 Sep 2020 14:56:35 +0200 Subject: [PATCH 26/94] bck --- .../apis/clone-snapshot-api.asciidoc | 71 +++++++++++++++++++ .../snapshots/CloneSnapshotIT.java | 31 ++++---- .../snapshots/clone/CloneSnapshotRequest.java | 19 +++-- .../clone/CloneSnapshotRequestBuilder.java | 5 ++ 4 files changed, 109 insertions(+), 17 deletions(-) create mode 100644 docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc diff --git a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc new file mode 100644 index 0000000000000..43f4b8ef35176 --- /dev/null +++ 
b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc
@@ -0,0 +1,71 @@
+[[clone-snapshot-api]]
+=== Clone snapshot API
+++++
+Clone snapshot
+++++
+
+Clones part or all of a snapshot into a new snapshot.
+
+////
+[source,console]
+----
+PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot
+{
+  "indices": "index_a, index_b",
+  "excluded_settings": "setting_a",
+  "index_settings": {
+    "setting_key": "setting_value"
+  }
+}
+----
+// TESTSETUP
+////
+
+[source,console]
+----
+PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot
+----
+
+[[clone-snapshot-api-request]]
+==== {api-request-title}
+
+`PUT /_snapshot/<repository>/<source_snapshot>/_clone/<target_snapshot>`
+
+[[clone-snapshot-api-desc]]
+==== {api-description-title}
+
+The clone snapshot API creates a copy of all or part of an existing snapshot
+within the same repository.
+
+[[clone-snapshot-api-params]]
+==== {api-path-parms-title}
+
+`<repository>`::
+(Required, string)
+Name of the snapshot repository that both the source and target snapshot belong to.
+
+[[clone-snapshot-api-query-params]]
+==== {api-query-parms-title}
+
+`master_timeout`::
+(Optional, <<time-units, time units>>) Specifies the period of time to wait for
+a connection to the master node. If no response is received before the timeout
+expires, the request fails and returns an error. Defaults to `30s`.
+
+`timeout`::
+(Optional, <<time-units, time units>>) Specifies the period of time to wait for
+a response. If no response is received before the timeout expires, the request
+fails and returns an error. Defaults to `30s`.
+
+`indices`::
+(Required, string)
+A comma-separated list of indices to include in the target snapshot.
+<<multi-index,Multi-index syntax>> is supported.
+
+`index_settings`::
+(Optional, object)
+Index settings to apply to all index clones, overriding the corresponding settings of the source indices.
+
+`excluded_settings`::
+(Optional, string)
+A comma-separated list of index settings that should not be included in the cloned index snapshots.
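+
+For illustration, a clone request that limits the clone to specific indices might
+look like the following sketch (the snapshot and index names here are placeholders
+rather than part of the original patch; the body field matches the `indices`
+parameter described above):
+
+[source,console]
+----
+PUT /_snapshot/my_repository/source_snapshot/_clone/another_target_snapshot
+{
+  "indices": "index_a,index_b"
+}
+----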
\ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 366cfe5e75f9f..2b585e0a5137e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -20,6 +20,8 @@ import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESIntegTestCase; import java.util.List; @@ -30,7 +32,7 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase { - public void testCloneSnapshot() throws Exception { + public void testCloneSnapshotIndex() throws Exception { internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; @@ -42,7 +44,9 @@ public void testCloneSnapshot() throws Exception { createFullSnapshot(repoName, sourceSnapshot); indexRandomDocs(indexName, randomIntBetween(20, 100)); - + if (randomBoolean()) { + assertAcked(client().admin().indices().prepareDelete(indexName)); + } final String targetSnapshot = "target-snapshot"; assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get()); @@ -55,7 +59,8 @@ public void testCloneSnapshot() throws Exception { assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); } - public void testCloneSnapshotIndexMissing() throws Exception { + @AwaitsFix(bugUrl = "TODO if we want it") + public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; @@ -69,15 +74,17 @@ public void testCloneSnapshotIndexMissing() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); final String targetSnapshot = "target-snapshot"; - assertAcked(client().admin().indices().prepareDelete(indexName)); - assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get()); + assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName) + .setIndexSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build()).get()); - final List status = client().admin().cluster().prepareSnapshotStatus(repoName) - .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); - assertThat(status, hasSize(2)); - final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); - final SnapshotIndexStatus status2 = status.get(1).getIndices().get(indexName); - assertEquals(status1.getStats().getTotalFileCount(), status2.getStats().getTotalFileCount()); - assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); + final RestoreInfo restoreInfo = client().admin().cluster() + .prepareRestoreSnapshot(repoName, targetSnapshot).setIndices(indexName).setRenamePattern("(.+)") + .setRenameReplacement("$1-copy").setWaitForCompletion(true).get().getRestoreInfo(); + assertEquals(restoreInfo.successfulShards(), 
restoreInfo.totalShards()); + + final String restoredIndex = indexName + "-copy"; + final Settings settings = + client().admin().indices().prepareGetIndex().setIndices(restoredIndex).get().getSettings().get(restoredIndex); + assertEquals(settings.get(IndexMetadata.SETTING_NUMBER_OF_REPLICAS), "1"); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index c70ea2e6fb674..32949ff16ee05 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java @@ -43,7 +43,7 @@ public class CloneSnapshotRequest extends MasterNodeRequest Date: Tue, 1 Sep 2020 15:20:33 +0200 Subject: [PATCH 27/94] clone prevents delete --- .../snapshots/CloneSnapshotIT.java | 65 +++++++++++++++++++ .../snapshots/ConcurrentSnapshotsIT.java | 60 ----------------- .../snapshots/SnapshotsService.java | 12 +++- .../AbstractSnapshotIntegTestCase.java | 35 ++++++++++ 4 files changed, 111 insertions(+), 61 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 2b585e0a5137e..8b25f93d5d419 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -18,15 +18,20 @@ */ package org.elasticsearch.snapshots; +import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.test.ESIntegTestCase; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -59,6 +64,66 @@ public void testCloneSnapshotIndex() throws Exception { assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); } + public void testClonePreventsSnapshotDelete() throws Exception { + final String masterName = internalCluster().startMasterOnlyNode(); + internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + createRepository(repoName, "mock"); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + indexRandomDocs(indexName, randomIntBetween(20, 100)); + + final String targetSnapshot = "target-snapshot"; + blockNodeOnAnyFiles(repoName, masterName); + final ActionFuture cloneFuture = + client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); + waitForBlock(masterName, repoName, 
TimeValue.timeValueSeconds(30L)); + assertFalse(cloneFuture.isDone()); + + ConcurrentSnapshotExecutionException ex = expectThrows(ConcurrentSnapshotExecutionException.class, () -> + client().admin().cluster().prepareDeleteSnapshot(repoName, sourceSnapshot).execute().actionGet()); + assertThat(ex.getMessage(), containsString("cannot delete snapshot while it is being cloned")); + + unblockNode(repoName, masterName); + assertAcked(cloneFuture.get()); + final List status = client().admin().cluster().prepareSnapshotStatus(repoName) + .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); + assertThat(status, hasSize(2)); + final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); + final SnapshotIndexStatus status2 = status.get(1).getIndices().get(indexName); + assertEquals(status1.getStats().getTotalFileCount(), status2.getStats().getTotalFileCount()); + assertEquals(status1.getStats().getTotalSize(), status2.getStats().getTotalSize()); + } + + public void testConcurrentCloneAndSnapshot() throws Exception { + internalCluster().startMasterOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + createRepository(repoName, "mock"); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + indexRandomDocs(indexName, randomIntBetween(20, 100)); + + final String targetSnapshot = "target-snapshot"; + final ActionFuture snapshot2Future = + startFullSnapshotBlockedOnDataNode("snapshot-2", repoName, dataNode); + waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); + final ActionFuture cloneFuture = + client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); + awaitNSnapshotsInProgress(2); + unblockNode(repoName, dataNode); + assertAcked(cloneFuture.get()); + assertSuccessful(snapshot2Future); + } + @AwaitsFix(bugUrl = "TODO if we want it") public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { internalCluster().startMasterOnlyNode(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index 4953314c8c734..82fb52ba28b96 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -61,7 +61,6 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; @@ -1210,31 +1209,6 @@ public void testQueuedDeleteAfterFinalizationFailure() throws Exception { assertThat(sne.getCause().getMessage(), containsString("exception after block")); } - public void testConcurrentCloneAndSnapshot() throws Exception { - internalCluster().startMasterOnlyNode(); - final String dataNode = internalCluster().startDataOnlyNode(); - final String repoName = "repo-name"; - createRepository(repoName, "mock"); - - final String indexName = "index-1"; - createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); - final String sourceSnapshot = "source-snapshot"; - 
createFullSnapshot(repoName, sourceSnapshot); - - indexRandomDocs(indexName, randomIntBetween(20, 100)); - - final String targetSnapshot = "target-snapshot"; - final ActionFuture snapshot2Future = - startFullSnapshotBlockedOnDataNode("snapshot-2", repoName, dataNode); - waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); - final ActionFuture cloneFuture = - client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); - awaitNSnapshotsInProgress(2); - unblockNode(repoName, dataNode); - assertAcked(cloneFuture.get()); - assertSuccessful(snapshot2Future); - } - private static String startDataNodeWithLargeSnapshotPool() { return internalCluster().startDataOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); } @@ -1284,20 +1258,6 @@ private ActionFuture startFullSnapshotFromMasterClient(S .setWaitForCompletion(true).execute(); } - private ActionFuture startFullSnapshot(String repoName, String snapshotName) { - return startFullSnapshot(repoName, snapshotName, false); - } - - private ActionFuture startFullSnapshot(String repoName, String snapshotName, boolean partial) { - logger.info("--> creating full snapshot [{}] to repo [{}]", snapshotName, repoName); - return client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true) - .setPartial(partial).execute(); - } - - private void awaitClusterState(Predicate statePredicate) throws Exception { - awaitClusterState(internalCluster().getMasterName(), statePredicate); - } - // Large snapshot pool settings to set up nodes for tests involving multiple repositories that need to have enough // threads so that blocking some threads on one repository doesn't block other repositories from doing work private static final Settings LARGE_SNAPSHOT_POOL_SETTINGS = Settings.builder() @@ -1335,12 +1295,6 @@ private static boolean snapshotHasCompletedShard(String snapshot, SnapshotsInPro return false; } - private static SnapshotInfo assertSuccessful(ActionFuture future) throws Exception { - final SnapshotInfo snapshotInfo = future.get().getSnapshotInfo(); - assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS)); - return snapshotInfo; - } - private void corruptIndexN(Path repoPath, long generation) throws IOException { logger.info("--> corrupting [index-{}] in [{}]", generation, repoPath); Path indexNBlob = repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + generation); @@ -1354,12 +1308,6 @@ private void awaitNDeletionsInProgress(int count) throws Exception { state.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY).getEntries().size() == count); } - private void awaitNSnapshotsInProgress(int count) throws Exception { - logger.info("--> wait for [{}] snapshots to show up in the cluster state", count); - awaitClusterState(state -> - state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries().size() == count); - } - private static List currentSnapshots(String repoName) { return client().admin().cluster().prepareGetSnapshots(repoName).setSnapshots(GetSnapshotsRequest.CURRENT_SNAPSHOT) .get().getSnapshots(repoName); @@ -1381,12 +1329,4 @@ private ActionFuture startAndBlockFailingFullSnapshot(St waitForBlock(internalCluster().getMasterName(), blockedRepoName, TimeValue.timeValueSeconds(30L)); return fut; } - - private ActionFuture startFullSnapshotBlockedOnDataNode(String snapshotName, String repoName, String dataNode) - throws InterruptedException { - blockDataNode(repoName, dataNode); - final ActionFuture fut = 
startFullSnapshot(repoName, snapshotName); - waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); - return fut; - } } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 131950d79cd5f..950e43e48962c 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -333,7 +333,6 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis } final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); - // TODO: do not allow delete of source of in-progress-clone // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...) // TODO: shards are snapshot shard-by-shard on the master node, no need for coordination here // TODO: throw when no indices match @@ -1395,6 +1394,17 @@ public ClusterState execute(ClusterState currentState) { if (snapshotIds.isEmpty()) { return currentState; } + final Set activeCloneSources = snapshots.entries() + .stream() + .filter(entry -> entry.source() != null) + .map(SnapshotsInProgress.Entry::source + ).collect(Collectors.toSet()); + for (SnapshotId snapshotId : snapshotIds) { + if (activeCloneSources.contains(snapshotId)) { + throw new ConcurrentSnapshotExecutionException(new Snapshot(repoName, snapshotId), + "cannot delete snapshot while it is being cloned"); + } + } final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY); final RepositoryCleanupInProgress repositoryCleanupInProgress = diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index 8e07290bda18d..47bccc3ec5b93 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -19,6 +19,7 @@ package org.elasticsearch.snapshots; import org.elasticsearch.Version; +import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -428,6 +429,10 @@ protected void awaitNoMoreRunningOperations(String viaNode) throws Exception { state.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY).hasDeletionsInProgress() == false); } + protected void awaitClusterState(Predicate statePredicate) throws Exception { + awaitClusterState(internalCluster().getMasterName(), statePredicate); + } + protected void awaitClusterState(String viaNode, Predicate statePredicate) throws Exception { final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, viaNode); final ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, viaNode); @@ -453,4 +458,34 @@ public void onTimeout(TimeValue timeout) { future.get(30L, TimeUnit.SECONDS); } } + + protected ActionFuture startFullSnapshotBlockedOnDataNode(String snapshotName, String repoName, + String dataNode) throws InterruptedException { + blockDataNode(repoName, dataNode); + final ActionFuture fut = 
startFullSnapshot(repoName, snapshotName); + waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); + return fut; + } + + protected ActionFuture startFullSnapshot(String repoName, String snapshotName) { + return startFullSnapshot(repoName, snapshotName, false); + } + + protected ActionFuture startFullSnapshot(String repoName, String snapshotName, boolean partial) { + logger.info("--> creating full snapshot [{}] to repo [{}]", snapshotName, repoName); + return client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true) + .setPartial(partial).execute(); + } + + protected void awaitNSnapshotsInProgress(int count) throws Exception { + logger.info("--> wait for [{}] snapshots to show up in the cluster state", count); + awaitClusterState(state -> + state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries().size() == count); + } + + protected static SnapshotInfo assertSuccessful(ActionFuture future) throws Exception { + final SnapshotInfo snapshotInfo = future.get().getSnapshotInfo(); + assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS)); + return snapshotInfo; + } } From 2090a547a3e5bfa9c20db4e8f325d91045b4c9e0 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 1 Sep 2020 15:49:25 +0200 Subject: [PATCH 28/94] delete prevents clone --- .../snapshots/CloneSnapshotIT.java | 29 +++++++++++++++++++ .../snapshots/SnapshotsService.java | 21 ++++++++------ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 8b25f93d5d419..e261a5e84daf4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -124,6 +124,35 @@ public void testConcurrentCloneAndSnapshot() throws Exception { assertSuccessful(snapshot2Future); } + public void testDeletePreventsClone() throws Exception { + final String masterName = internalCluster().startMasterOnlyNode(); + internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + createRepository(repoName, "mock"); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + indexRandomDocs(indexName, randomIntBetween(20, 100)); + + final String targetSnapshot = "target-snapshot"; + blockNodeOnAnyFiles(repoName, masterName); + final ActionFuture deleteFuture = + client().admin().cluster().prepareDeleteSnapshot(repoName, sourceSnapshot).execute(); + waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L)); + assertFalse(deleteFuture.isDone()); + + ConcurrentSnapshotExecutionException ex = expectThrows(ConcurrentSnapshotExecutionException.class, () -> + client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute() + .actionGet()); + assertThat(ex.getMessage(), containsString("cannot clone from snapshot that is being deleted")); + + unblockNode(repoName, masterName); + assertAcked(deleteFuture.get()); + } + @AwaitsFix(bugUrl = "TODO if we want it") public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { internalCluster().startMasterOnlyNode(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java 
b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 950e43e48962c..a01f27b45f8a1 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -334,7 +334,6 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...) - // TODO: shards are snapshot shard-by-shard on the master node, no need for coordination here // TODO: throw when no indices match repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { @@ -347,6 +346,12 @@ public ClusterState execute(ClusterState currentState) { throw new InvalidSnapshotNameException( repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists"); } + final RepositoryCleanupInProgress repositoryCleanupInProgress = + currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); + if (repositoryCleanupInProgress.hasCleanupInProgress()) { + throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, + "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + } final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List runningSnapshots = snapshots.entries(); if (runningSnapshots.stream().anyMatch(s -> { @@ -357,19 +362,17 @@ public ClusterState execute(ClusterState currentState) { repository.getMetadata().name(), snapshotName, "snapshot with the same name is already in-progress"); } validate(repositoryName, snapshotName, currentState); - final SnapshotDeletionsInProgress deletionsInProgress = - currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY); - // TODO: check that no delete for the source snapshot is running + final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds() .stream() .filter(src -> src.getName().equals(request.source())) .findAny() .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); - final RepositoryCleanupInProgress repositoryCleanupInProgress = - currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); - if (repositoryCleanupInProgress.hasCleanupInProgress()) { - throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, - "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + final SnapshotDeletionsInProgress deletionsInProgress = + currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY); + if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(sourceSnapshotId))) { + throw new ConcurrentSnapshotExecutionException(repositoryName, sourceSnapshotId.getName(), + "cannot clone from snapshot that is being deleted"); } ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); From 24dfb6c6ddb15b9bb529d9cc15bfac0a89a04bc4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 1 Sep 2020 17:22:54 +0200 Subject: [PATCH 29/94] bck --- .../snapshots/CloneSnapshotIT.java | 30 +++++++++++++++++++ .../snapshots/mockstore/MockRepository.java | 11 +++++++ 2 files changed, 41 insertions(+) diff 
--git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index e261a5e84daf4..a1fbee6d2bf23 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -33,6 +33,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase { @@ -124,6 +125,35 @@ public void testConcurrentCloneAndSnapshot() throws Exception { assertSuccessful(snapshot2Future); } + public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception { + final String masterNode = internalCluster().startMasterOnlyNode(); + internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String indexSlow = "index-slow"; + createIndexWithRandomDocs(indexSlow, randomIntBetween(20, 100)); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + final String targetSnapshot = "target-snapshot"; + final ActionFuture cloneFuture = + client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexSlow).execute(); + + final String indexFast = "index-fast"; + createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100)); + + assertSuccessful(client().admin().cluster().prepareCreateSnapshot(repoName, "fast-snapshot") + .setIndices(indexFast).setWaitForCompletion(true).execute()); + + assertThat(cloneFuture.isDone(), is(false)); + unblockNode(repoName, masterNode); + + assertAcked(cloneFuture.get()); + } + + + public void testDeletePreventsClone() throws Exception { final String masterName = internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 70c60b24309cb..8c84acfc39c08 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -118,6 +118,8 @@ public long getFailureCount() { /** Allows blocking on writing the snapshot file at the end of snapshot creation to simulate a died master node */ private volatile boolean blockAndFailOnWriteSnapFile; + private volatile boolean blockOnWriteShardLevelMeta; + /** * Writes to the blob {@code index.latest} at the repository root will fail with an {@link IOException} if {@code true}. 
*/ @@ -183,6 +185,7 @@ public synchronized void unblock() { blockOnWriteIndexFile = false; blockAndFailOnWriteSnapFile = false; blockOnDeleteIndexN = false; + blockOnWriteShardLevelMeta = false; this.notifyAll(); } @@ -206,6 +209,10 @@ public void setBlockOnDeleteIndexFile() { blockOnDeleteIndexN = true; } + public void setBlockOnWriteShardLevelMeta() { + blockOnWriteShardLevelMeta = true; + } + public boolean blocked() { return blocked; } @@ -414,6 +421,10 @@ public void writeBlobAtomic(final String blobName, final InputStream inputStream if (failOnIndexLatest && BlobStoreRepository.INDEX_LATEST_BLOB.equals(blobName)) { throw new IOException("Random IOException"); } + if (blockOnWriteShardLevelMeta && blobName.startsWith(BlobStoreRepository.SNAPSHOT_PREFIX) && + path().equals(basePath()) == false) { + + } if (blobName.startsWith("index-") && blockOnWriteIndexFile) { blockExecutionAndFail(blobName); } From e363042965810cfc1582cf1281c3085afe6468b8 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 12:11:57 +0200 Subject: [PATCH 30/94] bck --- .../snapshots/CloneSnapshotIT.java | 16 +++- .../snapshots/ConcurrentSnapshotsIT.java | 94 ++++++++----------- .../blobstore/BlobStoreRepository.java | 6 ++ .../AbstractSnapshotIntegTestCase.java | 18 ++++ .../snapshots/mockstore/MockRepository.java | 10 +- 5 files changed, 79 insertions(+), 65 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index a1fbee6d2bf23..013895fd2f6c1 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -26,6 +26,8 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase; import java.util.List; @@ -126,19 +128,22 @@ public void testConcurrentCloneAndSnapshot() throws Exception { } public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception { - final String masterNode = internalCluster().startMasterOnlyNode(); + // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations + final String masterNode = internalCluster().startMasterOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); final String indexSlow = "index-slow"; - createIndexWithRandomDocs(indexSlow, randomIntBetween(20, 100)); + createSingleShardIndexWithContent(indexSlow); final String sourceSnapshot = "source-snapshot"; createFullSnapshot(repoName, sourceSnapshot); final String targetSnapshot = "target-snapshot"; + blockMasterOnShardClone(repoName); final ActionFuture cloneFuture = client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexSlow).execute(); + waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); final String indexFast = "index-fast"; createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100)); @@ -152,8 +157,6 @@ public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception { assertAcked(cloneFuture.get()); } - - public void testDeletePreventsClone() 
throws Exception { final String masterName = internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); @@ -211,4 +214,9 @@ public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { client().admin().indices().prepareGetIndex().setIndices(restoredIndex).get().getSettings().get(restoredIndex); assertEquals(settings.get(IndexMetadata.SETTING_NUMBER_OF_REPLICAS), "1"); } + + private void blockMasterOnShardClone(String repoName) { + ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) + .setBlockOnWriteShardLevelMeta(); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index 82fb52ba28b96..46bf56a324fc7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -98,7 +98,7 @@ public void testLongRunningSnapshotAllowsConcurrentSnapshot() throws Exception { final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-slow"); + createSingleShardIndexWithContent("index-slow"); final ActionFuture createSlowFuture = startFullSnapshotBlockedOnDataNode("slow-snapshot", repoName, dataNode); @@ -106,7 +106,7 @@ public void testLongRunningSnapshotAllowsConcurrentSnapshot() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String indexFast = "index-fast"; - createIndexWithContent(indexFast, dataNode2, dataNode); + createSingleShardIndexWithContent(indexFast, dataNode2, dataNode); assertSuccessful(client().admin().cluster().prepareCreateSnapshot(repoName, "fast-snapshot") .setIndices(indexFast).setWaitForCompletion(true).execute()); @@ -141,7 +141,7 @@ public void testDeletesAreBatched() throws Exception { assertThat(snapshotResponse.getSnapshotInfo().state(), is(SnapshotState.SUCCESS)); } - createIndexWithContent("index-slow"); + createSingleShardIndexWithContent("index-slow"); final ActionFuture createSlowFuture = startFullSnapshotBlockedOnDataNode("blocked-snapshot", repoName, dataNode); @@ -190,7 +190,7 @@ public void testBlockedRepoDoesNotBlockOtherRepos() throws Exception { createRepository(otherRepoName, "fs"); createIndex("foo"); ensureGreen(); - createIndexWithContent("index-slow"); + createSingleShardIndexWithContent("index-slow"); final ActionFuture createSlowFuture = startAndBlockFailingFullSnapshot(blockedRepoName, "blocked-snapshot"); @@ -214,7 +214,7 @@ public void testMultipleReposAreIndependent() throws Exception { final String otherRepoName = "test-repo"; createRepository(blockedRepoName, "mock"); createRepository(otherRepoName, "fs"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); final ActionFuture createSlowFuture = startFullSnapshotBlockedOnDataNode("blocked-snapshot", blockedRepoName, dataNode); @@ -235,7 +235,7 @@ public void testMultipleReposAreIndependent2() throws Exception { final String otherRepoName = "test-repo"; createRepository(blockedRepoName, "mock"); createRepository(otherRepoName, "fs"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); final ActionFuture createSlowFuture = startFullSnapshotBlockedOnDataNode("blocked-snapshot", 
blockedRepoName, dataNode); @@ -255,7 +255,7 @@ public void testMultipleReposAreIndependent3() throws Exception { final String otherRepoName = "test-repo"; createRepository(blockedRepoName, "mock"); createRepository(otherRepoName, "fs"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); createFullSnapshot( blockedRepoName, "blocked-snapshot"); blockNodeOnAnyFiles(blockedRepoName, masterNode); @@ -276,7 +276,7 @@ public void testSnapshotRunsAfterInProgressDelete() throws Exception { createRepository(repoName, "mock"); ensureGreen(); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); final String firstSnapshot = "first-snapshot"; createFullSnapshot(repoName, firstSnapshot); @@ -300,7 +300,7 @@ public void testAbortOneOfMultipleSnapshots() throws Exception { final String repoName = "test-repo"; createRepository(repoName, "mock"); final String firstIndex = "index-one"; - createIndexWithContent(firstIndex); + createSingleShardIndexWithContent(firstIndex); final String firstSnapshot = "snapshot-one"; final ActionFuture firstSnapshotResponse = @@ -309,7 +309,7 @@ public void testAbortOneOfMultipleSnapshots() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String secondIndex = "index-two"; - createIndexWithContent(secondIndex, dataNode2, dataNode); + createSingleShardIndexWithContent(secondIndex, dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -350,7 +350,7 @@ public void testCascadedAborts() throws Exception { final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); final String firstSnapshot = "snapshot-one"; final ActionFuture firstSnapshotResponse = @@ -358,7 +358,7 @@ public void testCascadedAborts() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); - createIndexWithContent("index-two", dataNode2, dataNode); + createSingleShardIndexWithContent("index-two", dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -411,7 +411,7 @@ public void testMasterFailOverWithQueuedDeletes() throws Exception { createRepository(repoName, "mock"); final String firstIndex = "index-one"; - createIndexWithContent(firstIndex); + createSingleShardIndexWithContent(firstIndex); final String firstSnapshot = "snapshot-one"; blockDataNode(repoName, dataNode); @@ -421,7 +421,7 @@ public void testMasterFailOverWithQueuedDeletes() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(5); final String secondIndex = "index-two"; - createIndexWithContent(secondIndex, dataNode2, dataNode); + createSingleShardIndexWithContent(secondIndex, dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -523,7 +523,7 @@ public void testQueuedDeletesWithFailures() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, 
randomIntBetween(2, 5)); blockMasterFromFinalizingSnapshotOnIndexFile(repoName); @@ -552,7 +552,7 @@ public void testQueuedDeletesWithOverlap() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final ActionFuture firstDeleteFuture = startAndBlockOnDeleteSnapshot(repoName, "*"); @@ -578,7 +578,7 @@ public void testQueuedOperationsOnMasterRestart() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); startAndBlockOnDeleteSnapshot(repoName, "*"); @@ -599,7 +599,7 @@ public void testQueuedOperationsOnMasterDisconnect() throws Exception { final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final String masterNode = internalCluster().getMasterName(); @@ -637,7 +637,7 @@ public void testQueuedOperationsOnMasterDisconnectAndRepoFailure() throws Except final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final String masterNode = internalCluster().getMasterName(); @@ -677,7 +677,7 @@ public void testQueuedOperationsAndBrokenRepoOnMasterFailOver() throws Exception final String repoName = "test-repo"; final Path repoPath = randomRepoPath(); createRepository(repoName, "mock", repoPath); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final long generation = getRepositoryData(repoName).getGenId(); @@ -706,7 +706,7 @@ public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOver() throws E final String repoName = "test-repo"; final Path repoPath = randomRepoPath(); createRepository(repoName, "mock", repoPath); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final long generation = getRepositoryData(repoName).getGenId(); @@ -734,7 +734,7 @@ public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOver2() throws final String repoName = "test-repo"; final Path repoPath = randomRepoPath(); createRepository(repoName, "mock", repoPath); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final long generation = getRepositoryData(repoName).getGenId(); @@ -767,7 +767,7 @@ public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOverMultipleRep final String repoName = "test-repo"; final Path repoPath = randomRepoPath(); createRepository(repoName, "mock", repoPath); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final String masterNode = internalCluster().getMasterName(); @@ -804,7 +804,7 @@ public void testMultipleSnapshotsQueuedAfterDelete() throws 
Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(1, 5)); final ActionFuture deleteFuture = startAndBlockOnDeleteSnapshot(repoName, "*"); @@ -823,8 +823,8 @@ public void testMultiplePartialSnapshotsQueuedAfterDelete() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-one"); - createIndexWithContent("index-two"); + createSingleShardIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-two"); createNSnapshots(repoName, randomIntBetween(1, 5)); final ActionFuture deleteFuture = startAndBlockOnDeleteSnapshot(repoName, "*"); @@ -890,7 +890,7 @@ public void testBackToBackQueuedDeletes() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); final List snapshots = createNSnapshots(repoName, 2); final String snapshotOne = snapshots.get(0); final String snapshotTwo = snapshots.get(1); @@ -914,7 +914,7 @@ public void testQueuedOperationsAfterFinalizationFailure() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); final List snapshotNames = createNSnapshots(repoName, randomIntBetween(2, 5)); @@ -937,7 +937,7 @@ public void testStartDeleteDuringFinalizationCleanup() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); createNSnapshots(repoName, randomIntBetween(1, 5)); final String snapshotName = "snap-name"; blockMasterFromDeletingIndexNFile(repoName); @@ -955,7 +955,7 @@ public void testEquivalentDeletesAreDeduplicated() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); createNSnapshots(repoName, randomIntBetween(1, 5)); blockNodeOnAnyFiles(repoName, masterName); @@ -981,7 +981,7 @@ public void testMasterFailoverOnFinalizationLoop() throws Exception { final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT); internalCluster().setDisruptionScheme(networkDisruption); @@ -1019,7 +1019,7 @@ public void testStatusMultipleSnapshotsMultipleRepos() throws Exception { final String otherBlockedRepoName = "test-repo-blocked-2"; createRepository(blockedRepoName, "mock"); createRepository(otherBlockedRepoName, "mock"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); final ActionFuture createSlowFuture1 = startFullSnapshotBlockedOnDataNode("blocked-snapshot", blockedRepoName, dataNode); @@ -1054,7 +1054,7 @@ public void testInterleavedAcrossMultipleRepos() throws Exception { final String otherBlockedRepoName 
= "test-repo-blocked-2"; createRepository(blockedRepoName, "mock"); createRepository(otherBlockedRepoName, "mock"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); final ActionFuture createSlowFuture1 = startFullSnapshotBlockedOnDataNode("blocked-snapshot", blockedRepoName, dataNode); @@ -1081,7 +1081,7 @@ public void testMasterFailoverAndMultipleQueuedUpSnapshotsAcrossTwoRepos() throw final Path repoPath = randomRepoPath(); createRepository(repoName, "mock", repoPath); createRepository(otherRepoName, "mock"); - createIndexWithContent("index-one"); + createSingleShardIndexWithContent("index-one"); createNSnapshots(repoName, randomIntBetween(2, 5)); final int countOtherRepo = randomIntBetween(2, 5); createNSnapshots(otherRepoName, countOtherRepo); @@ -1114,7 +1114,7 @@ public void testConcurrentOperationsLimit() throws Exception { internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("index-test"); + createSingleShardIndexWithContent("index-test"); final int limitToTest = randomIntBetween(1, 3); assertAcked(client().admin().cluster().prepareUpdateSettings().setPersistentSettings(Settings.builder().put( @@ -1170,7 +1170,7 @@ public void testConcurrentSnapshotWorksWithOldVersionRepo() throws Exception { .put("location", repoPath)); initWithSnapshotVersion(repoName, repoPath, SnapshotsService.OLD_SNAPSHOT_FORMAT); - createIndexWithContent("index-slow"); + createSingleShardIndexWithContent("index-slow"); final ActionFuture createSlowFuture = startFullSnapshotBlockedOnDataNode("slow-snapshot", repoName, dataNode); @@ -1178,7 +1178,7 @@ public void testConcurrentSnapshotWorksWithOldVersionRepo() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String indexFast = "index-fast"; - createIndexWithContent(indexFast, dataNode2, dataNode); + createSingleShardIndexWithContent(indexFast, dataNode2, dataNode); final ActionFuture createFastSnapshot = client().admin().cluster().prepareCreateSnapshot(repoName, "fast-snapshot").setWaitForCompletion(true).execute(); @@ -1258,30 +1258,12 @@ private ActionFuture startFullSnapshotFromMasterClient(S .setWaitForCompletion(true).execute(); } - // Large snapshot pool settings to set up nodes for tests involving multiple repositories that need to have enough - // threads so that blocking some threads on one repository doesn't block other repositories from doing work - private static final Settings LARGE_SNAPSHOT_POOL_SETTINGS = Settings.builder() - .put("thread_pool.snapshot.core", 5).put("thread_pool.snapshot.max", 5).build(); - - private static final Settings SINGLE_SHARD_NO_REPLICA = indexSettingsNoReplicas(1).build(); - - private void createIndexWithContent(String indexName) { - createIndexWithContent(indexName, SINGLE_SHARD_NO_REPLICA); - } - - private void createIndexWithContent(String indexName, String nodeInclude, String nodeExclude) { + private void createSingleShardIndexWithContent(String indexName, String nodeInclude, String nodeExclude) { createIndexWithContent(indexName, indexSettingsNoReplicas(1) .put("index.routing.allocation.include._name", nodeInclude) .put("index.routing.allocation.exclude._name", nodeExclude).build()); } - private void createIndexWithContent(String indexName, Settings indexSettings) { - logger.info("--> creating index [{}]", indexName); - createIndex(indexName, indexSettings); - ensureGreen(indexName); - indexDoc(indexName, "some_id", "foo", 
"bar"); - } - private static boolean snapshotHasCompletedShard(String snapshot, SnapshotsInProgress snapshotsInProgress) { for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { if (entry.snapshot().getSnapshotId().getName().equals(snapshot)) { diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 4a1110bb30aa9..d37867f3cc938 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -405,6 +405,12 @@ public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId ind final BlobContainer shardContainer = shardContainer(index, shardId); final BlobStoreIndexShardSnapshots existingSnapshots = buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); + for (SnapshotFiles existingSnapshot : existingSnapshots) { + if (existingSnapshot.snapshot().equals(target.getName())) { + throw new RepositoryException(metadata.name(), "Can't create clone of [" + index + "][" + shardId + "] for snapshot [" + + target + "]. A snapshot by that name already exists for this shard."); + } + } final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(shardContainer, source); final String newGen; if (shardGeneration == null) { diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index 47bccc3ec5b93..1d4bb753b9ace 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -88,6 +88,11 @@ public abstract class AbstractSnapshotIntegTestCase extends ESIntegTestCase { private static final String OLD_VERSION_SNAPSHOT_PREFIX = "old-version-snapshot-"; + // Large snapshot pool settings to set up nodes for tests involving multiple repositories that need to have enough + // threads so that blocking some threads on one repository doesn't block other repositories from doing work + protected static final Settings LARGE_SNAPSHOT_POOL_SETTINGS = Settings.builder() + .put("thread_pool.snapshot.core", 5).put("thread_pool.snapshot.max", 5).build(); + @Override protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder().put(super.nodeSettings(nodeOrdinal)) @@ -488,4 +493,17 @@ protected static SnapshotInfo assertSuccessful(ActionFuture creating index [{}]", indexName); + createIndex(indexName, indexSettings); + ensureGreen(indexName); + indexDoc(indexName, "some_id", "foo", "bar"); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 8c84acfc39c08..68959fc1150f7 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -226,7 +226,7 @@ private synchronized boolean blockExecution() { boolean wasBlocked = false; try { while (blockOnDataFiles || blockOnAnyFiles || blockOnWriteIndexFile || - blockAndFailOnWriteSnapFile || blockOnDeleteIndexN) { + blockAndFailOnWriteSnapFile || blockOnDeleteIndexN || 
blockOnWriteShardLevelMeta) { blocked = true; this.wait(); wasBlocked = true; @@ -406,6 +406,10 @@ public Map listBlobsByPrefix(String blobNamePrefix) throws public void writeBlob(String blobName, InputStream inputStream, long blobSize, boolean failIfAlreadyExists) throws IOException { maybeIOExceptionOrBlock(blobName); + if (blockOnWriteShardLevelMeta && blobName.startsWith(BlobStoreRepository.SNAPSHOT_PREFIX) && + path().equals(basePath()) == false) { + blockExecutionAndMaybeWait(blobName); + } super.writeBlob(blobName, inputStream, blobSize, failIfAlreadyExists); if (RandomizedContext.current().getRandom().nextBoolean()) { // for network based repositories, the blob may have been written but we may still @@ -421,10 +425,6 @@ public void writeBlobAtomic(final String blobName, final InputStream inputStream if (failOnIndexLatest && BlobStoreRepository.INDEX_LATEST_BLOB.equals(blobName)) { throw new IOException("Random IOException"); } - if (blockOnWriteShardLevelMeta && blobName.startsWith(BlobStoreRepository.SNAPSHOT_PREFIX) && - path().equals(basePath()) == false) { - - } if (blobName.startsWith("index-") && blockOnWriteIndexFile) { blockExecutionAndFail(blobName); } From 4c6b0d5fb3335aead8472b2bb4652e8892f07379 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 12:14:26 +0200 Subject: [PATCH 31/94] cleanup rest action --- .../cluster/RestCloneSnapshotAction.java | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java index d90d1714906e1..a1e7a2fb3c871 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java @@ -19,14 +19,9 @@ package org.elasticsearch.rest.action.admin.cluster; -import org.elasticsearch.ElasticsearchGenerationException; import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; @@ -58,30 +53,14 @@ public String getName() { @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { Map body = request.contentParser().mapOrdered(); + final Object indexSettings = body.get("index_settings"); final CloneSnapshotRequest cloneSnapshotRequest = new CloneSnapshotRequest( request.param("repository"), request.param("source_snapshot"), request.param("target_snapshot"), XContentMapValues.nodeStringArrayValue(body.getOrDefault("indices", Collections.emptyList())), XContentMapValues.nodeStringArrayValue(body.getOrDefault("excluded_settings", Collections.emptyList())), - settings(body.get("index_settings"))); + indexSettings == null ? 
Settings.EMPTY : + Settings.builder().loadFromMap(XContentMapValues.nodeMapValue(indexSettings, "index_settings")).build()); cloneSnapshotRequest.masterNodeTimeout(request.paramAsTime("master_timeout", cloneSnapshotRequest.masterNodeTimeout())); return channel -> client.admin().cluster().cloneSnapshot(cloneSnapshotRequest, new RestToXContentListener<>(channel)); } - - // TODO: Dry up via https://github.com/elastic/elasticsearch/pull/61778 - @SuppressWarnings("unchecked") - private static Settings settings(Object raw) { - if (raw == null) { - return Settings.EMPTY; - } - if (raw instanceof Map) { - try { - XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); - builder.map((Map) raw); - return Settings.builder().loadFromSource(Strings.toString(builder), XContentType.JSON).build(); - } catch (IOException e) { - throw new ElasticsearchGenerationException("Failed to generate [" + raw + "]", e); - } - } - throw new IllegalArgumentException("[settings] must be a map or empty"); - } } From daeb2fadd9aaf30b2fa83f3ce1d117700fd5c20c Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 12:33:03 +0200 Subject: [PATCH 32/94] shorter diff --- .../snapshots/SnapshotsService.java | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index a01f27b45f8a1..b9feb82959e5b 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -146,7 +146,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus private final ThreadPool threadPool; private final Map>>> snapshotCompletionListeners = - new ConcurrentHashMap<>(); + new ConcurrentHashMap<>(); /** * Listeners for snapshot deletion keyed by delete uuid as returned from {@link SnapshotDeletionsInProgress.Entry#uuid()} @@ -172,7 +172,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus * {@link SnapshotsInProgress#entries()} and {@link SnapshotDeletionsInProgress#getEntries()}. */ public static final Setting MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING = - Setting.intSetting("snapshot.max_concurrent_operations", 1000, 1, Setting.Property.NodeScope, Setting.Property.Dynamic); + Setting.intSetting("snapshot.max_concurrent_operations", 1000, 1, Setting.Property.NodeScope, Setting.Property.Dynamic); private volatile int maxConcurrentOperations; @@ -192,7 +192,7 @@ public SnapshotsService(Settings settings, ClusterService clusterService, IndexN clusterService.addLowPriorityApplier(this); maxConcurrentOperations = MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING.get(settings); clusterService.getClusterSettings().addSettingsUpdateConsumer(MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING, - i -> maxConcurrentOperations = i); + i -> maxConcurrentOperations = i); } } @@ -200,12 +200,12 @@ public SnapshotsService(Settings settings, ClusterService clusterService, IndexN * Same as {@link #createSnapshot(CreateSnapshotRequest, ActionListener)} but invokes its callback on completion of * the snapshot. 
* - * @param request snapshot request + * @param request snapshot request * @param listener snapshot completion listener */ public void executeSnapshot(final CreateSnapshotRequest request, final ActionListener listener) { createSnapshot(request, - ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)); + ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)); } /** @@ -257,7 +257,7 @@ public ClusterState execute(ClusterState currentState) { currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); if (repositoryCleanupInProgress.hasCleanupInProgress()) { throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, - "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); } ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); // Store newSnapshot here to be processed in clusterStateProcessed @@ -274,7 +274,7 @@ public ClusterState execute(ClusterState currentState) { .collect(Collectors.toMap(IndexId::getName, Function.identity()))); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); ImmutableOpenMap shards = shards(snapshots, deletionsInProgress, currentState.metadata(), - currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); + currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); if (request.partial() == false) { Set missing = new HashSet<>(); for (ObjectObjectCursor entry : shards) { @@ -546,8 +546,8 @@ private void ensureBelowConcurrencyLimit(String repository, String name, Snapsho * Validates snapshot request * * @param repositoryName repository name - * @param snapshotName snapshot name - * @param state current cluster state + * @param snapshotName snapshot name + * @param state current cluster state */ private static void validate(String repositoryName, String snapshotName, ClusterState state) { RepositoriesMetadata repositoriesMetadata = state.getMetadata().custom(RepositoriesMetadata.TYPE); @@ -1142,7 +1142,7 @@ private void handleFinalizationFailure(Exception e, SnapshotsInProgress.Entry en // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master // will try ending this snapshot again logger.debug(() -> new ParameterizedMessage( - "[{}] failed to update cluster state during snapshot finalization", snapshot), e); + "[{}] failed to update cluster state during snapshot finalization", snapshot), e); failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, "Failed to update cluster state during snapshot finalization", e)); failAllListenersOnMasterFailOver(e); @@ -1336,7 +1336,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS * @param deletions snapshot deletions to update * @param snapshotIds snapshot ids to remove * @param repository repository that the snapshot ids belong to - * @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged + * @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged */ @Nullable private static SnapshotDeletionsInProgress deletionsWithoutSnapshots(SnapshotDeletionsInProgress deletions, @@ -1620,8 
+1620,7 @@ public static boolean useIndexGenerations(Version repositoryMetaVersion) { return repositoryMetaVersion.onOrAfter(INDEX_GEN_IN_REPO_DATA_VERSION); } - /** - * Deletes snapshot from repository + /** Deletes snapshot from repository * * @param deleteEntry delete entry in cluster state * @param minNodeVersion minimum node version in the cluster @@ -1645,8 +1644,7 @@ public void onFailure(Exception e) { }); } - /** - * Deletes snapshot from repository + /** Deletes snapshot from repository * * @param deleteEntry delete entry in cluster state * @param repositoryData the {@link RepositoryData} of the repository to delete from @@ -1973,7 +1971,7 @@ private static ImmutableOpenMap inProgressShards = busyShardsForRepo(repoName, snapshotsInProgress, metadata); final boolean readyToExecute = deletionsInProgress == null || deletionsInProgress.getEntries().stream() - .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED); + .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED); for (IndexId index : indices) { final String indexName = index.getName(); final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false; @@ -1990,7 +1988,7 @@ private static ImmutableOpenMap busyShardsForRepo(String repoName, @Nullable SnapshotsInProgress snapshots, Metadata metadata) { @@ -2132,15 +2130,15 @@ protected void doClose() { public boolean assertAllListenersResolved() { final DiscoveryNode localNode = clusterService.localNode(); assert endingSnapshots.isEmpty() : "Found leaked ending snapshots " + endingSnapshots - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert snapshotCompletionListeners.isEmpty() : "Found leaked snapshot completion listeners " + snapshotCompletionListeners - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert currentlyFinalizing.isEmpty() : "Found leaked finalizations " + currentlyFinalizing - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert snapshotDeletionListeners.isEmpty() : "Found leaked snapshot delete listeners " + snapshotDeletionListeners - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; assert repositoryOperations.isEmpty() : "Found leaked snapshots to finalize " + repositoryOperations - + " on [" + localNode + "]"; + + " on [" + localNode + "]"; return true; } From 6999ea22e4a33e3e47374c02ec4948056a9be343 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 13:17:24 +0200 Subject: [PATCH 33/94] bck --- .../reference/snapshot-restore/apis/clone-snapshot-api.asciidoc | 2 +- docs/reference/snapshot-restore/take-snapshot.asciidoc | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc index 43f4b8ef35176..63fafd82e47ab 100644 --- a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc @@ -18,7 +18,7 @@ PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot } } ---- -// TESTSETUP +// TEST[skip:TODO must add relevant test setup] //// [source,console] diff --git a/docs/reference/snapshot-restore/take-snapshot.asciidoc b/docs/reference/snapshot-restore/take-snapshot.asciidoc index 4adbfce304d67..74b6e49647b1d 100644 --- a/docs/reference/snapshot-restore/take-snapshot.asciidoc +++ 
b/docs/reference/snapshot-restore/take-snapshot.asciidoc @@ -124,3 +124,5 @@ PUT /_snapshot/my_backup/ PUT /_snapshot/my_backup/%3Csnapshot-%7Bnow%2Fd%7D%3E ----------------------------------- // TEST[continued] + +NOTE: You can also create snapshots that are copies of part of an existing snapshot using the <<clone-snapshot-api,clone snapshot API>>. \ No newline at end of file From 94ba717f068aedfdc3793cb713a2f0bde086e7cf Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 13:53:52 +0200 Subject: [PATCH 34/94] docs test --- .../apis/clone-snapshot-api.asciidoc | 9 +---- .../snapshot-restore/take-snapshot.asciidoc | 2 +- .../elasticsearch/action/ActionModule.java | 37 +------------------ .../cluster/RestCloneSnapshotAction.java | 4 +- 4 files changed, 6 insertions(+), 46 deletions(-) diff --git a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc index 63fafd82e47ab..60d6ccbc67fb8 100644 --- a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc @@ -6,7 +6,6 @@ Clones part or all of a snapshot into a new snapshot. -//// [source,console] ---- PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot @@ -18,13 +17,7 @@ PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot } } ---- -// TEST[skip:TODO must add relevant test setup] -//// - -[source,console] ----- -PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot ----- +// TEST[skip:TODO] [[clone-snapshot-api-request]] ==== {api-request-title} diff --git a/docs/reference/snapshot-restore/take-snapshot.asciidoc b/docs/reference/snapshot-restore/take-snapshot.asciidoc index 74b6e49647b1d..ddc2812dbe280 100644 --- a/docs/reference/snapshot-restore/take-snapshot.asciidoc +++ b/docs/reference/snapshot-restore/take-snapshot.asciidoc @@ -125,4 +125,4 @@ PUT /_snapshot/my_backup/%3Csnapshot-%7Bnow%2Fd%7D%3E ----------------------------------- // TEST[continued] -NOTE: You can also create snapshots that are copies of part of an existing snapshot using the <<clone-snapshot-api,clone snapshot API>>. \ No newline at end of file +NOTE: You can also create snapshots that are copies of part of an existing snapshot using the <<clone-snapshot-api,clone snapshot API>>.
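For readers following the docs changes in this patch, a clone request of the following shape copies the selected indices of an existing snapshot into a new snapshot, reusing the shard data already present in the repository. This is a minimal sketch: the repository, snapshot, and index names are illustrative placeholders, not values from the patch; the body field matches the `indices` parameter that RestCloneSnapshotAction parses (at this point in the series the handler also accepts `index_settings` and `excluded_settings`).

[source,console]
----
PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot
{
  "indices": "index_a,index_b"
}
----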
\ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 8833fcba6ce20..938db93f00157 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -263,41 +263,7 @@ import org.elasticsearch.rest.RestHeaderDefinition; import org.elasticsearch.rest.action.RestFieldCapabilitiesAction; import org.elasticsearch.rest.action.RestMainAction; -import org.elasticsearch.rest.action.admin.cluster.RestAddVotingConfigExclusionAction; -import org.elasticsearch.rest.action.admin.cluster.RestCancelTasksAction; -import org.elasticsearch.rest.action.admin.cluster.RestCleanupRepositoryAction; -import org.elasticsearch.rest.action.admin.cluster.RestClearVotingConfigExclusionsAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterAllocationExplainAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterGetSettingsAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterHealthAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterRerouteAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterSearchShardsAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterStateAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterStatsAction; -import org.elasticsearch.rest.action.admin.cluster.RestClusterUpdateSettingsAction; -import org.elasticsearch.rest.action.admin.cluster.RestCreateSnapshotAction; -import org.elasticsearch.rest.action.admin.cluster.RestDeleteRepositoryAction; -import org.elasticsearch.rest.action.admin.cluster.RestDeleteSnapshotAction; -import org.elasticsearch.rest.action.admin.cluster.RestDeleteStoredScriptAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetRepositoriesAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetScriptContextAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetScriptLanguageAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetSnapshotsAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetStoredScriptAction; -import org.elasticsearch.rest.action.admin.cluster.RestGetTaskAction; -import org.elasticsearch.rest.action.admin.cluster.RestListTasksAction; -import org.elasticsearch.rest.action.admin.cluster.RestNodesHotThreadsAction; -import org.elasticsearch.rest.action.admin.cluster.RestNodesInfoAction; -import org.elasticsearch.rest.action.admin.cluster.RestNodesStatsAction; -import org.elasticsearch.rest.action.admin.cluster.RestNodesUsageAction; -import org.elasticsearch.rest.action.admin.cluster.RestPendingClusterTasksAction; -import org.elasticsearch.rest.action.admin.cluster.RestPutRepositoryAction; -import org.elasticsearch.rest.action.admin.cluster.RestPutStoredScriptAction; -import org.elasticsearch.rest.action.admin.cluster.RestReloadSecureSettingsAction; -import org.elasticsearch.rest.action.admin.cluster.RestRemoteClusterInfoAction; -import org.elasticsearch.rest.action.admin.cluster.RestRestoreSnapshotAction; -import org.elasticsearch.rest.action.admin.cluster.RestSnapshotsStatusAction; -import org.elasticsearch.rest.action.admin.cluster.RestVerifyRepositoryAction; +import org.elasticsearch.rest.action.admin.cluster.*; import org.elasticsearch.rest.action.admin.cluster.dangling.RestDeleteDanglingIndexAction; import 
org.elasticsearch.rest.action.admin.cluster.dangling.RestImportDanglingIndexAction; import org.elasticsearch.rest.action.admin.cluster.dangling.RestListDanglingIndicesAction; @@ -662,6 +628,7 @@ public void initRestHandlers(Supplier nodesInCluster) { registerHandler.accept(new RestCleanupRepositoryAction()); registerHandler.accept(new RestGetSnapshotsAction()); registerHandler.accept(new RestCreateSnapshotAction()); + registerHandler.accept(new RestCloneSnapshotAction()); registerHandler.accept(new RestRestoreSnapshotAction()); registerHandler.accept(new RestDeleteSnapshotAction()); registerHandler.accept(new RestSnapshotsStatusAction()); diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java index a1e7a2fb3c871..adb0815f95cb2 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java @@ -42,7 +42,7 @@ public class RestCloneSnapshotAction extends BaseRestHandler { @Override public List routes() { return List.of( - new Route(PUT, "/_snapshot/{repository}/{source_snapshot}/_clone/{target_snapshot}")); + new Route(PUT, "/_snapshot/{repository}/{snapshot}/_clone/{target_snapshot}")); } @Override @@ -55,7 +55,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC Map body = request.contentParser().mapOrdered(); final Object indexSettings = body.get("index_settings"); final CloneSnapshotRequest cloneSnapshotRequest = new CloneSnapshotRequest( - request.param("repository"), request.param("source_snapshot"), request.param("target_snapshot"), + request.param("repository"), request.param("snapshot"), request.param("target_snapshot"), XContentMapValues.nodeStringArrayValue(body.getOrDefault("indices", Collections.emptyList())), XContentMapValues.nodeStringArrayValue(body.getOrDefault("excluded_settings", Collections.emptyList())), indexSettings == null ? 
Settings.EMPTY : From 96f2617292d9dca33630d28a879bb5fe5dae9bae Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 14:21:42 +0200 Subject: [PATCH 35/94] one more case handled --- .../snapshots/CloneSnapshotIT.java | 28 ++++ .../elasticsearch/action/ActionModule.java | 37 ++++- .../snapshots/SnapshotsService.java | 154 ++++++++++-------- 3 files changed, 147 insertions(+), 72 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 013895fd2f6c1..6e4b98b62e237 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -157,6 +157,34 @@ public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception { assertAcked(cloneFuture.get()); } + public void testLongRunningSnapshotAllowsConcurrentClone() throws Exception { + internalCluster().startMasterOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String indexSlow = "index-slow"; + createSingleShardIndexWithContent(indexSlow); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + final String indexFast = "index-fast"; + createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100)); + + blockDataNode(repoName, dataNode); + final ActionFuture snapshotFuture = client().admin().cluster() + .prepareCreateSnapshot(repoName, "fast-snapshot").setIndices(indexFast).setWaitForCompletion(true).execute(); + waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); + + final String targetSnapshot = "target-snapshot"; + assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexSlow).get()); + + assertThat(snapshotFuture.isDone(), is(false)); + unblockNode(repoName, dataNode); + + assertSuccessful(snapshotFuture); + } + public void testDeletePreventsClone() throws Exception { final String masterName = internalCluster().startMasterOnlyNode(); internalCluster().startDataOnlyNode(); diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 938db93f00157..5260d40bd536f 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -263,7 +263,42 @@ import org.elasticsearch.rest.RestHeaderDefinition; import org.elasticsearch.rest.action.RestFieldCapabilitiesAction; import org.elasticsearch.rest.action.RestMainAction; -import org.elasticsearch.rest.action.admin.cluster.*; +import org.elasticsearch.rest.action.admin.cluster.RestAddVotingConfigExclusionAction; +import org.elasticsearch.rest.action.admin.cluster.RestCancelTasksAction; +import org.elasticsearch.rest.action.admin.cluster.RestCleanupRepositoryAction; +import org.elasticsearch.rest.action.admin.cluster.RestClearVotingConfigExclusionsAction; +import org.elasticsearch.rest.action.admin.cluster.RestCloneSnapshotAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterAllocationExplainAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterGetSettingsAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterHealthAction; +import 
org.elasticsearch.rest.action.admin.cluster.RestClusterRerouteAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterSearchShardsAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterStateAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterStatsAction; +import org.elasticsearch.rest.action.admin.cluster.RestClusterUpdateSettingsAction; +import org.elasticsearch.rest.action.admin.cluster.RestCreateSnapshotAction; +import org.elasticsearch.rest.action.admin.cluster.RestDeleteRepositoryAction; +import org.elasticsearch.rest.action.admin.cluster.RestDeleteSnapshotAction; +import org.elasticsearch.rest.action.admin.cluster.RestDeleteStoredScriptAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetRepositoriesAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetScriptContextAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetScriptLanguageAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetSnapshotsAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetStoredScriptAction; +import org.elasticsearch.rest.action.admin.cluster.RestGetTaskAction; +import org.elasticsearch.rest.action.admin.cluster.RestListTasksAction; +import org.elasticsearch.rest.action.admin.cluster.RestNodesHotThreadsAction; +import org.elasticsearch.rest.action.admin.cluster.RestNodesInfoAction; +import org.elasticsearch.rest.action.admin.cluster.RestNodesStatsAction; +import org.elasticsearch.rest.action.admin.cluster.RestNodesUsageAction; +import org.elasticsearch.rest.action.admin.cluster.RestPendingClusterTasksAction; +import org.elasticsearch.rest.action.admin.cluster.RestPutRepositoryAction; +import org.elasticsearch.rest.action.admin.cluster.RestPutStoredScriptAction; +import org.elasticsearch.rest.action.admin.cluster.RestReloadSecureSettingsAction; +import org.elasticsearch.rest.action.admin.cluster.RestRemoteClusterInfoAction; +import org.elasticsearch.rest.action.admin.cluster.RestRestoreSnapshotAction; +import org.elasticsearch.rest.action.admin.cluster.RestSnapshotsStatusAction; +import org.elasticsearch.rest.action.admin.cluster.RestVerifyRepositoryAction; import org.elasticsearch.rest.action.admin.cluster.dangling.RestDeleteDanglingIndexAction; import org.elasticsearch.rest.action.admin.cluster.dangling.RestImportDanglingIndexAction; import org.elasticsearch.rest.action.admin.cluster.dangling.RestListDanglingIndicesAction; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index b9feb82959e5b..f271b94ca6877 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -269,9 +269,7 @@ public ClusterState execute(ClusterState currentState) { logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices); final List indexIds = repositoryData.resolveNewIndices( - indices, runningSnapshots.stream().filter(entry -> entry.repository().equals(repositoryName)) - .flatMap(entry -> entry.indices().stream()).distinct() - .collect(Collectors.toMap(IndexId::getName, Function.identity()))); + indices, getInFlightIndexIds(runningSnapshots, repositoryName)); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); ImmutableOpenMap shards = shards(snapshots, deletionsInProgress, currentState.metadata(), 
currentState.routingTable(), indexIds, useShardGenerations(version), repositoryData, repositoryName); @@ -321,6 +319,12 @@ public TimeValue timeout() { }, "create_snapshot [" + snapshotName + ']', listener::onFailure); } + private static Map getInFlightIndexIds(List runningSnapshots, String repositoryName) { + return runningSnapshots.stream().filter(entry -> entry.repository().equals(repositoryName)) + .flatMap(entry -> entry.indices().stream()).distinct() + .collect(Collectors.toMap(IndexId::getName, Function.identity())); + } + public void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener) { final String repositoryName = request.repository(); final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target()); @@ -398,7 +402,7 @@ public void onFailure(String source, Exception e) { public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { logger.info("snapshot clone [{}] started", snapshot); addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure)); - startCloning(repository, repositoryData, newEntry); + startCloning(repository, newEntry); } @Override @@ -412,89 +416,97 @@ public TimeValue timeout() { * Determine the number of shards in each index of a clone operation and update the cluster state accordingly. * * @param repository repository to run operation on - * @param repositoryData repository data at the time the clone operation was started * @param cloneEntry clone operation in the cluster state */ - private void startCloning(Repository repository, RepositoryData repositoryData, SnapshotsInProgress.Entry cloneEntry) { + private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) { final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); final List indices = cloneEntry.indices(); final GroupedActionListener> shardCountListener = new GroupedActionListener<>( - ActionListener.wrap(counts -> { - clusterService.submitStateUpdateTask("start snapshot clone", new ClusterStateUpdateTask() { - - private SnapshotsInProgress.Entry updatedEntry; - - @Override - public ClusterState execute(ClusterState currentState) { - final SnapshotsInProgress snapshotsInProgress = - currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); - final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); - boolean changed = false; - for (int i = 0; i < updatedEntries.size(); i++) { - if (cloneEntry.equals(updatedEntries.get(i))) { - final ImmutableOpenMap.Builder clonesBuilder = - ImmutableOpenMap.builder(); - final Set busyShards = busyShardsForRepo( - repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); - final Set busyShardsInRepo = busyShards.stream().map(shardId -> - SnapshotsInProgress.repoShardId( - repositoryData.resolveIndexId(shardId.getIndexName()), shardId.getId())) - .collect(Collectors.toSet()); - for (Tuple count : counts) { - for (int shardId = 0; shardId < count.v2(); shardId++) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); - if (busyShardsInRepo.contains(repoShardId)) { - clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); - } else { - clonesBuilder.put(repoShardId, - new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), - repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); + ActionListener.wrap(counts -> repository.executeConsistentStateUpdate( + repositoryData -> new 
ClusterStateUpdateTask() { + + private SnapshotsInProgress.Entry updatedEntry; + + @Override + public ClusterState execute(ClusterState currentState) { + final SnapshotsInProgress snapshotsInProgress = + currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); + boolean changed = false; + final Map inFlightIndexIds = + getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); + for (int i = 0; i < updatedEntries.size(); i++) { + if (cloneEntry.equals(updatedEntries.get(i))) { + final ImmutableOpenMap.Builder clonesBuilder = + ImmutableOpenMap.builder(); + final Set busyShards = busyShardsForRepo( + repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); + final Set busyShardsInRepo = busyShards + .stream() + .map(shardId -> SnapshotsInProgress.repoShardId( + inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) + .collect(Collectors.toSet()); + for (Tuple count : counts) { + for (int shardId = 0; shardId < count.v2(); shardId++) { + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); + if (busyShardsInRepo.contains(repoShardId)) { + clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); + } else { + clonesBuilder.put(repoShardId, + new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); + } } } + updatedEntry = cloneEntry.withClones(clonesBuilder.build()); + updatedEntries.set(i, updatedEntry); + changed = true; + break; } - updatedEntry = cloneEntry.withClones(clonesBuilder.build()); - updatedEntries.set(i, updatedEntry); - changed = true; - break; } + return updateWithSnapshots( + currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); } - return updateWithSnapshots( - currentState, changed ? 
SnapshotsInProgress.of(updatedEntries) : null, null); - } - @Override - public void onFailure(String source, Exception e) { - logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); - failAllListenersOnMasterFailOver(e); - } + @Override + public void onFailure(String source, Exception e) { + logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); + failAllListenersOnMasterFailOver(e); + } - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - if (updatedEntry != null) { - final Snapshot target = updatedEntry.snapshot(); - final SnapshotId sourceSnapshot = updatedEntry.source(); - for (ObjectObjectCursor indexClone : updatedEntry.clones()) { - final IndexId indexId = repositoryData.resolveIndexId(indexClone.key.indexName()); - final ShardSnapshotStatus shardStatusBefore = indexClone.value; - if (shardStatusBefore.state() != ShardState.INIT) { - continue; + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + if (updatedEntry != null) { + final Snapshot target = updatedEntry.snapshot(); + final SnapshotId sourceSnapshot = updatedEntry.source(); + for (ObjectObjectCursor indexClone : updatedEntry.clones()) { + final ShardSnapshotStatus shardStatusBefore = indexClone.value; + if (shardStatusBefore.state() != ShardState.INIT) { + continue; + } + final RepoShardId repoShardId = indexClone.key; + runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } - final RepoShardId repoShardId = indexClone.key; - runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); + } else { + // TODO: this is broken, we should error somehow maybe } - } else { - // TODO: this is broken, we should error somehow maybe } - } - }); - }, e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, repositoryData)), indices.size()); + }, "start snapshot clone", e -> { + // TODO: handle + throw new AssertionError(e); + }), e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, null)), indices.size()); final SnapshotId sourceSnapshot = cloneEntry.source(); - for (IndexId index : cloneEntry.indices()) { - executor.execute(ActionRunnable.supply(shardCountListener, () -> { - final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); - return Tuple.tuple(index, metadata.getNumberOfShards()); - })); - } + repository.getRepositoryData(ActionListener.wrap(repositoryData -> { + for (IndexId index : cloneEntry.indices()) { + executor.execute(ActionRunnable.supply(shardCountListener, () -> { + final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); + return Tuple.tuple(index, metadata.getNumberOfShards()); + })); + } + }, e -> { + // TODO: handle + throw new AssertionError(e); + })); } private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); From 09de3d62cfa96e1ff29504d765b1b99c2d71621a Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 2 Sep 2020 15:04:36 +0200 Subject: [PATCH 36/94] error handling --- .../snapshots/SnapshotsService.java | 151 +++++++++--------- 1 file changed, 75 insertions(+), 76 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index f271b94ca6877..84e8955c21e6a 100644 --- 
a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -337,8 +337,7 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis } final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); - // TODO: Clone DS? (probably no, not relevant for searchable snapshots ...) - // TODO: throw when no indices match + // TODO: throw when no indices match or source snapshot was not successful for the matching indices repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { private SnapshotsInProgress.Entry newEntry; @@ -394,7 +393,7 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { - logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e); + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot", repositoryName, snapshotName), e); listener.onFailure(e); } @@ -422,79 +421,80 @@ private void startCloning(Repository repository, SnapshotsInProgress.Entry clone final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); final List indices = cloneEntry.indices(); final GroupedActionListener> shardCountListener = new GroupedActionListener<>( - ActionListener.wrap(counts -> repository.executeConsistentStateUpdate( - repositoryData -> new ClusterStateUpdateTask() { - - private SnapshotsInProgress.Entry updatedEntry; - - @Override - public ClusterState execute(ClusterState currentState) { - final SnapshotsInProgress snapshotsInProgress = - currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); - final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); - boolean changed = false; - final Map inFlightIndexIds = - getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); - for (int i = 0; i < updatedEntries.size(); i++) { - if (cloneEntry.equals(updatedEntries.get(i))) { - final ImmutableOpenMap.Builder clonesBuilder = - ImmutableOpenMap.builder(); - final Set busyShards = busyShardsForRepo( - repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); - final Set busyShardsInRepo = busyShards - .stream() - .map(shardId -> SnapshotsInProgress.repoShardId( - inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) - .collect(Collectors.toSet()); - for (Tuple count : counts) { - for (int shardId = 0; shardId < count.v2(); shardId++) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); - if (busyShardsInRepo.contains(repoShardId)) { - clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); - } else { - clonesBuilder.put(repoShardId, - new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), - repositoryData.shardGenerations().getShardGen(count.v1(), shardId))); - } - } + ActionListener.wrap(counts -> repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { + + private SnapshotsInProgress.Entry updatedEntry; + + @Override + public ClusterState execute(ClusterState currentState) { + final SnapshotsInProgress snapshotsInProgress = + currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); + boolean changed = false; + final Map inFlightIndexIds = + 
getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); + final ShardGenerations shardGenerations = repositoryData.shardGenerations(); + for (int i = 0; i < updatedEntries.size(); i++) { + if (cloneEntry.equals(updatedEntries.get(i))) { + final ImmutableOpenMap.Builder clonesBuilder = + ImmutableOpenMap.builder(); + final Set busyShards = busyShardsForRepo( + repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); + final Set busyShardsInRepo = busyShards + .stream() + .map(shardId -> SnapshotsInProgress.repoShardId( + inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) + .collect(Collectors.toSet()); + for (Tuple count : counts) { + for (int shardId = 0; shardId < count.v2(); shardId++) { + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); + if (busyShardsInRepo.contains(repoShardId)) { + clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); + } else { + clonesBuilder.put(repoShardId, new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + shardGenerations.getShardGen(count.v1(), shardId))); } - updatedEntry = cloneEntry.withClones(clonesBuilder.build()); - updatedEntries.set(i, updatedEntry); - changed = true; - break; } } - return updateWithSnapshots( - currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + updatedEntry = cloneEntry.withClones(clonesBuilder.build()); + updatedEntries.set(i, updatedEntry); + changed = true; + break; } + } + return updateWithSnapshots( + currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + } - @Override - public void onFailure(String source, Exception e) { - logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); - failAllListenersOnMasterFailOver(e); - } + @Override + public void onFailure(String source, Exception e) { + logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); + failAllListenersOnMasterFailOver(e); + } - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - if (updatedEntry != null) { - final Snapshot target = updatedEntry.snapshot(); - final SnapshotId sourceSnapshot = updatedEntry.source(); - for (ObjectObjectCursor indexClone : updatedEntry.clones()) { - final ShardSnapshotStatus shardStatusBefore = indexClone.value; - if (shardStatusBefore.state() != ShardState.INIT) { - continue; - } - final RepoShardId repoShardId = indexClone.key; - runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); - } - } else { - // TODO: this is broken, we should error somehow maybe + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + if (updatedEntry != null) { + final Snapshot target = updatedEntry.snapshot(); + final SnapshotId sourceSnapshot = updatedEntry.source(); + for (ObjectObjectCursor indexClone : updatedEntry.clones()) { + final ShardSnapshotStatus shardStatusBefore = indexClone.value; + if (shardStatusBefore.state() != ShardState.INIT) { + continue; } + final RepoShardId repoShardId = indexClone.key; + runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } - }, "start snapshot clone", e -> { - // TODO: handle - throw new AssertionError(e); - }), e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, null)), indices.size()); + } else { + // Extremely unlikely corner case of master failing over between between 
starting the clone and starting + // shard clones. + logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry); + } + } + }, "start snapshot clone", e -> { + // TODO: handle + throw new AssertionError(e); + }), e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, null)), indices.size()); final SnapshotId sourceSnapshot = cloneEntry.source(); repository.getRepositoryData(ActionListener.wrap(repositoryData -> { for (IndexId index : cloneEntry.indices()) { @@ -520,24 +520,23 @@ private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, generation -> innerUpdateSnapshotState( new ShardSnapshotUpdate(target, repoShardId, null, - new ShardSnapshotStatus(clusterService.localNode().getId(), - ShardState.SUCCESS, generation)), + new ShardSnapshotStatus(clusterService.localNode().getId(), ShardState.SUCCESS, generation)), ActionListener.wrap( v -> { currentlyCloning.remove(repoShardId); - logger.trace( - "Marked [{}] as successfully cloned from [{}] to [{}]", - repoShardId, sourceSnapshot, targetSnapshot); + logger.trace("Marked [{}] as successfully cloned from [{}] to [{}]", repoShardId, + sourceSnapshot, targetSnapshot); }, e -> { currentlyCloning.remove(repoShardId); - //TODO: Error handling - throw new AssertionError(e); + logger.warn( + "Cluster state update after successful shard clone [{}] failed", repoShardId); + failAllListenersOnMasterFailOver(e); } )), e -> { currentlyCloning.remove(repoShardId); - //TODO: Error handling + // TODO: error handling, cleanup clone right away on partial failure? throw new AssertionError(e); })); } From 9db152ff741cbcdbe6c8847be6b06a9e9b47c4e6 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 3 Sep 2020 13:51:15 +0200 Subject: [PATCH 37/94] more fixes --- .../snapshots/CloneSnapshotIT.java | 38 +++ .../snapshots/SnapshotsService.java | 233 +++++++++--------- 2 files changed, 156 insertions(+), 115 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 6e4b98b62e237..a40ea3f45f653 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -30,6 +30,7 @@ import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase; +import java.util.ArrayList; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -214,6 +215,43 @@ public void testDeletePreventsClone() throws Exception { assertAcked(deleteFuture.get()); } + public void testBackToBackClonesForIndexNotInCluster() throws Exception { + // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations + final String masterNode = internalCluster().startMasterOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String indexBlocked = "index-blocked"; + createSingleShardIndexWithContent(indexBlocked); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + assertAcked(client().admin().indices().prepareDelete(indexBlocked).get()); + + final String targetSnapshot1 = "target-snapshot"; + blockMasterOnShardClone(repoName); + final ActionFuture cloneFuture1 = client().admin().cluster() + 
.prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(indexBlocked).execute(); + waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); + assertThat(cloneFuture1.isDone(), is(false)); + + final int extraClones = randomIntBetween(1, 5); + final List> extraCloneFutures = new ArrayList<>(extraClones); + for (int i = 0; i < extraClones; i++) { + extraCloneFutures.add(client().admin().cluster() + .prepareCloneSnapshot(repoName, sourceSnapshot, "target-snapshot-" + i).setIndices(indexBlocked).execute()); + } + + awaitNSnapshotsInProgress(1 + extraClones); + + unblockNode(repoName, masterNode); + assertAcked(cloneFuture1.get()); + for (ActionFuture extraCloneFuture : extraCloneFutures) { + assertAcked(extraCloneFuture.get()); + } + } + @AwaitsFix(bugUrl = "TODO if we want it") public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { internalCluster().startMasterOnlyNode(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 84e8955c21e6a..8966e857fcda6 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -109,6 +109,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.Executor; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -236,29 +237,14 @@ public void createSnapshot(final CreateSnapshotRequest request, final ActionList @Override public ClusterState execute(ClusterState currentState) { - // check if the snapshot name already exists in the repository - if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) { - throw new InvalidSnapshotNameException( - repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists"); - } + ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository); final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List runningSnapshots = snapshots.entries(); - if (runningSnapshots.stream().anyMatch(s -> { - final Snapshot running = s.snapshot(); - return running.getRepository().equals(repositoryName) && running.getSnapshotId().getName().equals(snapshotName); - })) { - throw new InvalidSnapshotNameException( - repository.getMetadata().name(), snapshotName, "snapshot with the same name is already in-progress"); - } + ensureSnapshotNameNotRunning(runningSnapshots, repositoryName, snapshotName); validate(repositoryName, snapshotName, currentState); final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY); - final RepositoryCleanupInProgress repositoryCleanupInProgress = - currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); - if (repositoryCleanupInProgress.hasCleanupInProgress()) { - throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, - "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); - } + ensureNoCleanupInProgress(currentState, repositoryName, snapshotName); ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress); // Store newSnapshot here to be 
processed in clusterStateProcessed List indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request)); @@ -319,6 +305,16 @@ public TimeValue timeout() { }, "create_snapshot [" + snapshotName + ']', listener::onFailure); } + private static void ensureSnapshotNameNotRunning(List runningSnapshots, String repositoryName, + String snapshotName) { + if (runningSnapshots.stream().anyMatch(s -> { + final Snapshot running = s.snapshot(); + return running.getRepository().equals(repositoryName) && running.getSnapshotId().getName().equals(snapshotName); + })) { + throw new InvalidSnapshotNameException(repositoryName, snapshotName, "snapshot with the same name is already in-progress"); + } + } + private static Map getInFlightIndexIds(List runningSnapshots, String repositoryName) { return runningSnapshots.stream().filter(entry -> entry.repository().equals(repositoryName)) .flatMap(entry -> entry.indices().stream()).distinct() @@ -344,26 +340,11 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis @Override public ClusterState execute(ClusterState currentState) { - // check if the snapshot name already exists in the repository - if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) { - throw new InvalidSnapshotNameException( - repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists"); - } - final RepositoryCleanupInProgress repositoryCleanupInProgress = - currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); - if (repositoryCleanupInProgress.hasCleanupInProgress()) { - throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, - "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); - } + ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository); + ensureNoCleanupInProgress(currentState, repositoryName, snapshotName); final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List runningSnapshots = snapshots.entries(); - if (runningSnapshots.stream().anyMatch(s -> { - final Snapshot running = s.snapshot(); - return running.getRepository().equals(repositoryName) && running.getSnapshotId().getName().equals(snapshotName); - })) { - throw new InvalidSnapshotNameException( - repository.getMetadata().name(), snapshotName, "snapshot with the same name is already in-progress"); - } + ensureSnapshotNameNotRunning(runningSnapshots, repositoryName, snapshotName); validate(repositoryName, snapshotName, currentState); final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds() @@ -411,6 +392,23 @@ public TimeValue timeout() { }, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure); } + private static void ensureNoCleanupInProgress(ClusterState currentState, String repositoryName, String snapshotName) { + final RepositoryCleanupInProgress repositoryCleanupInProgress = + currentState.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY); + if (repositoryCleanupInProgress.hasCleanupInProgress()) { + throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, + "cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"); + } + } + + private static void ensureSnapshotNameAvailableInRepo(RepositoryData repositoryData, String snapshotName, Repository repository) { + // check if the snapshot name 
already exists in the repository + if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) { + throw new InvalidSnapshotNameException( + repository.getMetadata().name(), snapshotName, "snapshot with the same name already exists"); + } + } + /** * Determine the number of shards in each index of a clone operation and update the cluster state accordingly. * @@ -420,93 +418,93 @@ public TimeValue timeout() { private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) { final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); final List indices = cloneEntry.indices(); - final GroupedActionListener> shardCountListener = new GroupedActionListener<>( - ActionListener.wrap(counts -> repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { + final StepListener>> allShardCountsListener = new StepListener<>(); - private SnapshotsInProgress.Entry updatedEntry; + final GroupedActionListener> shardCountListener = + new GroupedActionListener<>(allShardCountsListener, indices.size()); + final SnapshotId sourceSnapshot = cloneEntry.source(); + final Snapshot targetSnapshot = cloneEntry.snapshot(); + final Consumer onFailure = e -> removeFailedSnapshotFromClusterState(targetSnapshot, e, null); + repository.getRepositoryData(ActionListener.wrap(repositoryData -> { + for (IndexId index : cloneEntry.indices()) { + executor.execute(ActionRunnable.supply(shardCountListener, () -> { + final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); + return Tuple.tuple(index, metadata.getNumberOfShards()); + })); + } + }, onFailure)); - @Override - public ClusterState execute(ClusterState currentState) { - final SnapshotsInProgress snapshotsInProgress = - currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); - final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); - boolean changed = false; - final Map inFlightIndexIds = - getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); - final ShardGenerations shardGenerations = repositoryData.shardGenerations(); - for (int i = 0; i < updatedEntries.size(); i++) { - if (cloneEntry.equals(updatedEntries.get(i))) { - final ImmutableOpenMap.Builder clonesBuilder = - ImmutableOpenMap.builder(); - final Set busyShards = busyShardsForRepo( - repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); - final Set busyShardsInRepo = busyShards - .stream() - .map(shardId -> SnapshotsInProgress.repoShardId( - inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) - .collect(Collectors.toSet()); - for (Tuple count : counts) { - for (int shardId = 0; shardId < count.v2(); shardId++) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); - if (busyShardsInRepo.contains(repoShardId)) { - clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); - } else { - clonesBuilder.put(repoShardId, new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), - shardGenerations.getShardGen(count.v1(), shardId))); - } - } + allShardCountsListener.whenComplete(counts -> repository.executeConsistentStateUpdate(repoData -> new ClusterStateUpdateTask() { + + private SnapshotsInProgress.Entry updatedEntry; + + @Override + public ClusterState execute(ClusterState currentState) { + final SnapshotsInProgress snapshotsInProgress = + currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List 
updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); + boolean changed = false; + final Map inFlightIndexIds = + getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); + final ShardGenerations shardGenerations = repoData.shardGenerations(); + for (int i = 0; i < updatedEntries.size(); i++) { + if (cloneEntry.equals(updatedEntries.get(i))) { + final ImmutableOpenMap.Builder clonesBuilder = + ImmutableOpenMap.builder(); + final Set busyShards = busyShardsForRepo( + repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); + final Set busyShardsInRepo = busyShards + .stream() + .map(shardId -> SnapshotsInProgress.repoShardId( + inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) + .collect(Collectors.toSet()); + for (Tuple count : counts) { + for (int shardId = 0; shardId < count.v2(); shardId++) { + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); + if (busyShardsInRepo.contains(repoShardId)) { + clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); + } else { + clonesBuilder.put(repoShardId, new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + shardGenerations.getShardGen(count.v1(), shardId))); } - updatedEntry = cloneEntry.withClones(clonesBuilder.build()); - updatedEntries.set(i, updatedEntry); - changed = true; - break; } } - return updateWithSnapshots( - currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + updatedEntry = cloneEntry.withClones(clonesBuilder.build()); + updatedEntries.set(i, updatedEntry); + changed = true; + break; } + } + return updateWithSnapshots( + currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + } - @Override - public void onFailure(String source, Exception e) { - logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); - failAllListenersOnMasterFailOver(e); - } + @Override + public void onFailure(String source, Exception e) { + logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); + failAllListenersOnMasterFailOver(e); + } - @Override - public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { - if (updatedEntry != null) { - final Snapshot target = updatedEntry.snapshot(); - final SnapshotId sourceSnapshot = updatedEntry.source(); - for (ObjectObjectCursor indexClone : updatedEntry.clones()) { - final ShardSnapshotStatus shardStatusBefore = indexClone.value; - if (shardStatusBefore.state() != ShardState.INIT) { - continue; - } - final RepoShardId repoShardId = indexClone.key; - runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); - } - } else { - // Extremely unlikely corner case of master failing over between between starting the clone and starting - // shard clones. 
- logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry); + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + if (updatedEntry != null) { + final Snapshot target = updatedEntry.snapshot(); + final SnapshotId sourceSnapshot = updatedEntry.source(); + for (ObjectObjectCursor indexClone : updatedEntry.clones()) { + final ShardSnapshotStatus shardStatusBefore = indexClone.value; + if (shardStatusBefore.state() != ShardState.INIT) { + continue; } + final RepoShardId repoShardId = indexClone.key; + runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } - }, "start snapshot clone", e -> { - // TODO: handle - throw new AssertionError(e); - }), e -> removeFailedSnapshotFromClusterState(cloneEntry.snapshot(), e, null)), indices.size()); - final SnapshotId sourceSnapshot = cloneEntry.source(); - repository.getRepositoryData(ActionListener.wrap(repositoryData -> { - for (IndexId index : cloneEntry.indices()) { - executor.execute(ActionRunnable.supply(shardCountListener, () -> { - final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); - return Tuple.tuple(index, metadata.getNumberOfShards()); - })); + } else { + // Extremely unlikely corner case of master failing over between between starting the clone and + // starting shard clones. + logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry); + } } - }, e -> { - // TODO: handle - throw new AssertionError(e); - })); + }, "start snapshot clone", onFailure), onFailure); } private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); @@ -2056,10 +2054,15 @@ private static Set busyShardsForRepo(String repoName, @Nullable Snapsho } else { for (ObjectObjectCursor clone : runningSnapshot.clones()) { final ShardSnapshotStatus shardState = clone.value; - final IndexMetadata index = metadata.index(clone.key.indexName()); if (shardState.isActive()) { - // TODO: there is some weirdness here on index delete that needs to be accounted for - inProgressShards.add(new ShardId(index.getIndex(), clone.key.shardId())); + IndexMetadata indexMeta = metadata.index(clone.key.indexName()); + final Index index; + if (indexMeta == null) { + index = new Index(clone.key.indexName(), IndexMetadata.INDEX_UUID_NA_VALUE); + } else { + index = indexMeta.getIndex(); + } + inProgressShards.add(new ShardId(index, clone.key.shardId())); } } } From 10b3516a44be2b580b6f914db197b86b2352e2d3 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 3 Sep 2020 16:01:30 +0200 Subject: [PATCH 38/94] last one? 
--- .../snapshots/CloneSnapshotIT.java | 23 +++- .../cluster/SnapshotsInProgress.java | 1 + .../snapshots/SnapshotsService.java | 109 ++++++++++++++---- 3 files changed, 107 insertions(+), 26 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index a40ea3f45f653..b63c068a5646a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -242,14 +242,35 @@ public void testBackToBackClonesForIndexNotInCluster() throws Exception { extraCloneFutures.add(client().admin().cluster() .prepareCloneSnapshot(repoName, sourceSnapshot, "target-snapshot-" + i).setIndices(indexBlocked).execute()); } - awaitNSnapshotsInProgress(1 + extraClones); + for (ActionFuture extraCloneFuture : extraCloneFutures) { + assertFalse(extraCloneFuture.isDone()); + } + + final int extraSnapshots = randomIntBetween(0, 5); + if (extraSnapshots > 0) { + createSingleShardIndexWithContent(indexBlocked); + } + + final List> extraSnapshotFutures = new ArrayList<>(extraSnapshots); + for (int i = 0; i < extraSnapshots; i++) { + extraSnapshotFutures.add(startFullSnapshot(repoName, "extra-snap-" + i)); + } + + awaitNSnapshotsInProgress(1 + extraClones + extraSnapshots); + for (ActionFuture extraSnapshotFuture : extraSnapshotFutures) { + assertFalse(extraSnapshotFuture.isDone()); + } unblockNode(repoName, masterNode); assertAcked(cloneFuture1.get()); + for (ActionFuture extraCloneFuture : extraCloneFutures) { assertAcked(extraCloneFuture.get()); } + for (ActionFuture extraSnapshotFuture : extraSnapshotFutures) { + assertSuccessful(extraSnapshotFuture); + } } @AwaitsFix(bugUrl = "TODO if we want it") diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 628d373ff3945..a91e1dfbf01f9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -744,6 +744,7 @@ public static final class RepoShardId implements Writeable { private final int shard; private RepoShardId(IndexId index, int shard) { + assert index != null; this.index = index; this.shard = shard; } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 8966e857fcda6..f0b6ef7f5c9c7 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2210,6 +2210,11 @@ public ClusterTasksResult execute(ClusterState currentState final ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); final ImmutableOpenMap.Builder clones = ImmutableOpenMap.builder(); for (ShardSnapshotUpdate updateSnapshotState : tasks) { + final String updatedRepository = updateSnapshotState.snapshot().getRepository(); + final Set reusedShardIds = + reusedRepoShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); + final Set reusedConcreteShardIds = + reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); if (updateSnapshotState.isClone()) { final RepoShardId finishedShardId = updateSnapshotState.repoShardId(); if (entry.snapshot().equals(updateSnapshotState.snapshot())) { @@ 
-2222,25 +2227,78 @@ public ClusterTasksResult execute(ClusterState currentState changedCount++; clones.put(finishedShardId, updateSnapshotState.status); } else { - final String updatedRepository = updateSnapshotState.snapshot().getRepository(); - final Set reusedShardIds = - reusedRepoShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); - if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false - && entry.clones().keys().contains(finishedShardId)) { - final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId); - if (existingStatus.state() != ShardState.QUEUED) { + if (entry.source() == null) { + final IndexMetadata indexMeta = currentState.metadata().index(finishedShardId.indexName()); + if (indexMeta == null) { continue; } - if (updated == false) { - shards.putAll(entry.shards()); - updated = true; + final ShardId finishedConcreteShardId = new ShardId(indexMeta.getIndex(), finishedShardId.shardId()); + if (entry.repository().equals(updatedRepository) && + entry.state().completed() == false && entry.shards().keys().contains(finishedConcreteShardId) + && reusedConcreteShardIds.contains(finishedConcreteShardId) == false) { + final ShardSnapshotStatus existingStatus = entry.shards().get(finishedConcreteShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + // A clone was updated, so we must use a node id different than that in the current update for the + // reassignment to actual shard snapshots + ShardRouting primary = currentState.routingTable().index(finishedConcreteShardId.getIndex()) + .shard(finishedConcreteShardId.id()).primaryShard(); + final ShardSnapshotStatus shardSnapshotStatus; + if (primary == null || !primary.assignedToNode()) { + shardSnapshotStatus = + new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", + updateSnapshotState.status().generation()); + } else if (primary.relocating() || primary.initializing()) { + shardSnapshotStatus = new ShardSnapshotStatus( + primary.currentNodeId(), ShardState.WAITING, updateSnapshotState.status().generation()); + } else if (!primary.started()) { + shardSnapshotStatus = + new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, + "primary shard hasn't been started yet", + updateSnapshotState.status().generation()); + } else { + shardSnapshotStatus = + new ShardSnapshotStatus(primary.currentNodeId(), updateSnapshotState.status().generation()); + } + shards.put(finishedConcreteShardId, shardSnapshotStatus); + if (shardSnapshotStatus.isActive()) { + reusedShardIds.add(finishedShardId); + reusedConcreteShardIds.add(finishedConcreteShardId); + } + } + } else { + if (entry.repository().equals(updatedRepository) && + entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false + && entry.clones().keys().contains(finishedShardId)) { + final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + 
logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + clones.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), + finishedStatus.generation())); + reusedShardIds.add(finishedShardId); + final IndexMetadata indexMeta = currentState.metadata().index(finishedShardId.indexName()); + if (indexMeta != null) { + final ShardId finishedConcreteShardId = + new ShardId(indexMeta.getIndex(), finishedShardId.shardId()); + reusedConcreteShardIds.add(finishedConcreteShardId); + } } - final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); - logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, - finishedStatus.nodeId(), finishedStatus.generation()); - clones.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); - reusedShardIds.add(finishedShardId); } } } else { @@ -2255,11 +2313,11 @@ public ClusterTasksResult execute(ClusterState currentState shards.put(finishedShardId, updateSnapshotState.status()); changedCount++; } else { - final String updatedRepository = updateSnapshotState.snapshot().getRepository(); - final Set reusedShardIds = - reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); + // TODO: horribly inefficient obv. + final Map indicesLookup = + entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false) { + entry.state().completed() == false && reusedConcreteShardIds.contains(finishedShardId) == false) { if (entry.source() == null) { if (entry.shards().keys().contains(finishedShardId)) { final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); @@ -2275,12 +2333,12 @@ public ClusterTasksResult execute(ClusterState currentState finishedStatus.nodeId(), finishedStatus.generation()); shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); - reusedShardIds.add(finishedShardId); + reusedConcreteShardIds.add(finishedShardId); + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); + reusedShardIds.add(repoShardId); } } else { - // TODO: horribly inefficient obv. 
- final Map indicesLookup = - entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); if (indicesLookup.containsKey(finishedShardId.getIndexName())) { final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); @@ -2298,7 +2356,8 @@ public ClusterTasksResult execute(ClusterState currentState finishedStatus.nodeId(), finishedStatus.generation()); clones.put(repoShardId, new ShardSnapshotStatus( currentState.nodes().getLocalNodeId(), finishedStatus.generation())); - reusedShardIds.add(finishedShardId); + reusedConcreteShardIds.add(finishedShardId); + reusedShardIds.add(repoShardId); } } } From 1cf9a6736759f977f315ad3d587d0fe66cc13329 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 4 Sep 2020 00:10:03 +0200 Subject: [PATCH 39/94] bck --- .../main/java/org/elasticsearch/snapshots/SnapshotsService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index f0b6ef7f5c9c7..06df05064fb31 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -451,6 +451,7 @@ public ClusterState execute(ClusterState currentState) { if (cloneEntry.equals(updatedEntries.get(i))) { final ImmutableOpenMap.Builder clonesBuilder = ImmutableOpenMap.builder(); + // TODO: needlessly complex, just deal with repo shard id directly final Set busyShards = busyShardsForRepo( repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); final Set busyShardsInRepo = busyShards From a51f0a31d0f97570ed5aedddb16b5d3e554b58de Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 7 Sep 2020 17:00:18 +0200 Subject: [PATCH 40/94] fix state machine --- .../snapshots/SnapshotsService.java | 100 ++++++++---------- 1 file changed, 46 insertions(+), 54 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 8b26eb0f33fb8..5c96a4081b057 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2311,77 +2311,69 @@ public ClusterTasksResult execute(ClusterState currentState if (entry.snapshot().equals(updateSnapshotState.snapshot())) { logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(), finishedShardId, updateSnapshotState.status().state()); - if (entry.state().completed() == false && entry.repository().equals(updatedRepository) - && reusedConcreteShardIds.contains(finishedShardId) == false) { - final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); - if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { - continue; - } - if (updated == false) { - shards.putAll(entry.shards()); - updated = true; - } - shards.put(finishedShardId, updateSnapshotState.status()); - changedCount++; - } else { - // TODO: horribly inefficient obv. 
- final Map indicesLookup = - entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); - if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && reusedConcreteShardIds.contains(finishedShardId) == false) { - if (entry.source() == null) { - if (entry.shards().keys().contains(finishedShardId)) { - final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + shards.put(finishedShardId, updateSnapshotState.status()); + changedCount++; + } else { + // TODO: horribly inefficient obv. + final Map indicesLookup = + entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); + if (entry.repository().equals(updatedRepository) && + entry.state().completed() == false && reusedConcreteShardIds.contains(finishedShardId) == false) { + if (entry.source() == null) { + if (entry.shards().keys().contains(finishedShardId)) { + final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); + if (existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), + finishedStatus.generation())); + reusedConcreteShardIds.add(finishedShardId); + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); + reusedShardIds.add(repoShardId); + } + } else { + if (indicesLookup.containsKey(finishedShardId.getIndexName())) { + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); + if (entry.clones().containsKey(repoShardId)) { + final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); if (existingStatus.state() != ShardState.QUEUED) { continue; } if (updated == false) { - shards.putAll(entry.shards()); + clones.putAll(entry.clones()); updated = true; } final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, finishedStatus.nodeId(), finishedStatus.generation()); - shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), - finishedStatus.generation())); + clones.put(repoShardId, new ShardSnapshotStatus( + currentState.nodes().getLocalNodeId(), finishedStatus.generation())); reusedConcreteShardIds.add(finishedShardId); - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( - indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); reusedShardIds.add(repoShardId); } - } else { - if (indicesLookup.containsKey(finishedShardId.getIndexName())) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( - indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); - if (entry.clones().containsKey(repoShardId)) { - final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); - if (existingStatus.state() != ShardState.QUEUED) { - continue; - } - if (updated == false) { - clones.putAll(entry.clones()); - updated = true; - } - final ShardSnapshotStatus finishedStatus = 
updateSnapshotState.status(); - logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, - finishedStatus.nodeId(), finishedStatus.generation()); - clones.put(repoShardId, new ShardSnapshotStatus( - currentState.nodes().getLocalNodeId(), finishedStatus.generation())); - reusedConcreteShardIds.add(finishedShardId); - reusedShardIds.add(repoShardId); - } - } } } } } } - - if (updated) { - entries.add(entry.withShardStates(shards.build()).withClones(clones.build())); - } else { - entries.add(entry); - } + } + if (updated) { + entries.add(entry.withShardStates(shards.build()).withClones(clones.build())); + } else { + entries.add(entry); } } if (changedCount > 0) { From 6f149f75a56942f2fffde94802721159a5e58d02 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 7 Sep 2020 17:10:16 +0200 Subject: [PATCH 41/94] optimize --- .../snapshots/SnapshotsService.java | 59 +++++++++---------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 5c96a4081b057..756153b3cc829 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2282,7 +2282,7 @@ public ClusterTasksResult execute(ClusterState currentState } else { if (entry.repository().equals(updatedRepository) && entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false - && entry.clones().keys().contains(finishedShardId)) { + && entry.clones().containsKey(finishedShardId)) { final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId); if (existingStatus.state() != ShardState.QUEUED) { continue; @@ -2307,6 +2307,7 @@ public ClusterTasksResult execute(ClusterState currentState } } } else { + // Standard (non-clone) shard snapshot was updated final ShardId finishedShardId = updateSnapshotState.shardId(); if (entry.snapshot().equals(updateSnapshotState.snapshot())) { logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(), @@ -2324,47 +2325,43 @@ public ClusterTasksResult execute(ClusterState currentState if (entry.repository().equals(updatedRepository) && entry.state().completed() == false && reusedConcreteShardIds.contains(finishedShardId) == false) { if (entry.source() == null) { - if (entry.shards().keys().contains(finishedShardId)) { - final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); - if (existingStatus.state() != ShardState.QUEUED) { + final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); + if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { + continue; + } + if (updated == false) { + shards.putAll(entry.shards()); + updated = true; + } + final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); + logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, + finishedStatus.nodeId(), finishedStatus.generation()); + shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), + finishedStatus.generation())); + reusedConcreteShardIds.add(finishedShardId); + final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); + reusedShardIds.add(repoShardId); + } else { + final IndexId indexId = indicesLookup.get(finishedShardId.getIndexName()); + if (indexId != null) { + final 
RepoShardId repoShardId = SnapshotsInProgress.repoShardId(indexId, finishedShardId.getId()); + final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); + if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { continue; } if (updated == false) { - shards.putAll(entry.shards()); + clones.putAll(entry.clones()); updated = true; } final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, finishedStatus.nodeId(), finishedStatus.generation()); - shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), - finishedStatus.generation())); + clones.put(repoShardId, new ShardSnapshotStatus( + currentState.nodes().getLocalNodeId(), finishedStatus.generation())); reusedConcreteShardIds.add(finishedShardId); - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( - indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); reusedShardIds.add(repoShardId); } - } else { - if (indicesLookup.containsKey(finishedShardId.getIndexName())) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( - indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); - if (entry.clones().containsKey(repoShardId)) { - final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); - if (existingStatus.state() != ShardState.QUEUED) { - continue; - } - if (updated == false) { - clones.putAll(entry.clones()); - updated = true; - } - final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); - logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, - finishedStatus.nodeId(), finishedStatus.generation()); - clones.put(repoShardId, new ShardSnapshotStatus( - currentState.nodes().getLocalNodeId(), finishedStatus.generation())); - reusedConcreteShardIds.add(finishedShardId); - reusedShardIds.add(repoShardId); - } - } } } } From 52020af79b65fb0589b103f3414ca7ca7e310cf7 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 7 Sep 2020 18:30:14 +0200 Subject: [PATCH 42/94] better not great --- .../snapshots/SnapshotsService.java | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 756153b3cc829..7a50baed28734 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -444,6 +444,7 @@ public ClusterState execute(ClusterState currentState) { currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); boolean changed = false; + final String localNodeId = currentState.nodes().getLocalNodeId(); final Map inFlightIndexIds = getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); final ShardGenerations shardGenerations = repoData.shardGenerations(); @@ -465,7 +466,7 @@ public ClusterState execute(ClusterState currentState) { if (busyShardsInRepo.contains(repoShardId)) { clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); } else { - clonesBuilder.put(repoShardId, new ShardSnapshotStatus(currentState.nodes().getLocalNodeId(), + clonesBuilder.put(repoShardId, new ShardSnapshotStatus(localNodeId, shardGenerations.getShardGen(count.v1(), shardId))); } } @@ -2210,6 +2211,7 @@ public 
ClusterTasksResult execute(ClusterState currentState final List entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); final Map> reusedRepoShardIdsByRepo = new HashMap<>(); + final String localNodeId = currentState.nodes().getLocalNodeId(); for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) { boolean updated = false; final ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); @@ -2230,7 +2232,7 @@ public ClusterTasksResult execute(ClusterState currentState updated = true; } changedCount++; - clones.put(finishedShardId, updateSnapshotState.status); + clones.put(finishedShardId, updateSnapshotState.status()); } else { if (entry.source() == null) { final IndexMetadata indexMeta = currentState.metadata().index(finishedShardId.indexName()); @@ -2238,11 +2240,10 @@ public ClusterTasksResult execute(ClusterState currentState continue; } final ShardId finishedConcreteShardId = new ShardId(indexMeta.getIndex(), finishedShardId.shardId()); - if (entry.repository().equals(updatedRepository) && - entry.state().completed() == false && entry.shards().keys().contains(finishedConcreteShardId) + if (entry.repository().equals(updatedRepository) && entry.state().completed() == false && reusedConcreteShardIds.contains(finishedConcreteShardId) == false) { final ShardSnapshotStatus existingStatus = entry.shards().get(finishedConcreteShardId); - if (existingStatus.state() != ShardState.QUEUED) { + if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { continue; } if (updated == false) { @@ -2256,22 +2257,19 @@ public ClusterTasksResult execute(ClusterState currentState // reassignment to actual shard snapshots ShardRouting primary = currentState.routingTable().index(finishedConcreteShardId.getIndex()) .shard(finishedConcreteShardId.id()).primaryShard(); + final String newGeneration = updateSnapshotState.status().generation(); final ShardSnapshotStatus shardSnapshotStatus; if (primary == null || !primary.assignedToNode()) { - shardSnapshotStatus = - new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", - updateSnapshotState.status().generation()); - } else if (primary.relocating() || primary.initializing()) { shardSnapshotStatus = new ShardSnapshotStatus( - primary.currentNodeId(), ShardState.WAITING, updateSnapshotState.status().generation()); - } else if (!primary.started()) { + null, ShardState.MISSING, "primary shard is not allocated", newGeneration); + } else if (primary.relocating() || primary.initializing()) { shardSnapshotStatus = - new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, - "primary shard hasn't been started yet", - updateSnapshotState.status().generation()); + new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, newGeneration); + } else if (primary.started() == false) { + shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, + "primary shard hasn't been started yet", newGeneration); } else { - shardSnapshotStatus = - new ShardSnapshotStatus(primary.currentNodeId(), updateSnapshotState.status().generation()); + shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), newGeneration); } shards.put(finishedConcreteShardId, shardSnapshotStatus); if (shardSnapshotStatus.isActive()) { @@ -2281,10 +2279,9 @@ public ClusterTasksResult execute(ClusterState currentState } } else { if (entry.repository().equals(updatedRepository) && - entry.state().completed() == 
false && reusedShardIds.contains(finishedShardId) == false - && entry.clones().containsKey(finishedShardId)) { + entry.state().completed() == false && reusedShardIds.contains(finishedShardId) == false) { final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId); - if (existingStatus.state() != ShardState.QUEUED) { + if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { continue; } if (updated == false) { @@ -2357,8 +2354,7 @@ public ClusterTasksResult execute(ClusterState currentState final ShardSnapshotStatus finishedStatus = updateSnapshotState.status(); logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId, finishedStatus.nodeId(), finishedStatus.generation()); - clones.put(repoShardId, new ShardSnapshotStatus( - currentState.nodes().getLocalNodeId(), finishedStatus.generation())); + clones.put(repoShardId, new ShardSnapshotStatus(localNodeId, finishedStatus.generation())); reusedConcreteShardIds.add(finishedShardId); reusedShardIds.add(repoShardId); } From 5b7b795af272431e1eed23757c4e0ba99893c5f8 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 14 Sep 2020 07:24:31 +0200 Subject: [PATCH 43/94] mroe efficient --- .../elasticsearch/snapshots/SnapshotsService.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index d4ee322eb1596..aa9a9cf6a9b6f 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2441,11 +2441,13 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } // TODO: this is horrifically expensive, find a way of more efficiently transporting the state here for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { - // this is a clone, see if new work is ready - for (ObjectObjectCursor clone : entry.clones()) { - if (clone.value.state() == ShardState.INIT) { - runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, - repositoriesService.repository(entry.repository())); + if (entry.source() != null && entry.state() == State.STARTED) { + // this is a clone, see if new work is ready + for (ObjectObjectCursor clone : entry.clones()) { + if (clone.value.state() == ShardState.INIT) { + runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, + repositoriesService.repository(entry.repository())); + } } } } From b756cc256bf956a2ec77c16c42812dd4b1e6e0b8 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 14 Sep 2020 07:42:57 +0200 Subject: [PATCH 44/94] fix --- .../java/org/elasticsearch/snapshots/SnapshotsService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index aa9a9cf6a9b6f..d78df277ba84f 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2439,9 +2439,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS endSnapshot(updatedEntry, newState.metadata(), null); } } - // TODO: this is horrifically expensive, find a way of more efficiently transporting the state here for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { - if 
(entry.source() != null && entry.state() == State.STARTED) { + if (entry.source() != null && entry.state() == State.STARTED + && entry.repository().equals(update.snapshot.getRepository())) { // this is a clone, see if new work is ready for (ObjectObjectCursor clone : entry.clones()) { if (clone.value.state() == ShardState.INIT) { From b26c47c7ce344c66c79a28ddd5e82f01c8b2b112 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 15 Sep 2020 11:08:11 +0200 Subject: [PATCH 45/94] fix --- .../java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index 740ee622db468..d6a984bb9d928 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -1221,7 +1221,7 @@ public void testAbortNotStartedSnapshotWithoutIO() throws Exception { final String dataNode = internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); - createIndexWithContent("test-index"); + createSingleShardIndexWithContent("test-index"); final ActionFuture createSnapshot1Future = startFullSnapshotBlockedOnDataNode("first-snapshot", repoName, dataNode); From d2dc4f8f1752a9559f68a88859d272580ab167d1 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 11:33:26 +0200 Subject: [PATCH 46/94] reproducer --- .../snapshots/CloneSnapshotIT.java | 28 ++++++++++ .../snapshots/SnapshotsService.java | 55 +++++++++++-------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index b63c068a5646a..ae0008348f001 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -31,6 +31,7 @@ import org.elasticsearch.test.ESIntegTestCase; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -302,6 +303,33 @@ public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { assertEquals(settings.get(IndexMetadata.SETTING_NUMBER_OF_REPLICAS), "1"); } + public void testMasterFailoverDuringClone() throws Exception { + // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations + internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String testIndex = "index-test"; + createSingleShardIndexWithContent(testIndex); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + final String targetSnapshot1 = "target-snapshot"; + blockMasterOnShardClone(repoName); + final ActionFuture cloneFuture = dataNodeClient().admin().cluster() + .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(testIndex).execute(); + awaitNSnapshotsInProgress(1); + final String masterNode = internalCluster().getMasterName(); + 
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); + internalCluster().restartNode(masterNode); + expectThrows(SnapshotException.class, cloneFuture::actionGet); + awaitNoMoreRunningOperations(internalCluster().getMasterName()); + + final Collection snapshotIds = getRepositoryData(repoName).getSnapshotIds(); + assertThat(snapshotIds, hasSize(2)); + } + private void blockMasterOnShardClone(String repoName) { ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) .setBlockOnWriteShardLevelMeta(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 14df72864fd4e..2d48b2f29b7ca 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -161,6 +161,9 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus // Set of snapshots that are currently being ended by this node private final Set endingSnapshots = Collections.synchronizedSet(new HashSet<>()); + // Set of currently initializing clone operations + private final Set initializingClones = Collections.synchronizedSet(new HashSet<>()); + private final SnapshotStateExecutor snapshotStateExecutor = new SnapshotStateExecutor(); private final UpdateSnapshotStatusAction updateSnapshotStatusHandler; @@ -831,17 +834,21 @@ public ClusterState execute(ClusterState currentState) { for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) { if (statesToUpdate.contains(snapshot.state())) { - ImmutableOpenMap shards = processWaitingShardsAndRemovedNodes(snapshot.shards(), - routingTable, nodes, knownFailures.computeIfAbsent(snapshot.repository(), k -> new HashMap<>())); - if (shards != null) { - final SnapshotsInProgress.Entry updatedSnapshot = snapshot.withShardStates(shards); - changed = true; - if (updatedSnapshot.state().completed()) { - finishedSnapshots.add(updatedSnapshot); - } - updatedSnapshotEntries.add(updatedSnapshot); + if (snapshot.source() != null && snapshot.clones().isEmpty()) { + throw new AssertionError("TODO"); } else { - updatedSnapshotEntries.add(snapshot); + ImmutableOpenMap shards = processWaitingShardsAndRemovedNodes(snapshot.shards(), + routingTable, nodes, knownFailures.computeIfAbsent(snapshot.repository(), k -> new HashMap<>())); + if (shards != null) { + final SnapshotsInProgress.Entry updatedSnapshot = snapshot.withShardStates(shards); + changed = true; + if (updatedSnapshot.state().completed()) { + finishedSnapshots.add(updatedSnapshot); + } + updatedSnapshotEntries.add(updatedSnapshot); + } else { + updatedSnapshotEntries.add(snapshot); + } } } else if (snapshot.repositoryStateId() == RepositoryData.UNKNOWN_REPO_GEN) { // BwC path, older versions could create entries with unknown repo GEN in INIT or ABORTED state that did not yet @@ -893,6 +900,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS } } } + startExecutableClones(newState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY), null); // run newly ready deletes for (SnapshotDeletionsInProgress.Entry entry : deletionsToExecute) { if (tryEnterRepoLoop(entry.repository())) { @@ -2468,23 +2476,26 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS endSnapshot(updatedEntry, newState.metadata(), null); } } - for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) 
{ - if (entry.source() != null && entry.state() == State.STARTED - && entry.repository().equals(update.snapshot.getRepository())) { - // this is a clone, see if new work is ready - for (ObjectObjectCursor clone : entry.clones()) { - if (clone.value.state() == ShardState.INIT) { - runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, - repositoriesService.repository(entry.repository())); - } - } - } - } + startExecutableClones(snapshotsInProgress, update.snapshot.getRepository()); } } }); } + private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nullable String repoName) { + for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { + if (entry.source() != null && entry.state() == State.STARTED && (repoName == null || entry.repository().equals(repoName))) { + // this is a clone, see if new work is ready + for (ObjectObjectCursor clone : entry.clones()) { + if (clone.value.state() == ShardState.INIT) { + runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, + repositoriesService.repository(entry.repository())); + } + } + } + } + } + private class UpdateSnapshotStatusAction extends TransportMasterNodeAction { UpdateSnapshotStatusAction(TransportService transportService, ClusterService clusterService, From 43242d8be70a96ec15a724e33f440ffa8c74bd39 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 12:41:31 +0200 Subject: [PATCH 47/94] fix master failover part 1 --- .../org/elasticsearch/snapshots/CloneSnapshotIT.java | 7 ++++++- .../elasticsearch/cluster/SnapshotsInProgress.java | 4 ++++ .../elasticsearch/snapshots/SnapshotsService.java | 12 +++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index ae0008348f001..7dc688177f263 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase; @@ -326,8 +327,12 @@ public void testMasterFailoverDuringClone() throws Exception { expectThrows(SnapshotException.class, cloneFuture::actionGet); awaitNoMoreRunningOperations(internalCluster().getMasterName()); - final Collection snapshotIds = getRepositoryData(repoName).getSnapshotIds(); + final RepositoryData repositoryData = getRepositoryData(repoName); + final Collection snapshotIds = repositoryData.getSnapshotIds(); assertThat(snapshotIds, hasSize(2)); + for (SnapshotId snapshotId : snapshotIds) { + assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); + } } private void blockMasterOnShardClone(String repoName) { diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 927ce906fb67a..3c51fc574d883 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -383,6 +383,8 @@ public boolean equals(Object o) { if (state != entry.state) 
return false; if (repositoryStateId != entry.repositoryStateId) return false; if (version.equals(entry.version) == false) return false; + if (Objects.equals(source, ((Entry) o).source) == false) return false; + if (clones.equals(((Entry) o).clones) == false) return false; return true; } @@ -398,6 +400,8 @@ public int hashCode() { result = 31 * result + Long.hashCode(startTime); result = 31 * result + Long.hashCode(repositoryStateId); result = 31 * result + version.hashCode(); + result = 31 * result + (source == null ? 0 : source.hashCode()); + result = 31 * result + clones.hashCode(); return result; } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 2d48b2f29b7ca..b3d4a78bc62c9 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -337,6 +337,7 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis } final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); + initializingClones.add(snapshot); // TODO: throw when no indices match or source snapshot was not successful for the matching indices repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { @@ -378,6 +379,7 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { + initializingClones.remove(snapshot); logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot", repositoryName, snapshotName), e); listener.onFailure(e); } @@ -391,6 +393,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, final Cl @Override public TimeValue timeout() { + initializingClones.remove(snapshot); return request.masterNodeTimeout(); } }, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure); @@ -487,12 +490,14 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { + initializingClones.remove(targetSnapshot); logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e); failAllListenersOnMasterFailOver(e); } @Override public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + initializingClones.remove(targetSnapshot); if (updatedEntry != null) { final Snapshot target = updatedEntry.snapshot(); final SnapshotId sourceSnapshot = updatedEntry.source(); @@ -834,8 +839,13 @@ public ClusterState execute(ClusterState currentState) { for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) { if (statesToUpdate.contains(snapshot.state())) { + // Currently initializing clone if (snapshot.source() != null && snapshot.clones().isEmpty()) { - throw new AssertionError("TODO"); + if (initializingClones.contains(snapshot.snapshot())) { + updatedSnapshotEntries.add(snapshot); + } else { + throw new AssertionError("TODO"); + } } else { ImmutableOpenMap shards = processWaitingShardsAndRemovedNodes(snapshot.shards(), routingTable, nodes, knownFailures.computeIfAbsent(snapshot.repository(), k -> new HashMap<>())); From 4f6eec8bf8b4cb6da1d36f6c40605ef03b1fdb0f Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 12:54:40 +0200 Subject: [PATCH 48/94] fix master failover part 2 --- 
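
Editorial sketch, not part of the commit: the rule introduced in this step for a clone entry whose per-shard clone states have not been computed yet (entry.clones() still empty) is to keep it only while this master node is still initializing it, and to drop it after a master failover, since a newly elected master cannot resume the shard-count resolution step. Roughly, with the names used in the surrounding diff:

    // "snapshot" is the SnapshotsInProgress.Entry being examined; a clone entry
    // with no shard clone states yet can only make progress on the master that
    // started it, which tracks it in the initializingClones set.
    if (snapshot.source() != null && snapshot.clones().isEmpty()) {
        if (initializingClones.contains(snapshot.snapshot())) {
            // still being set up by this node, keep it as is
            updatedSnapshotEntries.add(snapshot);
        } else {
            // a failed-over master finds the half-started clone and removes it
            logger.debug("removing not yet started clone operation [{}]", snapshot);
            changed = true;
        }
    }

The matching test added in CloneSnapshotIT (testMasterFailoverDuringCloneStep1) restarts the master while it is blocked reading index metadata and then expects the clone to fail cleanly, leaving only the successful source snapshot in the repository.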
.../snapshots/CloneSnapshotIT.java | 38 ++++++++++++++++++- .../snapshots/SnapshotsService.java | 3 +- .../snapshots/mockstore/MockRepository.java | 19 ++++++++-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 7dc688177f263..7f600ffb38d8e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -304,7 +304,38 @@ public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { assertEquals(settings.get(IndexMetadata.SETTING_NUMBER_OF_REPLICAS), "1"); } - public void testMasterFailoverDuringClone() throws Exception { + public void testMasterFailoverDuringCloneStep1() throws Exception { + // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations + internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String testIndex = "index-test"; + createSingleShardIndexWithContent(testIndex); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + final String targetSnapshot1 = "target-snapshot"; + blockMasterOnReadIndexMeta(repoName); + final ActionFuture cloneFuture = dataNodeClient().admin().cluster() + .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(testIndex).execute(); + awaitNSnapshotsInProgress(1); + final String masterNode = internalCluster().getMasterName(); + waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); + internalCluster().restartNode(masterNode); + expectThrows(SnapshotException.class, cloneFuture::actionGet); + awaitNoMoreRunningOperations(internalCluster().getMasterName()); + + final RepositoryData repositoryData = getRepositoryData(repoName); + final Collection snapshotIds = repositoryData.getSnapshotIds(); + assertThat(snapshotIds, hasSize(1)); + for (SnapshotId snapshotId : snapshotIds) { + assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); + } + } + + public void testMasterFailoverDuringCloneStep2() throws Exception { // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); @@ -335,6 +366,11 @@ public void testMasterFailoverDuringClone() throws Exception { } } + private void blockMasterOnReadIndexMeta(String repoName) { + ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) + .setBlockOnReadIndexMeta(); + } + private void blockMasterOnShardClone(String repoName) { ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) .setBlockOnWriteShardLevelMeta(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index b3d4a78bc62c9..663198834a03c 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -844,7 +844,8 @@ public ClusterState 
execute(ClusterState currentState) { if (initializingClones.contains(snapshot.snapshot())) { updatedSnapshotEntries.add(snapshot); } else { - throw new AssertionError("TODO"); + logger.debug("removing not yet start clone operation [{}]", snapshot); + changed = true; } } else { ImmutableOpenMap shards = processWaitingShardsAndRemovedNodes(snapshot.shards(), diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 68959fc1150f7..40f3b6c052188 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -120,6 +120,8 @@ public long getFailureCount() { private volatile boolean blockOnWriteShardLevelMeta; + private volatile boolean blockOnReadIndexMeta; + /** * Writes to the blob {@code index.latest} at the repository root will fail with an {@link IOException} if {@code true}. */ @@ -186,6 +188,7 @@ public synchronized void unblock() { blockAndFailOnWriteSnapFile = false; blockOnDeleteIndexN = false; blockOnWriteShardLevelMeta = false; + blockOnReadIndexMeta = false; this.notifyAll(); } @@ -213,6 +216,10 @@ public void setBlockOnWriteShardLevelMeta() { blockOnWriteShardLevelMeta = true; } + public void setBlockOnReadIndexMeta() { + blockOnReadIndexMeta = true; + } + public boolean blocked() { return blocked; } @@ -226,7 +233,7 @@ private synchronized boolean blockExecution() { boolean wasBlocked = false; try { while (blockOnDataFiles || blockOnAnyFiles || blockOnWriteIndexFile || - blockAndFailOnWriteSnapFile || blockOnDeleteIndexN || blockOnWriteShardLevelMeta) { + blockAndFailOnWriteSnapFile || blockOnDeleteIndexN || blockOnWriteShardLevelMeta || blockOnReadIndexMeta) { blocked = true; this.wait(); wasBlocked = true; @@ -349,7 +356,11 @@ protected BlobContainer wrapChild(BlobContainer child) { @Override public InputStream readBlob(String name) throws IOException { - maybeIOExceptionOrBlock(name); + if (blockOnReadIndexMeta && name.startsWith(BlobStoreRepository.METADATA_PREFIX) && path().equals(basePath()) == false) { + blockExecutionAndMaybeWait(name); + } else { + maybeIOExceptionOrBlock(name); + } return super.readBlob(name); } @@ -406,8 +417,8 @@ public Map listBlobsByPrefix(String blobNamePrefix) throws public void writeBlob(String blobName, InputStream inputStream, long blobSize, boolean failIfAlreadyExists) throws IOException { maybeIOExceptionOrBlock(blobName); - if (blockOnWriteShardLevelMeta && blobName.startsWith(BlobStoreRepository.SNAPSHOT_PREFIX) && - path().equals(basePath()) == false) { + if (blockOnWriteShardLevelMeta && blobName.startsWith(BlobStoreRepository.SNAPSHOT_PREFIX) + && path().equals(basePath()) == false) { blockExecutionAndMaybeWait(blobName); } super.writeBlob(blobName, inputStream, blobSize, failIfAlreadyExists); From c01ebbcf2d2871d87c406435555024061bb64df3 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 13:27:38 +0200 Subject: [PATCH 49/94] nicer --- .../cluster/SnapshotsInProgress.java | 69 ++------------- .../repositories/FilterRepository.java | 4 +- .../repositories/Repository.java | 5 +- .../repositories/RepositoryShardId.java | 87 +++++++++++++++++++ .../blobstore/BlobStoreRepository.java | 11 ++- .../snapshots/SnapshotsService.java | 46 +++++----- .../RepositoriesServiceTests.java | 2 +- .../index/shard/RestoreOnlyRepository.java | 3 +- 
.../xpack/ccr/repository/CcrRepository.java | 3 +- 9 files changed, 134 insertions(+), 96 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/repositories/RepositoryShardId.java diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 3c51fc574d883..66e974209eebc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.repositories.IndexId; +import org.elasticsearch.repositories.RepositoryShardId; import org.elasticsearch.repositories.RepositoryOperation; import org.elasticsearch.snapshots.Snapshot; import org.elasticsearch.snapshots.SnapshotId; @@ -136,7 +137,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final SnapshotId source; - private final ImmutableOpenMap clones; + private final ImmutableOpenMap clones; @Nullable private final Map userMetadata; @Nullable private final String failure; @@ -146,7 +147,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta List dataStreams, long startTime, long repositoryStateId, ImmutableOpenMap shards, String failure, Map userMetadata, Version version, @Nullable SnapshotId source, - @Nullable ImmutableOpenMap clones) { + @Nullable ImmutableOpenMap clones) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -188,7 +189,7 @@ private Entry(StreamInput in) throws IOException { } if (in.getVersion().onOrAfter(SnapshotsService.CLONE_SNAPSHOT_VERSION)) { source = in.readOptionalWriteable(SnapshotId::new); - clones = in.readImmutableMap(RepoShardId::new, ShardSnapshotStatus::readFrom); + clones = in.readImmutableMap(RepositoryShardId::new, ShardSnapshotStatus::readFrom); } else { source = null; clones = ImmutableOpenMap.of(); @@ -227,7 +228,7 @@ public Entry withRepoGen(long newRepoGen) { userMetadata, version, source, clones); } - public Entry withClones(ImmutableOpenMap updatedClones) { + public Entry withClones(ImmutableOpenMap updatedClones) { if (updatedClones.equals(clones)) { return this; } @@ -363,7 +364,7 @@ public SnapshotId source() { return source; } - public ImmutableOpenMap clones() { + public ImmutableOpenMap clones() { return clones; } @@ -485,7 +486,7 @@ public boolean isFragment() { * @return true if all shards have completed (either successfully or failed), false otherwise */ public static boolean completed(ObjectContainer shards, - ImmutableOpenMap clones) { + ImmutableOpenMap clones) { for (ObjectCursor status : shards) { if (status.value.state().completed == false) { return false; @@ -753,60 +754,8 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par return builder; } - public static RepoShardId repoShardId(IndexId indexId, int shard) { - return new RepoShardId(indexId, shard); - } - - public static final class RepoShardId implements Writeable { - - private final IndexId index; - - private final int shard; - - private RepoShardId(IndexId index, int shard) { - assert index != null; - this.index = index; - this.shard = shard; - } - - private RepoShardId(StreamInput in) throws IOException { - this(new IndexId(in), in.readVInt()); - } - - public IndexId index() { - return index; - } - - public String 
indexName() { - return index.getName(); - } - - public int shardId() { - return shard; - } - - @Override - public int hashCode() { - return Objects.hash(index, shard); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj instanceof RepoShardId == false) { - return false; - } - final RepoShardId that = (RepoShardId) obj; - return that.index.equals(index) && that.shard == shard; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - index.writeTo(out); - out.writeVInt(shard); - } + public static RepositoryShardId repoShardId(IndexId indexId, int shard) { + return new RepositoryShardId(indexId, shard); } public enum ShardState implements Writeable { diff --git a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java index d67d75d89f777..1bcc8eb1f6efe 100644 --- a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java @@ -150,9 +150,9 @@ public void executeConsistentStateUpdate(Function listener) { - in.cloneShardSnapshot(source, target, index, shardId, shardGeneration, listener); + in.cloneShardSnapshot(source, target, shardId, shardGeneration, listener); } @Override diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index a6f9065cbfa4b..322f94c725f49 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -267,12 +267,11 @@ void executeConsistentStateUpdate(Function listener); /** diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryShardId.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryShardId.java new file mode 100644 index 0000000000000..617ca877b2525 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryShardId.java @@ -0,0 +1,87 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.repositories; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Objects; + +/** + * Represents a shard snapshot in a repository. 
+ */ +public final class RepositoryShardId implements Writeable { + + private final IndexId index; + + private final int shard; + + public RepositoryShardId(IndexId index, int shard) { + assert index != null; + this.index = index; + this.shard = shard; + } + + public RepositoryShardId(StreamInput in) throws IOException { + this(new IndexId(in), in.readVInt()); + } + + public IndexId index() { + return index; + } + + public String indexName() { + return index.getName(); + } + + public int shardId() { + return shard; + } + + @Override + public int hashCode() { + return Objects.hash(index, shard); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj instanceof RepositoryShardId == false) { + return false; + } + final RepositoryShardId that = (RepositoryShardId) obj; + return that.index.equals(index) && that.shard == shard; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + index.writeTo(out); + out.writeVInt(shard); + } + + @Override + public String toString() { + return "RepositoryShardId{" + index + "}{" + shard + "}"; + } +} diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index e04165e2183d2..d686b7c67582a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -104,6 +104,7 @@ import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryOperation; +import org.elasticsearch.repositories.RepositoryShardId; import org.elasticsearch.repositories.RepositoryStats; import org.elasticsearch.repositories.RepositoryVerificationException; import org.elasticsearch.repositories.ShardGenerations; @@ -397,16 +398,18 @@ public TimeValue timeout() { } @Override - public void cloneShardSnapshot(SnapshotId source, SnapshotId target, IndexId index, int shardId, @Nullable String shardGeneration, - ActionListener listener) { + public void cloneShardSnapshot(SnapshotId source, SnapshotId target, RepositoryShardId shardId, + @Nullable String shardGeneration, ActionListener listener) { + final IndexId index = shardId.index(); + final int shardNum = shardId.shardId(); final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); executor.execute(ActionRunnable.supply(listener, () -> { - final BlobContainer shardContainer = shardContainer(index, shardId); + final BlobContainer shardContainer = shardContainer(index, shardNum); final BlobStoreIndexShardSnapshots existingSnapshots = buildBlobStoreIndexShardSnapshots(Collections.emptySet(), shardContainer, shardGeneration).v1(); for (SnapshotFiles existingSnapshot : existingSnapshots) { if (existingSnapshot.snapshot().equals(target.getName())) { - throw new RepositoryException(metadata.name(), "Can't create clone of [" + index + "][" + shardId + "] for snapshot [" + throw new RepositoryException(metadata.name(), "Can't create clone of [" + shardId + "] for snapshot [" + target + "]. 
A snapshot by that name already exists for this shard."); } } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 663198834a03c..f7f8ea615586b 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -47,7 +47,7 @@ import org.elasticsearch.cluster.RestoreInProgress; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress; -import org.elasticsearch.cluster.SnapshotsInProgress.RepoShardId; +import org.elasticsearch.repositories.RepositoryShardId; import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.SnapshotsInProgress.State; @@ -457,19 +457,18 @@ public ClusterState execute(ClusterState currentState) { final ShardGenerations shardGenerations = repoData.shardGenerations(); for (int i = 0; i < updatedEntries.size(); i++) { if (cloneEntry.equals(updatedEntries.get(i))) { - final ImmutableOpenMap.Builder clonesBuilder = + final ImmutableOpenMap.Builder clonesBuilder = ImmutableOpenMap.builder(); // TODO: needlessly complex, just deal with repo shard id directly final Set busyShards = busyShardsForRepo( repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); - final Set busyShardsInRepo = busyShards + final Set busyShardsInRepo = busyShards .stream() - .map(shardId -> SnapshotsInProgress.repoShardId( - inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) + .map(shardId -> new RepositoryShardId(inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) .collect(Collectors.toSet()); for (Tuple count : counts) { for (int shardId = 0; shardId < count.v2(); shardId++) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(count.v1(), shardId); + final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId); if (busyShardsInRepo.contains(repoShardId)) { clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); } else { @@ -501,12 +500,12 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS if (updatedEntry != null) { final Snapshot target = updatedEntry.snapshot(); final SnapshotId sourceSnapshot = updatedEntry.source(); - for (ObjectObjectCursor indexClone : updatedEntry.clones()) { + for (ObjectObjectCursor indexClone : updatedEntry.clones()) { final ShardSnapshotStatus shardStatusBefore = indexClone.value; if (shardStatusBefore.state() != ShardState.INIT) { continue; } - final RepoShardId repoShardId = indexClone.key; + final RepositoryShardId repoShardId = indexClone.key; runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository); } } else { @@ -518,13 +517,13 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS }, "start snapshot clone", onFailure), onFailure); } - private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); + private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, - ShardSnapshotStatus shardStatusBefore, RepoShardId repoShardId, Repository repository) { + ShardSnapshotStatus shardStatusBefore, RepositoryShardId repoShardId, Repository repository) { SnapshotId targetSnapshot = target.getSnapshotId(); if 
(currentlyCloning.add(repoShardId)) { - repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId.index(), repoShardId.shardId(), + repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId, shardStatusBefore.generation(), ActionListener.wrap( generation -> innerUpdateSnapshotState( new ShardSnapshotUpdate(target, @@ -538,8 +537,7 @@ private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, }, e -> { currentlyCloning.remove(repoShardId); - logger.warn( - "Cluster state update after successful shard clone [{}] failed", repoShardId); + logger.warn("Cluster state update after successful shard clone [{}] failed", repoShardId); failAllListenersOnMasterFailOver(e); } @@ -2106,7 +2104,7 @@ private static Set busyShardsForRepo(String repoName, @Nullable Snapsho } } } else { - for (ObjectObjectCursor clone : runningSnapshot.clones()) { + for (ObjectObjectCursor clone : runningSnapshot.clones()) { final ShardSnapshotStatus shardState = clone.value; if (shardState.isActive()) { IndexMetadata indexMeta = metadata.index(clone.key.indexName()); @@ -2215,14 +2213,14 @@ private static final class ShardSnapshotUpdate { private final Snapshot snapshot; @Nullable - private final RepoShardId repoShardId; + private final RepositoryShardId repoShardId; @Nullable private final ShardId shardId; private final SnapshotsInProgress.ShardSnapshotStatus status; - private ShardSnapshotUpdate(Snapshot snapshot, @Nullable RepoShardId repoShardId, @Nullable ShardId shardId, + private ShardSnapshotUpdate(Snapshot snapshot, @Nullable RepositoryShardId repoShardId, @Nullable ShardId shardId, ShardSnapshotStatus status) { this.snapshot = snapshot; this.repoShardId = repoShardId; @@ -2238,7 +2236,7 @@ public ShardId shardId() { return shardId; } - public RepoShardId repoShardId() { + public RepositoryShardId repoShardId() { return repoShardId; } @@ -2259,7 +2257,7 @@ public ClusterTasksResult execute(ClusterState currentState int startedCount = 0; final List entries = new ArrayList<>(); final Map> reusedShardIdsByRepo = new HashMap<>(); - final Map> reusedRepoShardIdsByRepo = new HashMap<>(); + final Map> reusedRepoShardIdsByRepo = new HashMap<>(); final String localNodeId = currentState.nodes().getLocalNodeId(); // Tasks to check for updates for running snapshots. 
final List unconsumedTasks = new ArrayList<>(tasks); @@ -2271,7 +2269,7 @@ public ClusterTasksResult execute(ClusterState currentState continue; } ImmutableOpenMap.Builder shards = null; - ImmutableOpenMap.Builder clones = null; + ImmutableOpenMap.Builder clones = null; for (Iterator iterator = unconsumedTasks.iterator(); iterator.hasNext(); ) { final ShardSnapshotUpdate updateSnapshotState = iterator.next(); final Snapshot updatedSnapshot = updateSnapshotState.snapshot(); @@ -2279,12 +2277,12 @@ public ClusterTasksResult execute(ClusterState currentState if (entry.repository().equals(updatedRepository) == false) { continue; } - final Set reusedShardIds = + final Set reusedShardIds = reusedRepoShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); final Set reusedConcreteShardIds = reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>()); if (updateSnapshotState.isClone()) { - final RepoShardId finishedShardId = updateSnapshotState.repoShardId(); + final RepositoryShardId finishedShardId = updateSnapshotState.repoShardId(); if (entry.snapshot().getSnapshotId().equals(updatedSnapshot.getSnapshotId())) { logger.trace("[{}] Updating shard clone [{}] with status [{}]", updatedSnapshot, finishedShardId, updateSnapshotState.status().state()); @@ -2401,14 +2399,14 @@ public ClusterTasksResult execute(ClusterState currentState shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation())); reusedConcreteShardIds.add(finishedShardId); - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId( + final RepositoryShardId repoShardId = new RepositoryShardId( indicesLookup.get(finishedShardId.getIndexName()), finishedShardId.getId()); reusedShardIds.add(repoShardId); iterator.remove(); } else { final IndexId indexId = indicesLookup.get(finishedShardId.getIndexName()); if (indexId != null) { - final RepoShardId repoShardId = SnapshotsInProgress.repoShardId(indexId, finishedShardId.getId()); + final RepositoryShardId repoShardId = new RepositoryShardId(indexId, finishedShardId.getId()); final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId); if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) { continue; @@ -2497,7 +2495,7 @@ private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nul for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { if (entry.source() != null && entry.state() == State.STARTED && (repoName == null || entry.repository().equals(repoName))) { // this is a clone, see if new work is ready - for (ObjectObjectCursor clone : entry.clones()) { + for (ObjectObjectCursor clone : entry.clones()) { if (clone.value.state() == ShardState.INIT) { runReadyClone(entry.snapshot(), entry.source(), clone.value, clone.key, repositoriesService.repository(entry.repository())); diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index d37720e1062cd..49b4a340f0e33 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -288,7 +288,7 @@ public void executeConsistentStateUpdate(Function listener) { } diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java index 
a321e734300fb..526106fabdfe0 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java @@ -35,6 +35,7 @@ import org.elasticsearch.repositories.IndexMetaDataGenerations; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.RepositoryShardId; import org.elasticsearch.repositories.ShardGenerations; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; @@ -159,7 +160,7 @@ public void executeConsistentStateUpdate(Function listener) { throw new AssertionError("not supported"); } diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java index 6382f8726c463..ddde485dbc51c 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java @@ -60,6 +60,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.IndexMetaDataGenerations; +import org.elasticsearch.repositories.RepositoryShardId; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.ShardGenerations; @@ -443,7 +444,7 @@ public void executeConsistentStateUpdate(Function listener) { throw new UnsupportedOperationException("Unsupported for repository of type: " + TYPE); } From 9c1d38ca2eb5c2643b964241a032001d9c722d09 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 13:46:27 +0200 Subject: [PATCH 50/94] nicer --- .../snapshots/SnapshotsService.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index f7f8ea615586b..2f5543f5ef42a 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -433,7 +433,7 @@ private void startCloning(Repository repository, SnapshotsInProgress.Entry clone final Snapshot targetSnapshot = cloneEntry.snapshot(); final Consumer onFailure = e -> removeFailedSnapshotFromClusterState(targetSnapshot, e, null); repository.getRepositoryData(ActionListener.wrap(repositoryData -> { - for (IndexId index : cloneEntry.indices()) { + for (IndexId index : indices) { executor.execute(ActionRunnable.supply(shardCountListener, () -> { final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index); return Tuple.tuple(index, metadata.getNumberOfShards()); @@ -452,28 +452,27 @@ public ClusterState execute(ClusterState currentState) { final List updatedEntries = new ArrayList<>(snapshotsInProgress.entries()); boolean changed = false; final String localNodeId = currentState.nodes().getLocalNodeId(); - final Map inFlightIndexIds = - getInFlightIndexIds(updatedEntries, repository.getMetadata().name()); + final String repoName = cloneEntry.repository(); + final Map indexIds = getInFlightIndexIds(updatedEntries, repoName); final ShardGenerations shardGenerations = repoData.shardGenerations(); for (int 
i = 0; i < updatedEntries.size(); i++) { if (cloneEntry.equals(updatedEntries.get(i))) { final ImmutableOpenMap.Builder clonesBuilder = ImmutableOpenMap.builder(); - // TODO: needlessly complex, just deal with repo shard id directly - final Set busyShards = busyShardsForRepo( - repository.getMetadata().name(), snapshotsInProgress, currentState.metadata()); - final Set busyShardsInRepo = busyShards - .stream() - .map(shardId -> new RepositoryShardId(inFlightIndexIds.get(shardId.getIndexName()), shardId.getId())) - .collect(Collectors.toSet()); + // TODO: could be optimized by just dealing with repo shard id directly + final Set busyShardsInRepo = + busyShardsForRepo(repoName, snapshotsInProgress, currentState.metadata()) + .stream() + .map(shardId -> new RepositoryShardId(indexIds.get(shardId.getIndexName()), shardId.getId())) + .collect(Collectors.toSet()); for (Tuple count : counts) { for (int shardId = 0; shardId < count.v2(); shardId++) { final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId); if (busyShardsInRepo.contains(repoShardId)) { clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED); } else { - clonesBuilder.put(repoShardId, new ShardSnapshotStatus(localNodeId, - shardGenerations.getShardGen(count.v1(), shardId))); + clonesBuilder.put(repoShardId, + new ShardSnapshotStatus(localNodeId, shardGenerations.getShardGen(count.v1(), shardId))); } } } @@ -2270,6 +2269,7 @@ public ClusterTasksResult execute(ClusterState currentState } ImmutableOpenMap.Builder shards = null; ImmutableOpenMap.Builder clones = null; + Map indicesLookup = null; for (Iterator iterator = unconsumedTasks.iterator(); iterator.hasNext(); ) { final ShardSnapshotUpdate updateSnapshotState = iterator.next(); final Snapshot updatedSnapshot = updateSnapshotState.snapshot(); @@ -2381,9 +2381,9 @@ public ClusterTasksResult execute(ClusterState currentState executedTasks.add(updateSnapshotState); changedCount++; } else if (executedTasks.contains(updateSnapshotState)) { - // TODO: horribly inefficient obv. - final Map indicesLookup = - entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); + if (indicesLookup == null) { + indicesLookup = entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); + } if (reusedConcreteShardIds.contains(finishedShardId) == false) { if (entry.source() == null) { final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId); From c011f13d6448d092c132b34efdc1ceaf2dcd052b Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 13:47:59 +0200 Subject: [PATCH 51/94] cleaner --- .../java/org/elasticsearch/snapshots/SnapshotsService.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 2f5543f5ef42a..fec6c715662aa 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -482,8 +482,7 @@ public ClusterState execute(ClusterState currentState) { break; } } - return updateWithSnapshots( - currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null); + return updateWithSnapshots(currentState, changed ? 
SnapshotsInProgress.of(updatedEntries) : null, null); } @Override From 51bfc4a9cda6edc836955ee675d0469b5698dd8e Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 15:33:33 +0200 Subject: [PATCH 52/94] fix more edge cases --- .../snapshots/CloneSnapshotIT.java | 36 ++++++++++++++- .../snapshots/ConcurrentSnapshotsIT.java | 5 --- .../cluster/SnapshotsInProgress.java | 26 ++++++++--- .../snapshots/SnapshotsService.java | 44 +++++++++++-------- .../AbstractSnapshotIntegTestCase.java | 6 +++ 5 files changed, 84 insertions(+), 33 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 7f600ffb38d8e..92f3994f76204 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -347,10 +347,10 @@ public void testMasterFailoverDuringCloneStep2() throws Exception { final String sourceSnapshot = "source-snapshot"; createFullSnapshot(repoName, sourceSnapshot); - final String targetSnapshot1 = "target-snapshot"; + final String targetSnapshot = "target-snapshot"; blockMasterOnShardClone(repoName); final ActionFuture cloneFuture = dataNodeClient().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(testIndex).execute(); + .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(testIndex).execute(); awaitNSnapshotsInProgress(1); final String masterNode = internalCluster().getMasterName(); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); @@ -366,6 +366,38 @@ public void testMasterFailoverDuringCloneStep2() throws Exception { } } + public void testExceptionDuringShardClone() throws Exception { + // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations + internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startDataOnlyNode(); + final String repoName = "test-repo"; + createRepository(repoName, "mock"); + final String testIndex = "index-test"; + createSingleShardIndexWithContent(testIndex); + + final String sourceSnapshot = "source-snapshot"; + createFullSnapshot(repoName, sourceSnapshot); + + final String targetSnapshot = "target-snapshot"; + blockMasterFromFinalizingSnapshotOnSnapFile(repoName); + final ActionFuture cloneFuture = dataNodeClient().admin().cluster() + .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(testIndex).execute(); + awaitNSnapshotsInProgress(1); + final String masterNode = internalCluster().getMasterName(); + waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); + unblockNode(repoName, masterNode); + expectThrows(SnapshotException.class, cloneFuture::actionGet); + awaitNoMoreRunningOperations(internalCluster().getMasterName()); + + final RepositoryData repositoryData = getRepositoryData(repoName); + final Collection snapshotIds = repositoryData.getSnapshotIds(); + assertThat(snapshotIds, hasSize(1)); + for (SnapshotId snapshotId : snapshotIds) { + assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); + } + assertAcked(startDelete(repoName, sourceSnapshot).get()); + } + private void blockMasterOnReadIndexMeta(String repoName) { ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) 
.setBlockOnReadIndexMeta(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index d6a984bb9d928..c843532a5d543 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -1272,11 +1272,6 @@ private ActionFuture startDeleteFromNonMasterClient(String return internalCluster().nonMasterClient().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute(); } - private ActionFuture startDelete(String repoName, String snapshotName) { - logger.info("--> deleting snapshot [{}] from repo [{}]", snapshotName, repoName); - return client().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute(); - } - private ActionFuture startFullSnapshotFromNonMasterClient(String repoName, String snapshotName) { logger.info("--> creating full snapshot [{}] to repo [{}] from non master client", snapshotName, repoName); return internalCluster().nonMasterClient().admin().cluster().prepareCreateSnapshot(repoName, snapshotName) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 66e974209eebc..88567b5ddf93d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -167,7 +167,7 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta } else { this.clones = clones; } - assert assertShardsConsistent(source, state, indices, shards); + assert assertShardsConsistent(source, state, indices, shards, this.clones); } private Entry(StreamInput in) throws IOException { @@ -197,7 +197,8 @@ private Entry(StreamInput in) throws IOException { } private static boolean assertShardsConsistent(SnapshotId source, State state, List indices, - ImmutableOpenMap shards) { + ImmutableOpenMap shards, + ImmutableOpenMap clones) { if ((state == State.INIT || state == State.ABORTED) && shards.isEmpty()) { return true; } @@ -211,12 +212,14 @@ private static boolean assertShardsConsistent(SnapshotId source, State state, Li assert source == null || indexNames.isEmpty() == false : "No empty snapshot clones allowed"; assert source != null || indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; - final boolean shardsCompleted = completed(shards.values(), ImmutableOpenMap.of()); - if (source == null) { + final boolean shardsCompleted = completed(shards.values(), clones); + if (source == null || clones.isEmpty() == false) { assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) : "Completed state must imply all shards completed but saw state [" + state + "] and shards " + shards; - } else { - // TODO: assert things about clones + } + if (source != null && state.completed()) { + assert hasFailures(clones) == false || state == State.FAILED + : "Failed shard clones in [" + clones + "] but state was [" + state + "]"; } return true; } @@ -233,7 +236,7 @@ public Entry withClones(ImmutableOpenMap return this; } return new Entry(snapshot, includeGlobalState, partial, - completed(shards.values(), updatedClones) ? 
State.SUCCESS : state, + completed(shards.values(), updatedClones) ? (hasFailures(updatedClones) ? State.FAILED : State.SUCCESS) : state, indices, dataStreams, startTime, repositoryStateId, shards, failure, userMetadata, version, source, updatedClones); } @@ -500,6 +503,15 @@ public static boolean completed(ObjectContainer shards, return true; } + private static boolean hasFailures(ImmutableOpenMap clones) { + for (ObjectCursor value : clones.values()) { + if (value.value.state().failed()) { + return true; + } + } + return false; + } + public static class ShardSnapshotStatus implements Writeable { /** diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index fec6c715662aa..ecf2e0212b024 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -517,33 +517,34 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS private final Set currentlyCloning = Collections.synchronizedSet(new HashSet<>()); - private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, - ShardSnapshotStatus shardStatusBefore, RepositoryShardId repoShardId, Repository repository) { - SnapshotId targetSnapshot = target.getSnapshotId(); + private void runReadyClone(Snapshot target, SnapshotId sourceSnapshot, ShardSnapshotStatus shardStatusBefore, + RepositoryShardId repoShardId, Repository repository) { + final SnapshotId targetSnapshot = target.getSnapshotId(); + final String localNodeId = clusterService.localNode().getId(); if (currentlyCloning.add(repoShardId)) { repository.cloneShardSnapshot(sourceSnapshot, targetSnapshot, repoShardId, shardStatusBefore.generation(), ActionListener.wrap( generation -> innerUpdateSnapshotState( - new ShardSnapshotUpdate(target, - repoShardId, null, - new ShardSnapshotStatus(clusterService.localNode().getId(), ShardState.SUCCESS, generation)), - ActionListener.wrap( - v -> { - currentlyCloning.remove(repoShardId); - logger.trace("Marked [{}] as successfully cloned from [{}] to [{}]", repoShardId, - sourceSnapshot, targetSnapshot); - }, + new ShardSnapshotUpdate(target, repoShardId, null, + new ShardSnapshotStatus(localNodeId, ShardState.SUCCESS, generation)), + ActionListener.runBefore(ActionListener.wrap( + v -> logger.trace("Marked [{}] as successfully cloned from [{}] to [{}]", repoShardId, + sourceSnapshot, targetSnapshot), e -> { - currentlyCloning.remove(repoShardId); logger.warn("Cluster state update after successful shard clone [{}] failed", repoShardId); failAllListenersOnMasterFailOver(e); } - - )), e -> { - currentlyCloning.remove(repoShardId); - // TODO: error handling, cleanup clone right away on partial failure? 
- throw new AssertionError(e); - })); + ), () -> currentlyCloning.remove(repoShardId))), e -> innerUpdateSnapshotState( + new ShardSnapshotUpdate(target, repoShardId, null, new ShardSnapshotStatus(localNodeId, + ShardState.FAILED, "failed to clone shard snapshot", null)), + ActionListener.runBefore(ActionListener.wrap( + v -> logger.trace("Marked [{}] as failed clone from [{}] to [{}]", repoShardId, + sourceSnapshot, targetSnapshot), + ex -> { + logger.warn("Cluster state update after failed shard clone [{}] failed", repoShardId); + failAllListenersOnMasterFailOver(ex); + } + ), () -> currentlyCloning.remove(repoShardId))))); } } @@ -1043,6 +1044,11 @@ private static boolean removedNodesCleanupNeeded(SnapshotsInProgress snapshotsIn * @param entry snapshot */ private void endSnapshot(SnapshotsInProgress.Entry entry, Metadata metadata, @Nullable RepositoryData repositoryData) { + if (entry.source() != null && entry.state() == State.FAILED) { + logger.debug("Removing failed snapshot clone [{}] from cluster state", entry); + removeFailedSnapshotFromClusterState(entry.snapshot(), new SnapshotException(entry.snapshot(), entry.failure()), null); + return; + } final boolean newFinalization = endingSnapshots.add(entry.snapshot()); final String repoName = entry.repository(); if (tryEnterRepoLoop(repoName)) { diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index 1d4bb753b9ace..3af78ab728958 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -25,6 +25,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; @@ -506,4 +507,9 @@ protected void createIndexWithContent(String indexName, Settings indexSettings) ensureGreen(indexName); indexDoc(indexName, "some_id", "foo", "bar"); } + + protected ActionFuture startDelete(String repoName, String snapshotName) { + logger.info("--> deleting snapshot [{}] from repo [{}]", snapshotName, repoName); + return client().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute(); + } } From 7eb9762190330f5043dc9bbcc230b3076bc7afc1 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 17 Sep 2020 16:43:34 +0200 Subject: [PATCH 53/94] shorter --- .../cluster/SnapshotsInProgress.java | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 88567b5ddf93d..1a6f6eb462c55 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -565,7 +565,7 @@ private boolean assertConsistent() { public static ShardSnapshotStatus readFrom(StreamInput in) throws IOException { String nodeId = in.readOptionalString(); - final ShardState state = ShardState.readFrom(in); + final ShardState state = ShardState.fromValue(in.readByte()); final String 
generation = in.readOptionalString(); final String reason = in.readOptionalString(); if (state == ShardState.QUEUED) { @@ -604,7 +604,7 @@ public boolean isActive() { @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(nodeId); - state.writeTo(out); + out.writeByte(state.value); out.writeOptionalString(generation); out.writeOptionalString(reason); } @@ -766,11 +766,7 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par return builder; } - public static RepositoryShardId repoShardId(IndexId indexId, int shard) { - return new RepositoryShardId(indexId, shard); - } - - public enum ShardState implements Writeable { + public enum ShardState { INIT((byte) 0, false, false), SUCCESS((byte) 2, true, false), FAILED((byte) 3, true, true), @@ -805,8 +801,7 @@ public boolean failed() { return failed; } - public static ShardState readFrom(StreamInput in) throws IOException { - final byte value = in.readByte(); + public static ShardState fromValue(byte value) { switch (value) { case 0: return INIT; @@ -826,10 +821,5 @@ public static ShardState readFrom(StreamInput in) throws IOException { throw new IllegalArgumentException("No shard snapshot state for value [" + value + "]"); } } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeByte(value); - } } } From f1bf399ce49e1ea9c013c59aa4f8dd27fe8f9d21 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Sun, 20 Sep 2020 13:39:16 +0200 Subject: [PATCH 54/94] fix docs? --- docs/reference/snapshot-restore/index.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/snapshot-restore/index.asciidoc b/docs/reference/snapshot-restore/index.asciidoc index 8286c73276864..805b923c6d56d 100644 --- a/docs/reference/snapshot-restore/index.asciidoc +++ b/docs/reference/snapshot-restore/index.asciidoc @@ -107,6 +107,7 @@ understand the time requirements before proceeding. -- include::register-repository.asciidoc[] +include::apis/clone-snapshot-api.asciidoc[] include::take-snapshot.asciidoc[] include::restore-snapshot.asciidoc[] include::monitor-snapshot-restore.asciidoc[] From eed621424feb884d663b2e4dbf4dfac6c30b7e21 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Sun, 20 Sep 2020 19:04:19 +0200 Subject: [PATCH 55/94] remove settings for now --- .../apis/clone-snapshot-api.asciidoc | 16 ++-------- .../snapshots/CloneSnapshotIT.java | 31 ------------------- .../snapshots/clone/CloneSnapshotRequest.java | 23 +------------- .../clone/CloneSnapshotRequestBuilder.java | 8 +---- .../cluster/RestCloneSnapshotAction.java | 7 +---- 5 files changed, 5 insertions(+), 80 deletions(-) diff --git a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc index 60d6ccbc67fb8..75875c522dc02 100644 --- a/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/clone-snapshot-api.asciidoc @@ -10,11 +10,7 @@ Clones part or all of a snapshot into a new snapshot. ---- PUT /_snapshot/my_repository/source_snapshot/_clone/target_snapshot { - "indices": "index_a, index_b", - "excluded_settings": "setting_a", - "index_settings": { - "setting_key": "setting_value" - } + "indices": "index_a, index_b" } ---- // TEST[skip:TODO] @@ -53,12 +49,4 @@ fails and returns an error. Defaults to `30s`. `indices`:: (Required, string) A comma-separated list of indices to include in the snapshot. -<> is supported. 
- -`settings`:: -(Optional, object) -Optional index settings to be applied to all index clones relative to the source index settings. - -`excluded_settings`:: -(Optional, string) -A comma-separated list of index settings that should not be included in the cloned index snapshots. \ No newline at end of file +<> is supported. \ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 23989370f578d..e94d0ad499c7f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -23,8 +23,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.RepositoryData; @@ -275,35 +273,6 @@ public void testBackToBackClonesForIndexNotInCluster() throws Exception { } } - @AwaitsFix(bugUrl = "TODO if we want it") - public void testCloneSnapshotWithIndexSettingUpdates() throws Exception { - internalCluster().startMasterOnlyNode(); - internalCluster().startDataOnlyNode(); - final String repoName = "repo-name"; - createRepository(repoName, "fs"); - - final String indexName = "index-1"; - createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); - final String sourceSnapshot = "source-snapshot"; - createFullSnapshot(repoName, sourceSnapshot); - - indexRandomDocs(indexName, randomIntBetween(20, 100)); - - final String targetSnapshot = "target-snapshot"; - assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName) - .setIndexSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build()).get()); - - final RestoreInfo restoreInfo = client().admin().cluster() - .prepareRestoreSnapshot(repoName, targetSnapshot).setIndices(indexName).setRenamePattern("(.+)") - .setRenameReplacement("$1-copy").setWaitForCompletion(true).get().getRestoreInfo(); - assertEquals(restoreInfo.successfulShards(), restoreInfo.totalShards()); - - final String restoredIndex = indexName + "-copy"; - final Settings settings = - client().admin().indices().prepareGetIndex().setIndices(restoredIndex).get().getSettings().get(restoredIndex); - assertEquals(settings.get(IndexMetadata.SETTING_NUMBER_OF_REPLICAS), "1"); - } - public void testMasterFailoverDuringCloneStep1() throws Exception { // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index 32949ff16ee05..de88dd81351de 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java @@ -25,7 +25,6 
@@ import org.elasticsearch.action.support.master.MasterNodeRequest; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.settings.Settings; import java.io.IOException; @@ -41,10 +40,6 @@ public class CloneSnapshotRequest extends MasterNodeRequest { @@ -37,16 +36,11 @@ protected CloneSnapshotRequestBuilder(ElasticsearchClient client, ActionType action, String repository, String source, String target) { this(client, action, - new CloneSnapshotRequest(repository, source, target, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, Settings.EMPTY)); + new CloneSnapshotRequest(repository, source, target, Strings.EMPTY_ARRAY)); } public CloneSnapshotRequestBuilder setIndices(String... indices) { request.indices(indices); return this; } - - public CloneSnapshotRequestBuilder setIndexSettings(Settings settings) { - request.indexSettings(settings); - return this; - } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java index adb0815f95cb2..158a4ed42655a 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestCloneSnapshotAction.java @@ -21,7 +21,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; @@ -53,13 +52,9 @@ public String getName() { @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { Map body = request.contentParser().mapOrdered(); - final Object indexSettings = body.get("index_settings"); final CloneSnapshotRequest cloneSnapshotRequest = new CloneSnapshotRequest( request.param("repository"), request.param("snapshot"), request.param("target_snapshot"), - XContentMapValues.nodeStringArrayValue(body.getOrDefault("indices", Collections.emptyList())), - XContentMapValues.nodeStringArrayValue(body.getOrDefault("excluded_settings", Collections.emptyList())), - indexSettings == null ? 
Settings.EMPTY : - Settings.builder().loadFromMap(XContentMapValues.nodeMapValue(indexSettings, "index_settings")).build()); + XContentMapValues.nodeStringArrayValue(body.getOrDefault("indices", Collections.emptyList()))); cloneSnapshotRequest.masterNodeTimeout(request.paramAsTime("master_timeout", cloneSnapshotRequest.masterNodeTimeout())); return channel -> client.admin().cluster().cloneSnapshot(cloneSnapshotRequest, new RestToXContentListener<>(channel)); } From 72a7f4bf04d545d079eea1da5a09285006149596 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 28 Sep 2020 14:42:03 +0200 Subject: [PATCH 56/94] less noise --- .../snapshots/ConcurrentSnapshotsIT.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index e54bb23818124..7f1951ddb7494 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -106,7 +106,7 @@ public void testLongRunningSnapshotAllowsConcurrentSnapshot() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String indexFast = "index-fast"; - createSingleShardIndexWithContent(indexFast, dataNode2, dataNode); + createIndexWithContent(indexFast, dataNode2, dataNode); assertSuccessful(client().admin().cluster().prepareCreateSnapshot(repoName, "fast-snapshot") .setIndices(indexFast).setWaitForCompletion(true).execute()); @@ -309,7 +309,7 @@ public void testAbortOneOfMultipleSnapshots() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String secondIndex = "index-two"; - createSingleShardIndexWithContent(secondIndex, dataNode2, dataNode); + createIndexWithContent(secondIndex, dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -358,7 +358,7 @@ public void testCascadedAborts() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); - createSingleShardIndexWithContent("index-two", dataNode2, dataNode); + createIndexWithContent("index-two", dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -421,7 +421,7 @@ public void testMasterFailOverWithQueuedDeletes() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(5); final String secondIndex = "index-two"; - createSingleShardIndexWithContent(secondIndex, dataNode2, dataNode); + createIndexWithContent(secondIndex, dataNode2, dataNode); final String secondSnapshot = "snapshot-two"; final ActionFuture secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot); @@ -1194,7 +1194,7 @@ public void testConcurrentSnapshotWorksWithOldVersionRepo() throws Exception { final String dataNode2 = internalCluster().startDataOnlyNode(); ensureStableCluster(3); final String indexFast = "index-fast"; - createSingleShardIndexWithContent(indexFast, dataNode2, dataNode); + createIndexWithContent(indexFast, dataNode2, dataNode); final ActionFuture createFastSnapshot = client().admin().cluster().prepareCreateSnapshot(repoName, 
"fast-snapshot").setWaitForCompletion(true).execute(); @@ -1293,7 +1293,7 @@ private ActionFuture startFullSnapshotFromMasterClient(S .setWaitForCompletion(true).execute(); } - private void createSingleShardIndexWithContent(String indexName, String nodeInclude, String nodeExclude) { + private void createIndexWithContent(String indexName, String nodeInclude, String nodeExclude) { createIndexWithContent(indexName, indexSettingsNoReplicas(1) .put("index.routing.allocation.include._name", nodeInclude) .put("index.routing.allocation.exclude._name", nodeExclude).build()); From c0504fb662be0971345e4807630b74e130aeff23 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 12:58:11 +0200 Subject: [PATCH 57/94] remove noisy changes --- .../snapshots/blobstore/BlobStoreIndexShardSnapshot.java | 5 ----- .../index/snapshots/blobstore/SnapshotFiles.java | 4 ---- 2 files changed, 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java index 19002071ec736..497a3147c7c99 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java @@ -386,11 +386,6 @@ public BlobStoreIndexShardSnapshot(String snapshot, long indexVersion, List indexFiles, @Nullable Strin this.shardStateIdentifier = shardStateIdentifier; } - public SnapshotFiles clone(String targetName) { - return new SnapshotFiles(targetName, indexFiles, shardStateIdentifier); - } - /** * Creates a new instance with the given snapshot name but otherwise identical to the current instance. 
*/ From 157ec270824769a4b3c3898e8e0f3d54eba1b678 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 13:05:07 +0200 Subject: [PATCH 58/94] reduce noise --- .../main/java/org/elasticsearch/cluster/SnapshotsInProgress.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 88f11daa27d01..90962fa64f44b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -127,6 +127,7 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation private final List dataStreams; private final long startTime; private final long repositoryStateId; + // see #useShardGenerations private final Version version; /** From 56779b4a7172f186cb82db535e77bc1f5e7bbed8 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 13:58:09 +0200 Subject: [PATCH 59/94] align style --- .../snapshots/clone/TransportCloneSnapshotAction.java | 2 +- .../org/elasticsearch/client/ClusterAdminClient.java | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java index fc157504972fd..e6dd7c4e6e7a3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java @@ -71,4 +71,4 @@ protected void masterOperation(Task task, final CloneSnapshotRequest request, Cl final ActionListener listener) { snapshotsService.cloneSnapshot(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); } -} \ No newline at end of file +} diff --git a/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java index 32e9f27dd44a3..69b7ef08831f4 100644 --- a/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java +++ b/server/src/main/java/org/elasticsearch/client/ClusterAdminClient.java @@ -507,10 +507,19 @@ public interface ClusterAdminClient extends ElasticsearchClient { */ CreateSnapshotRequestBuilder prepareCreateSnapshot(String repository, String name); + /** + * Clones a snapshot. + */ CloneSnapshotRequestBuilder prepareCloneSnapshot(String repository, String source, String target); + /** + * Clones a snapshot. + */ ActionFuture cloneSnapshot(CloneSnapshotRequest request); + /** + * Clones a snapshot. 
+ */ void cloneSnapshot(CloneSnapshotRequest request, ActionListener listener); /** From d504a4e50cf9824c231f9e13893bc49e5c1b884c Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 14:00:41 +0200 Subject: [PATCH 60/94] align style --- .../snapshots/clone/TransportCloneSnapshotAction.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java index e6dd7c4e6e7a3..34be53b9fc4ea 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java @@ -37,7 +37,10 @@ import java.io.IOException; -public class TransportCloneSnapshotAction extends TransportMasterNodeAction { +/** + * Transport action for the clone snapshot operation. + */ +public final class TransportCloneSnapshotAction extends TransportMasterNodeAction { private final SnapshotsService snapshotsService; From ad43cea9bca2af3dc7e6d8caff81683872ca5499 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 14:14:29 +0200 Subject: [PATCH 61/94] making things look nicer --- .../org/elasticsearch/cluster/SnapshotsInProgress.java | 10 +++++++++- .../org/elasticsearch/snapshots/SnapshotsService.java | 5 ++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 90962fa64f44b..7b4f23c7144eb 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -122,6 +122,9 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation private final Snapshot snapshot; private final boolean includeGlobalState; private final boolean partial; + /** + * Map of {@link ShardId} to {@link ShardSnapshotStatus} tracking the state of each shard snapshot operation. + */ private final ImmutableOpenMap shards; private final List indices; private final List dataStreams; @@ -131,11 +134,15 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation private final Version version; /** - * Source snapshot if this is a clone operation or {@code null} if this is a normal snapshot. + * Source snapshot if this is a clone operation or {@code null} if this is a snapshot. */ @Nullable private final SnapshotId source; + /** + * Map of {@link RepositoryShardId} to {@link ShardSnapshotStatus} tracking the state of each shard clone operation in this entry + * the same way {@link #shards} tracks the status of each shard snapshot operation in non-clone entries. 
+ */ private final ImmutableOpenMap clones; @Nullable private final Map userMetadata; @@ -208,6 +215,7 @@ private static boolean assertShardsConsistent(SnapshotId source, State state, Li assert source != null || indexNames.equals(indexNamesInShards) : "Indices in shards " + indexNamesInShards + " differ from expected indices " + indexNames + " for state [" + state + "]"; final boolean shardsCompleted = completed(shards.values(), clones); + // Check state consistency for normal snapshots and started clone operations if (source == null || clones.isEmpty() == false) { assert (state.completed() && shardsCompleted) || (state.completed() == false && shardsCompleted == false) : "Completed state must imply all shards completed but saw state [" + state + "] and shards " + shards; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 1e2783509b6c8..5752853034fd5 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -328,10 +328,9 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis final String repositoryName = request.repository(); final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target()); validate(repositoryName, snapshotName); - Repository repository = repositoriesService.repository(request.repository()); + Repository repository = repositoriesService.repository(repositoryName); if (repository.isReadOnly()) { - listener.onFailure( - new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository")); + listener.onFailure(new RepositoryException(repositoryName, "cannot create snapshot in a readonly repository")); return; } final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); From 1a4f1f9814ea083ee66de9d505563f9356f1ea5d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 15:03:40 +0200 Subject: [PATCH 62/94] drier --- .../snapshots/CloneSnapshotIT.java | 81 +++++++++---------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index ddba018897cd2..d1806e568d9aa 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -66,7 +66,7 @@ public void testShardClone() throws Exception { if (useBwCFormat) { initWithSnapshotVersion(repoName, repoPath, SnapshotsService.OLD_SNAPSHOT_FORMAT); // Re-create repo to clear repository data cache - assertAcked(client().admin().cluster().prepareDeleteRepository(repoName).get()); + assertAcked(clusterAdmin().prepareDeleteRepository(repoName).get()); createRepository(repoName, "fs", repoPath); } @@ -133,12 +133,12 @@ public void testCloneSnapshotIndex() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); if (randomBoolean()) { - assertAcked(client().admin().indices().prepareDelete(indexName)); + assertAcked(admin().indices().prepareDelete(indexName)); } final String targetSnapshot = "target-snapshot"; - assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).get()); + assertAcked(startClone(repoName, 
sourceSnapshot, targetSnapshot, indexName).get()); - final List status = client().admin().cluster().prepareSnapshotStatus(repoName) + final List status = clusterAdmin().prepareSnapshotStatus(repoName) .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); assertThat(status, hasSize(2)); final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); @@ -162,18 +162,17 @@ public void testClonePreventsSnapshotDelete() throws Exception { final String targetSnapshot = "target-snapshot"; blockNodeOnAnyFiles(repoName, masterName); - final ActionFuture cloneFuture = - client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); + final ActionFuture cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexName); waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(cloneFuture.isDone()); - ConcurrentSnapshotExecutionException ex = expectThrows(ConcurrentSnapshotExecutionException.class, () -> - client().admin().cluster().prepareDeleteSnapshot(repoName, sourceSnapshot).execute().actionGet()); + ConcurrentSnapshotExecutionException ex = expectThrows(ConcurrentSnapshotExecutionException.class, + () -> startDeleteSnapshot(repoName, sourceSnapshot).actionGet()); assertThat(ex.getMessage(), containsString("cannot delete snapshot while it is being cloned")); unblockNode(repoName, masterName); assertAcked(cloneFuture.get()); - final List status = client().admin().cluster().prepareSnapshotStatus(repoName) + final List status = clusterAdmin().prepareSnapshotStatus(repoName) .setSnapshots(sourceSnapshot, targetSnapshot).get().getSnapshots(); assertThat(status, hasSize(2)); final SnapshotIndexStatus status1 = status.get(0).getIndices().get(indexName); @@ -199,8 +198,7 @@ public void testConcurrentCloneAndSnapshot() throws Exception { final ActionFuture snapshot2Future = startFullSnapshotBlockedOnDataNode("snapshot-2", repoName, dataNode); waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); - final ActionFuture cloneFuture = - client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute(); + final ActionFuture cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexName); awaitNumberOfSnapshotsInProgress(2); unblockNode(repoName, dataNode); assertAcked(cloneFuture.get()); @@ -221,14 +219,13 @@ public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception { final String targetSnapshot = "target-snapshot"; blockMasterOnShardClone(repoName); - final ActionFuture cloneFuture = - client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexSlow).execute(); + final ActionFuture cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexSlow); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); final String indexFast = "index-fast"; createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100)); - assertSuccessful(client().admin().cluster().prepareCreateSnapshot(repoName, "fast-snapshot") + assertSuccessful(clusterAdmin().prepareCreateSnapshot(repoName, "fast-snapshot") .setIndices(indexFast).setWaitForCompletion(true).execute()); assertThat(cloneFuture.isDone(), is(false)); @@ -252,12 +249,12 @@ public void testLongRunningSnapshotAllowsConcurrentClone() throws Exception { createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100)); blockDataNode(repoName, dataNode); - final ActionFuture snapshotFuture = 
client().admin().cluster() + final ActionFuture snapshotFuture = clusterAdmin() .prepareCreateSnapshot(repoName, "fast-snapshot").setIndices(indexFast).setWaitForCompletion(true).execute(); waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L)); final String targetSnapshot = "target-snapshot"; - assertAcked(client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexSlow).get()); + assertAcked(startClone(repoName, sourceSnapshot, targetSnapshot, indexSlow).get()); assertThat(snapshotFuture.isDone(), is(false)); unblockNode(repoName, dataNode); @@ -280,14 +277,12 @@ public void testDeletePreventsClone() throws Exception { final String targetSnapshot = "target-snapshot"; blockNodeOnAnyFiles(repoName, masterName); - final ActionFuture deleteFuture = - client().admin().cluster().prepareDeleteSnapshot(repoName, sourceSnapshot).execute(); + final ActionFuture deleteFuture = startDeleteSnapshot(repoName, sourceSnapshot); waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(deleteFuture.isDone()); ConcurrentSnapshotExecutionException ex = expectThrows(ConcurrentSnapshotExecutionException.class, () -> - client().admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indexName).execute() - .actionGet()); + startClone(repoName, sourceSnapshot, targetSnapshot, indexName).actionGet()); assertThat(ex.getMessage(), containsString("cannot clone from snapshot that is being deleted")); unblockNode(repoName, masterName); @@ -306,20 +301,18 @@ public void testBackToBackClonesForIndexNotInCluster() throws Exception { final String sourceSnapshot = "source-snapshot"; createFullSnapshot(repoName, sourceSnapshot); - assertAcked(client().admin().indices().prepareDelete(indexBlocked).get()); + assertAcked(admin().indices().prepareDelete(indexBlocked).get()); final String targetSnapshot1 = "target-snapshot"; blockMasterOnShardClone(repoName); - final ActionFuture cloneFuture1 = client().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(indexBlocked).execute(); + final ActionFuture cloneFuture1 = startClone(repoName, sourceSnapshot, targetSnapshot1, indexBlocked); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); assertThat(cloneFuture1.isDone(), is(false)); final int extraClones = randomIntBetween(1, 5); final List> extraCloneFutures = new ArrayList<>(extraClones); for (int i = 0; i < extraClones; i++) { - extraCloneFutures.add(client().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, "target-snapshot-" + i).setIndices(indexBlocked).execute()); + extraCloneFutures.add(startClone(repoName, sourceSnapshot, "target-snapshot-" + i, indexBlocked)); } awaitNumberOfSnapshotsInProgress(1 + extraClones); for (ActionFuture extraCloneFuture : extraCloneFutures) { @@ -375,12 +368,7 @@ public void testMasterFailoverDuringCloneStep1() throws Exception { expectThrows(SnapshotException.class, cloneFuture::actionGet); awaitNoMoreRunningOperations(internalCluster().getMasterName()); - final RepositoryData repositoryData = getRepositoryData(repoName); - final Collection snapshotIds = repositoryData.getSnapshotIds(); - assertThat(snapshotIds, hasSize(1)); - for (SnapshotId snapshotId : snapshotIds) { - assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); - } + assertAllSnapshotsSuccessful(getRepositoryData(repoName), 1); } public void testMasterFailoverDuringCloneStep2() throws Exception { @@ -406,12 
+394,7 @@ public void testMasterFailoverDuringCloneStep2() throws Exception { expectThrows(SnapshotException.class, cloneFuture::actionGet); awaitNoMoreRunningOperations(internalCluster().getMasterName()); - final RepositoryData repositoryData = getRepositoryData(repoName); - final Collection snapshotIds = repositoryData.getSnapshotIds(); - assertThat(snapshotIds, hasSize(2)); - for (SnapshotId snapshotId : snapshotIds) { - assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); - } + assertAllSnapshotsSuccessful(getRepositoryData(repoName), 2); } public void testExceptionDuringShardClone() throws Exception { @@ -436,16 +419,15 @@ public void testExceptionDuringShardClone() throws Exception { unblockNode(repoName, masterNode); expectThrows(SnapshotException.class, cloneFuture::actionGet); awaitNoMoreRunningOperations(internalCluster().getMasterName()); - - final RepositoryData repositoryData = getRepositoryData(repoName); - final Collection snapshotIds = repositoryData.getSnapshotIds(); - assertThat(snapshotIds, hasSize(1)); - for (SnapshotId snapshotId : snapshotIds) { - assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); - } + assertAllSnapshotsSuccessful(getRepositoryData(repoName), 1); assertAcked(startDeleteSnapshot(repoName, sourceSnapshot).get()); } + private ActionFuture startClone(String repoName, String sourceSnapshot, String targetSnapshot1, + String... indices) { + return clusterAdmin().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(indices).execute(); + } + private void blockMasterOnReadIndexMeta(String repoName) { ((MockRepository)internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName)) .setBlockOnReadIndexMeta(); @@ -456,6 +438,17 @@ private void blockMasterOnShardClone(String repoName) { .setBlockOnWriteShardLevelMeta(); } + /** + * Assert that given {@link RepositoryData} contains exactly the given number of snapshots and all of them are successful. 
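(Usage sketch for the assertion helper described here, mirroring the refactored call sites in the failover tests above; repoName is whatever repository the test created.)

    // failover during step 1: only the source snapshot should remain, and it must be SUCCESS
    assertAllSnapshotsSuccessful(getRepositoryData(repoName), 1);
    // failover during step 2: the clone was already finalized, so source and target both remain
    assertAllSnapshotsSuccessful(getRepositoryData(repoName), 2);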
+ */ + private static void assertAllSnapshotsSuccessful(RepositoryData repositoryData, int successfulSnapshotCount) { + final Collection snapshotIds = repositoryData.getSnapshotIds(); + assertThat(snapshotIds, hasSize(successfulSnapshotCount)); + for (SnapshotId snapshotId : snapshotIds) { + assertThat(repositoryData.getSnapshotState(snapshotId), is(SnapshotState.SUCCESS)); + } + } + private static BlobStoreIndexShardSnapshots readShardGeneration(BlobStoreRepository repository, RepositoryShardId repositoryShardId, String generation) { return PlainActionFuture.get(f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, From 0a15e5db9e01e5498e9f601071de1b362efddd26 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 15:42:54 +0200 Subject: [PATCH 63/94] drier --- .../snapshots/CloneSnapshotIT.java | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index d1806e568d9aa..0069ae195ef65 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.Client; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.RepositoryData; @@ -359,8 +360,8 @@ public void testMasterFailoverDuringCloneStep1() throws Exception { final String targetSnapshot1 = "target-snapshot"; blockMasterOnReadIndexMeta(repoName); - final ActionFuture cloneFuture = dataNodeClient().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(testIndex).execute(); + final ActionFuture cloneFuture = + startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot1, testIndex); awaitNumberOfSnapshotsInProgress(1); final String masterNode = internalCluster().getMasterName(); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); @@ -385,8 +386,7 @@ public void testMasterFailoverDuringCloneStep2() throws Exception { final String targetSnapshot = "target-snapshot"; blockMasterOnShardClone(repoName); - final ActionFuture cloneFuture = dataNodeClient().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(testIndex).execute(); + final ActionFuture cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex); awaitNumberOfSnapshotsInProgress(1); final String masterNode = internalCluster().getMasterName(); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); @@ -411,8 +411,7 @@ public void testExceptionDuringShardClone() throws Exception { final String targetSnapshot = "target-snapshot"; blockMasterFromFinalizingSnapshotOnSnapFile(repoName); - final ActionFuture cloneFuture = dataNodeClient().admin().cluster() - .prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(testIndex).execute(); + final ActionFuture cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex); awaitNumberOfSnapshotsInProgress(1); final String masterNode = 
internalCluster().getMasterName(); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); @@ -423,9 +422,19 @@ public void testExceptionDuringShardClone() throws Exception { assertAcked(startDeleteSnapshot(repoName, sourceSnapshot).get()); } - private ActionFuture startClone(String repoName, String sourceSnapshot, String targetSnapshot1, - String... indices) { - return clusterAdmin().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot1).setIndices(indices).execute(); + private ActionFuture startCloneFromDataNode(String repoName, String sourceSnapshot, String targetSnapshot, + String... indices) { + return startClone(dataNodeClient(), repoName, sourceSnapshot, targetSnapshot, indices); + } + + private ActionFuture startClone(String repoName, String sourceSnapshot, String targetSnapshot, + String... indices) { + return startClone(client(), repoName, sourceSnapshot, targetSnapshot, indices); + } + + private static ActionFuture startClone(Client client, String repoName, String sourceSnapshot, + String targetSnapshot, String... indices) { + return client.admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indices).execute(); } private void blockMasterOnReadIndexMeta(String repoName) { From c3ae02d65cf1d3cb811fb5590f469d7383571c28 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 29 Sep 2020 20:45:04 +0200 Subject: [PATCH 64/94] nicer --- .../snapshots/CloneSnapshotIT.java | 17 +++++++ .../snapshots/clone/CloneSnapshotRequest.java | 22 ++------- .../snapshots/SnapshotUtils.java | 47 +++++++++++++++++++ .../snapshots/SnapshotsService.java | 11 ++--- .../snapshots/SnapshotUtilsTests.java | 32 +++++++++++++ 5 files changed, 104 insertions(+), 25 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java index 0069ae195ef65..8113fbe307c50 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CloneSnapshotIT.java @@ -372,6 +372,23 @@ public void testMasterFailoverDuringCloneStep1() throws Exception { assertAllSnapshotsSuccessful(getRepositoryData(repoName), 1); } + public void testFailsOnCloneMissingIndices() { + internalCluster().startMasterOnlyNode(); + internalCluster().startDataOnlyNode(); + final String repoName = "repo-name"; + final Path repoPath = randomRepoPath(); + if (randomBoolean()) { + createIndexWithContent("test-idx"); + } + createRepository(repoName, "fs", repoPath); + + final String snapshotName = "snapshot"; + createFullSnapshot(repoName, snapshotName); + final SnapshotException sne = expectThrows(SnapshotException.class, + () -> startClone(repoName, snapshotName, "target-snapshot", "does-not-exist").actionGet()); + assertThat(sne.getMessage(), containsString("No index [does-not-exist] found in the source snapshot ")); + } + public void testMasterFailoverDuringCloneStep2() throws Exception { // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java index de88dd81351de..062046223d263 100644 --- 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/clone/CloneSnapshotRequest.java
@@ -20,15 +20,13 @@
 package org.elasticsearch.action.admin.cluster.snapshots.clone;
 
 import org.elasticsearch.action.ActionRequestValidationException;
-import org.elasticsearch.action.IndicesRequest;
-import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.action.support.master.MasterNodeRequest;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 
 import java.io.IOException;
 
-public class CloneSnapshotRequest extends MasterNodeRequest<CloneSnapshotRequest> implements IndicesRequest.Replaceable {
+public class CloneSnapshotRequest extends MasterNodeRequest<CloneSnapshotRequest> {
 
     private final String repository;
 
@@ -36,17 +34,16 @@ public class CloneSnapshotRequest extends MasterNodeRequest
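Taken together, the API built up in this series is driven from the cluster admin client roughly as sketched below; repository, snapshot and index names are placeholders, and the builder call simply follows the prepareCloneSnapshot usage in the tests above rather than any final public API.

    import org.elasticsearch.action.support.master.AcknowledgedResponse;
    import org.elasticsearch.client.Client;

    // Illustration only: clone one index from an existing snapshot into a new target snapshot.
    public final class CloneSnapshotUsageSketch {
        public static void cloneOneIndex(Client client) {
            AcknowledgedResponse response = client.admin().cluster()
                .prepareCloneSnapshot("test-repo", "source-snapshot", "target-snapshot") // placeholder names
                .setIndices("index-1")                                                   // placeholder index pattern
                .get();
            assert response.isAcknowledged();
        }
    }

The index patterns in the request appear to be resolved against the indices recorded in the source snapshot (see the SnapshotUtils change and the new testFailsOnCloneMissingIndices above) rather than against the cluster state, which is why the IndicesRequest.Replaceable contract is dropped in the hunk above.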