diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java index 359a1651b5c57..bca970f86347a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -9,6 +9,8 @@ package org.opensearch.indices.replication; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; +import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.health.ClusterHealthStatus; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; @@ -112,6 +114,51 @@ public void testRecoveryAfterDocsIndexed() throws Exception { assertDocCounts(10, replica); } + public void testStopPrimary_RestoreOnNewNode() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + assertDocCounts(docCount, primary); + + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + assertDocCounts(docCount, replica); + // stop the primary + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + + assertBusy(() -> { + ClusterHealthResponse clusterHealthResponse = clusterAdmin().prepareHealth(INDEX_NAME).get(); + assertEquals(ClusterHealthStatus.RED, clusterHealthResponse.getStatus()); + }); + assertDocCounts(docCount, replica); + + String restoredPrimary = internalCluster().startDataOnlyNode(); + + client().admin().cluster().restoreRemoteStore(new RestoreRemoteStoreRequest().indices(INDEX_NAME), PlainActionFuture.newFuture()); + ensureGreen(INDEX_NAME); + assertDocCounts(docCount, replica, restoredPrimary); + + for (int i = docCount; i < docCount * 2; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + assertBusy(() -> assertDocCounts(20, replica, restoredPrimary)); + } + public void testFailoverToNewPrimaryWithPollingReplication() throws Exception { internalCluster().startClusterManagerOnlyNode(); final String primary = internalCluster().startDataOnlyNode(); @@ -150,7 +197,6 @@ public void testFailoverToNewPrimaryWithPollingReplication() throws Exception { client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); } refresh(INDEX_NAME); - assertBusy(() -> { assertDocCounts(20, replica, writer_replica); }); - + assertBusy(() -> assertDocCounts(20, replica, writer_replica)); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java index 717dd0d701dde..e8d65e07c7dd9 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java @@ -8,25 +8,19 @@ package org.opensearch.indices.replication; -import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; -import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequestBuilder; import org.opensearch.action.search.SearchResponse; -import org.opensearch.cluster.health.ClusterHealthStatus; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; -import org.opensearch.core.rest.RestStatus; import org.opensearch.index.query.QueryBuilders; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.remotestore.RemoteSnapshotIT; import org.opensearch.snapshots.SnapshotRestoreException; -import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.util.List; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; @@ -103,58 +97,6 @@ public void testSearchReplicaRestore_WhenSnapshotOnSegRepWithSearchReplica_Resto assertTrue(exception.getMessage().contains(getSnapshotExceptionMessage(ReplicationType.SEGMENT, ReplicationType.DOCUMENT))); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15952") - public void testStopPrimary_RestoreOnNewNode() throws Exception { - internalCluster().startClusterManagerOnlyNode(); - final String primary = internalCluster().startDataOnlyNode(); - createIndex( - INDEX_NAME, - Settings.builder() - .put(indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) - .build() - ); - ensureYellowAndNoInitializingShards(INDEX_NAME); - final int docCount = 10; - for (int i = 0; i < docCount; i++) { - client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); - } - refresh(INDEX_NAME); - assertDocCounts(docCount, primary); - - final String replica = internalCluster().startDataOnlyNode(); - ensureGreen(INDEX_NAME); - assertDocCounts(docCount, replica); - createRepository(REPOSITORY_NAME, FS_REPOSITORY_TYPE, randomRepoPath().toAbsolutePath()); - createSnapshot(REPOSITORY_NAME, SNAPSHOT_NAME, List.of(INDEX_NAME)); - // stop the primary - internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); - - assertBusy(() -> { - ClusterHealthResponse clusterHealthResponse = clusterAdmin().prepareHealth(INDEX_NAME).get(); - assertEquals(ClusterHealthStatus.RED, clusterHealthResponse.getStatus()); - }); - assertDocCounts(docCount, replica); - - String restoredPrimary = internalCluster().startDataOnlyNode(); - - RestoreSnapshotRequestBuilder builder = client().admin() - .cluster() - .prepareRestoreSnapshot(REPOSITORY_NAME, SNAPSHOT_NAME) - .setWaitForCompletion(true); - assertEquals(builder.get().status(), RestStatus.ACCEPTED); - ensureGreen(INDEX_NAME); - assertDocCounts(docCount, replica, restoredPrimary); - } - - protected void assertDocCounts(int expectedDocCount, String... nodeNames) { - for (String node : nodeNames) { - assertHitCount(client(node).prepareSearch(INDEX_NAME).setSize(0).setPreference("_only_local").get(), expectedDocCount); - } - } - private void bootstrapIndexWithOutSearchReplicas(ReplicationType replicationType) throws InterruptedException { startCluster(2); diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index 43b962f7ca697..d2c7c7ce71953 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -149,7 +149,10 @@ boolean validate(Metadata metadata) { "Shard [" + indexShardRoutingTable.shardId().id() + "] routing table has wrong number of replicas, expected [" + + "Replicas: " + indexMetadata.getNumberOfReplicas() + + "Search Replicas: " + + indexMetadata.getNumberOfSearchOnlyReplicas() + "], got [" + routingNumberOfReplicas + "]" @@ -514,6 +517,16 @@ public Builder initializeAsRemoteStoreRestore( ShardRouting.newUnassigned(shardId, false, PeerRecoverySource.INSTANCE, unassignedInfo) ); } + // if writers are red we do not want to re-recover search only shards if already assigned. + for (ShardRouting shardRouting : indexShardRoutingTable.searchOnlyReplicas()) { + if (shardRouting.unassigned()) { + indexShardRoutingBuilder.addShard( + ShardRouting.newUnassigned(shardId, false, EmptyStoreRecoverySource.INSTANCE, unassignedInfo) + ); + } else { + indexShardRoutingBuilder.addShard(shardRouting); + } + } } else { // Primary is either active or initializing. Do not trigger restore. indexShardRoutingBuilder.addShard(indexShardRoutingTable.primaryShard());