From 3df57ce02d4d47220b785cbb819b1b38139d44ba Mon Sep 17 00:00:00 2001 From: Pranshu Shukla Date: Fri, 6 Dec 2024 21:44:56 +0530 Subject: [PATCH] address comments Signed-off-by: Pranshu Shukla --- .../discovery/DiscoveryDisruptionIT.java | 59 +++++++++++++------ .../remotestore/RemoteStoreNodeService.java | 9 ++- .../opensearch/test/InternalTestCluster.java | 8 +-- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java index 21a395ad4c980..ed602a2cf4358 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java @@ -35,6 +35,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.FailedToCommitClusterStateException; import org.opensearch.cluster.coordination.JoinHelper; +import org.opensearch.cluster.coordination.PersistedStateRegistry; import org.opensearch.cluster.coordination.PublicationTransportHandler; import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; @@ -311,6 +312,30 @@ public void testElectClusterManagerRemotePublicationConfigurationNodeJoinCommitF // a new cluster-state event with the new node-join along with new repositories setup in the cluster meta-data. internalCluster().startDataOnlyNodes(1, remotePublicationSettings, Boolean.TRUE); + // Checking if publish succeeded in the nodes before shutting down the blocked cluster-manager + assertBusy(() -> { + String randomNode = nonClusterManagerNodes.get(Randomness.get().nextInt(nonClusterManagerNodes.size())); + PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, randomNode); + + ClusterState state = registry.getPersistedState(PersistedStateRegistry.PersistedStateType.LOCAL).getLastAcceptedState(); + RepositoriesMetadata repositoriesMetadata = state.metadata().custom(RepositoriesMetadata.TYPE); + Boolean isRemoteStateRepoConfigured = Boolean.FALSE; + Boolean isRemoteRoutingTableRepoConfigured = Boolean.FALSE; + + assertNotNull(repositoriesMetadata); + assertNotNull(repositoriesMetadata.repositories()); + + for (RepositoryMetadata repo : repositoriesMetadata.repositories()) { + if (repo.name().equals(remoteStateRepoName)) { + isRemoteStateRepoConfigured = Boolean.TRUE; + } else if (repo.name().equals(remoteRoutingTableRepoName)) { + isRemoteRoutingTableRepoConfigured = Boolean.TRUE; + } + } + assertTrue(isRemoteStateRepoConfigured); + assertTrue(isRemoteRoutingTableRepoConfigured); + }); + logger.info("Stopping current Cluster Manager"); // We stop the current cluster-manager whose outbound paths were blocked. This is to force a new election onto nodes // we had the new cluster-state published but not commited. @@ -323,6 +348,7 @@ public void testElectClusterManagerRemotePublicationConfigurationNodeJoinCommitF String randomNode = nonClusterManagerNodes.get(Randomness.get().nextInt(nonClusterManagerNodes.size())); + // Checking if the final cluster-state is updated. RepositoriesMetadata repositoriesMetadata = internalCluster().getInstance(ClusterService.class, randomNode) .state() .metadata() @@ -342,33 +368,28 @@ public void testElectClusterManagerRemotePublicationConfigurationNodeJoinCommitF Assert.assertTrue("RemoteState Repo is not set in RepositoriesMetadata", isRemoteStateRepoConfigured); Assert.assertTrue("RemoteRoutingTable Repo is not set in RepositoriesMetadata", isRemoteRoutingTableRepoConfigured); - isRemoteStateRepoConfigured = Boolean.FALSE; - isRemoteRoutingTableRepoConfigured = Boolean.FALSE; - RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, randomNode); - try { - Repository remoteStateRepo = repositoriesService.repository(remoteStateRepoName); - if (Objects.nonNull(remoteStateRepo)) { - isRemoteStateRepoConfigured = Boolean.TRUE; - } - } catch (RepositoryMissingException e) { - isRemoteStateRepoConfigured = Boolean.FALSE; - } + isRemoteStateRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteStateRepoName); + isRemoteRoutingTableRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteRoutingTableRepoName); + + Assert.assertTrue("RemoteState Repo is not set in RepositoryService", isRemoteStateRepoConfigured); + Assert.assertTrue("RemoteRoutingTable Repo is not set in RepositoryService", isRemoteRoutingTableRepoConfigured); + + logger.info("Stopping current Cluster Manager"); + } + private Boolean isRepoPresentInRepositoryService(RepositoriesService repositoriesService, String repoName) { try { - Repository routingTableRepo = repositoriesService.repository(remoteRoutingTableRepoName); - if (Objects.nonNull(routingTableRepo)) { - isRemoteRoutingTableRepoConfigured = Boolean.TRUE; + Repository remoteStateRepo = repositoriesService.repository(repoName); + if (Objects.nonNull(remoteStateRepo)) { + return Boolean.TRUE; } } catch (RepositoryMissingException e) { - isRemoteRoutingTableRepoConfigured = Boolean.FALSE; + return Boolean.FALSE; } - Assert.assertTrue("RemoteState Repo is not set in RepositoryService", isRemoteStateRepoConfigured); - Assert.assertTrue("RemoteRoutingTable Repo is not set in RepositoryService", isRemoteRoutingTableRepoConfigured); - - logger.info("Stopping current Cluster Manager"); + return Boolean.FALSE; } } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index da94c8a7836b3..fb97cf40d90d6 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -186,11 +186,9 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode if (newRepositoryMetadata.name().equals(existingRepositoryMetadata.name())) { try { // This is to handle cases where-in the during a previous node-join attempt if the publish operation succeeded - // but - // the commit operation failed, the cluster-state may have the repository metadata which is not applied into the - // repository service. This may lead to assertion failures down the line. - String repositoryName = newRepositoryMetadata.name(); - repositoriesService.get().repository(repositoryName); + // but the commit operation failed, the cluster-state may have the repository metadata which is not applied + // into the repository service. This may lead to assertion failures down the line. + repositoriesService.get().repository(newRepositoryMetadata.name()); } catch (RepositoryMissingException e) { logger.warn( "Skipping repositories metadata checks: Remote repository [{}] is in the cluster state but not present " @@ -199,6 +197,7 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode ); break; } + try { // This will help in handling two scenarios - // 1. When a fresh cluster is formed and a node tries to join the cluster, the repository diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 14f0a30ef3f84..7b2c653e9bdb2 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2325,8 +2325,8 @@ public List startNodes(int numOfNodes, Settings settings) { /** * Starts multiple nodes with the given settings and returns their names */ - public List startNodes(int numOfNodes, Settings settings, Boolean ignoreNodeJoin) { - return startNodes(ignoreNodeJoin, Collections.nCopies(numOfNodes, settings).toArray(new Settings[0])); + public List startNodes(int numOfNodes, Settings settings, Boolean waitForNodeJoin) { + return startNodes(waitForNodeJoin, Collections.nCopies(numOfNodes, settings).toArray(new Settings[0])); } /** @@ -2339,7 +2339,7 @@ public synchronized List startNodes(Settings... extraSettings) { /** * Starts multiple nodes with the given settings and returns their names */ - public synchronized List startNodes(Boolean ignoreNodeJoin, Settings... extraSettings) { + public synchronized List startNodes(Boolean waitForNodeJoin, Settings... extraSettings) { final int newClusterManagerCount = Math.toIntExact(Stream.of(extraSettings).filter(DiscoveryNode::isClusterManagerNode).count()); final int defaultMinClusterManagerNodes; if (autoManageClusterManagerNodes) { @@ -2391,7 +2391,7 @@ public synchronized List startNodes(Boolean ignoreNodeJoin, Settings... nodes.add(nodeAndClient); } startAndPublishNodesAndClients(nodes); - if (autoManageClusterManagerNodes && !ignoreNodeJoin) { + if (autoManageClusterManagerNodes && !waitForNodeJoin) { validateClusterFormed(); } return nodes.stream().map(NodeAndClient::getName).collect(Collectors.toList());