From 0df37d72170bb7e5959094e4ddbd3badc511380d Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Fri, 22 Mar 2024 15:56:27 -0400 Subject: [PATCH] Dianna's updates: always provide a 'reason' msg for node ineligibility; make some methods public for out-of-package testing --- .../cluster/coordination/Coordinator.java | 47 ++++++++----------- .../coordination/ElectionStrategy.java | 26 +++++----- .../AbstractCoordinatorTestCase.java | 16 +++---- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 578cc1be64d5f..9effadadc598c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -147,7 +147,7 @@ public class Coordinator extends AbstractLifecycleComponent implements ClusterSt private final MasterServiceTaskQueue nodeLeftQueue; private final Supplier persistedStateSupplier; private final NoMasterBlockService noMasterBlockService; - final Object mutex = new Object(); // package-private to allow tests to call methods that assert that the mutex is held + public final Object mutex = new Object(); // public to allow tests to call methods that assert that the mutex is held private final SetOnce coordinationState = new SetOnce<>(); // initialized on start-up (see doStart) private volatile ClusterState applierState; // the state that should be exposed to the cluster state applier @@ -540,15 +540,11 @@ private void startElection() { if (mode == Mode.CANDIDATE) { final var nodeEligibility = localNodeMayWinElection(getLastAcceptedState(), electionStrategy); if (nodeEligibility.mayWin() == false) { - if (nodeEligibility.reason() == null) { - logger.trace("skip election as local node may not win it: {}", getLastAcceptedState().coordinationMetadata()); - } else { - logger.info( - "skip election as local node may not win it ({}): {}", - nodeEligibility.reason(), - getLastAcceptedState().coordinationMetadata() - ); - } + logger.info( + "skip election as local node may not win it ({}): {}", + nodeEligibility.reason(), + getLastAcceptedState().coordinationMetadata() + ); return; } @@ -838,7 +834,7 @@ public String toString() { } } - void becomeCandidate(String method) { + public void becomeCandidate(String method) { assert Thread.holdsLock(mutex) : "Coordinator mutex not held"; logger.debug( "{}: coordinator becoming CANDIDATE in term {} (was {}, lastKnownLeader was [{}])", @@ -1018,8 +1014,8 @@ long getCurrentTerm() { } } - // package-visible for testing - Mode getMode() { + // visible for testing + public Mode getMode() { synchronized (mutex) { return mode; } @@ -1273,8 +1269,12 @@ public boolean setInitialConfiguration(final VotingConfiguration votingConfigura metadataBuilder.coordinationMetadata(coordinationMetadata); coordinationState.get().setInitialState(ClusterState.builder(currentState).metadata(metadataBuilder).build()); - assert localNodeMayWinElection(getLastAcceptedState(), electionStrategy).mayWin() - : "initial state does not allow local node to win election: " + getLastAcceptedState().coordinationMetadata(); + var nodeEligibility = localNodeMayWinElection(getLastAcceptedState(), electionStrategy); + assert nodeEligibility.mayWin() + : "initial state does not allow local node to win election, reason: " + + nodeEligibility.reason() + + " , metadata: " + + getLastAcceptedState().coordinationMetadata(); preVoteCollector.update(getPreVoteResponse(), null); // pick up the change to last-accepted version startElectionScheduler(); return true; @@ -1759,18 +1759,11 @@ public void run() { final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState(); final var nodeEligibility = localNodeMayWinElection(lastAcceptedState, electionStrategy); if (nodeEligibility.mayWin() == false) { - if (nodeEligibility.reason() == null) { - logger.trace( - "skip prevoting as local node may not win election: {}", - lastAcceptedState.coordinationMetadata() - ); - } else { - logger.info( - "skip prevoting as local node may not win election ({}): {}", - nodeEligibility.reason(), - lastAcceptedState.coordinationMetadata() - ); - } + logger.info( + "skip prevoting as local node may not win election ({}): {}", + nodeEligibility.reason(), + lastAcceptedState.coordinationMetadata() + ); return; } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionStrategy.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionStrategy.java index 3faa0653f28b6..639a1702c486e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionStrategy.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionStrategy.java @@ -18,7 +18,6 @@ * Custom additional quorum restrictions can be defined by implementing the {@link #satisfiesAdditionalQuorumConstraints} method. */ public abstract class ElectionStrategy { - public static final ElectionStrategy DEFAULT_INSTANCE = new ElectionStrategy() { @Override protected boolean satisfiesAdditionalQuorumConstraints( @@ -34,6 +33,11 @@ protected boolean satisfiesAdditionalQuorumConstraints( } }; + /** + * Contains a result for whether a node may win an election and the reason if not. + */ + public record NodeEligibility(boolean mayWin, String reason) {} + /** * Whether there is an election quorum from the point of view of the given local node under the provided voting configurations */ @@ -105,17 +109,17 @@ public void beforeCommit(long term, long version, ActionListener listener) listener.onResponse(null); } - // Whether a node may win elections - public record NodeEligibility(boolean mayWin, String reason) {} - - public static final NodeEligibility NODE_MAY_WIN_ELECTION = new NodeEligibility(true, null); - public static final NodeEligibility NODE_MAY_NOT_WIN_ELECTION = new NodeEligibility(false, null); - public NodeEligibility nodeMayWinElection(ClusterState lastAcceptedState, DiscoveryNode node) { final String nodeId = node.getId(); - final boolean nodeMayWin = lastAcceptedState.getLastCommittedConfiguration().getNodeIds().contains(nodeId) - || lastAcceptedState.getLastAcceptedConfiguration().getNodeIds().contains(nodeId) - || lastAcceptedState.getVotingConfigExclusions().stream().noneMatch(vce -> vce.getNodeId().equals(nodeId)); - return nodeMayWin ? NODE_MAY_WIN_ELECTION : NODE_MAY_NOT_WIN_ELECTION; + if (lastAcceptedState.getLastCommittedConfiguration().getNodeIds().contains(nodeId) == false) { + return new NodeEligibility(false, "node cannot win election, it is not part of the last committed cluster state"); + } + if (lastAcceptedState.getLastAcceptedConfiguration().getNodeIds().contains(nodeId) == false) { + return new NodeEligibility(false, "node cannot win election, it is not part of the last accepted cluster state"); + } + if (lastAcceptedState.getVotingConfigExclusions().stream().noneMatch(vce -> vce.getNodeId().equals(nodeId)) == false) { + return new NodeEligibility(false, "node cannot win election, it is excluded from voting"); + } + return new NodeEligibility(true, ""); } } diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java index 7f39120e83c07..82f49160041f4 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -284,7 +284,7 @@ public class Cluster implements Releasable { @Nullable // null means construct a list from all the current nodes private List seedHostsList; - Cluster(int initialNodeCount) { + public Cluster(int initialNodeCount) { this(initialNodeCount, true, Settings.EMPTY); } @@ -360,7 +360,7 @@ List addNodes(int newNodesCount) { return addedNodes; } - int size() { + public int size() { return clusterNodes.size(); } @@ -753,7 +753,7 @@ private void stabilise(long stabilisationDurationMillis, boolean expectIdleJoinV } } - void bootstrapIfNecessary() { + public void bootstrapIfNecessary() { if (clusterNodes.stream().allMatch(ClusterNode::isNotUsefullyBootstrapped)) { assertThat("setting initial configuration may fail with disconnected nodes", disconnectedNodes, empty()); assertThat("setting initial configuration may fail with blackholed nodes", blackholedNodes, empty()); @@ -766,7 +766,7 @@ void bootstrapIfNecessary() { } } - void runFor(long runDurationMillis, String description) { + public void runFor(long runDurationMillis, String description) { final long endTime = deterministicTaskQueue.getCurrentTimeMillis() + runDurationMillis; logger.info("--> runFor({}ms) running until [{}ms]: {}", runDurationMillis, endTime, description); @@ -849,7 +849,7 @@ ClusterNode getAnyNode() { return getAnyNodeExcept(); } - ClusterNode getAnyNodeExcept(ClusterNode... clusterNodesToExclude) { + public ClusterNode getAnyNodeExcept(ClusterNode... clusterNodesToExclude) { List filteredNodes = getAllNodesExcept(clusterNodesToExclude); assert filteredNodes.isEmpty() == false; return randomFrom(filteredNodes); @@ -946,7 +946,7 @@ public final class ClusterNode { private static final Logger logger = LogManager.getLogger(ClusterNode.class); private final int nodeIndex; - Coordinator coordinator; + public Coordinator coordinator; private final DiscoveryNode localNode; final CoordinationState.PersistedState persistedState; final Settings nodeSettings; @@ -1378,7 +1378,7 @@ public void onFailure(Exception e) { }); } - AckCollector submitUpdateTask( + public AckCollector submitUpdateTask( String source, UnaryOperator clusterStateUpdate, CoordinatorTestClusterStateUpdateTask taskListener @@ -1450,7 +1450,7 @@ void onDisconnectEventFrom(ClusterNode clusterNode) { transportService.disconnectFromNode(clusterNode.localNode); } - ClusterState getLastAppliedClusterState() { + public ClusterState getLastAppliedClusterState() { return clusterApplierService.state(); }