-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cluster health call to throw decommissioned exception for local decommissioned node #6008
Changes from 9 commits
bd1df17
99ae759
6fc708d
fbd6d71
2f9ea8a
69495e0
4160593
50a52a6
2b95f53
5ff1c0f
670fd5e
7f715a5
2b04e51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,6 +111,10 @@ | |
"awareness_attribute":{ | ||
"type":"string", | ||
"description":"The awareness attribute for which the health is required" | ||
}, | ||
"ensure_local_node_commissioned":{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wondering we could use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There could only be two kinds of transport request. One which retrieves information from local cluster state of the node or another which gets it from leader's state. There's no mechanism which says run this transport request on a specific node id. Hence, I feel this would ALWAYS run with local param only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updating it to ensure_node_commissioned |
||
"type":"boolean", | ||
"description": "Checks whether local node is commissioned or not. If set to true on a local call it will throw exception if node is decommissioned (default: false)" | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,8 @@ | |
import org.opensearch.cluster.LocalClusterUpdateTask; | ||
import org.opensearch.cluster.NotClusterManagerException; | ||
import org.opensearch.cluster.block.ClusterBlockException; | ||
import org.opensearch.cluster.coordination.Coordinator; | ||
import org.opensearch.cluster.decommission.NodeDecommissionedException; | ||
import org.opensearch.cluster.health.ClusterHealthStatus; | ||
import org.opensearch.cluster.metadata.IndexNameExpressionResolver; | ||
import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException; | ||
|
@@ -57,6 +59,7 @@ | |
import org.opensearch.common.io.stream.StreamInput; | ||
import org.opensearch.common.unit.TimeValue; | ||
import org.opensearch.common.util.CollectionUtils; | ||
import org.opensearch.discovery.Discovery; | ||
import org.opensearch.index.IndexNotFoundException; | ||
import org.opensearch.node.NodeClosedException; | ||
import org.opensearch.tasks.Task; | ||
|
@@ -77,6 +80,7 @@ public class TransportClusterHealthAction extends TransportClusterManagerNodeRea | |
private static final Logger logger = LogManager.getLogger(TransportClusterHealthAction.class); | ||
|
||
private final AllocationService allocationService; | ||
private final Discovery discovery; | ||
|
||
@Inject | ||
public TransportClusterHealthAction( | ||
|
@@ -85,7 +89,8 @@ public TransportClusterHealthAction( | |
ThreadPool threadPool, | ||
ActionFilters actionFilters, | ||
IndexNameExpressionResolver indexNameExpressionResolver, | ||
AllocationService allocationService | ||
AllocationService allocationService, | ||
Discovery discovery | ||
) { | ||
super( | ||
ClusterHealthAction.NAME, | ||
|
@@ -98,6 +103,7 @@ public TransportClusterHealthAction( | |
indexNameExpressionResolver | ||
); | ||
this.allocationService = allocationService; | ||
this.discovery = discovery; | ||
} | ||
|
||
@Override | ||
|
@@ -134,7 +140,11 @@ protected void clusterManagerOperation( | |
final ClusterState unusedState, | ||
final ActionListener<ClusterHealthResponse> listener | ||
) { | ||
|
||
if (request.ensureLocalNodeCommissioned() | ||
&& discovery instanceof Coordinator | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Asserts wouldn't run on prod. And only coordinator has this node's commission status info. If a developer uses a different Discovery mechanism it might break this. Hence putting this check directly There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exactly so tests should fail for a developer, in prod this is expected to be Coordinator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be fair to assume that
|
||
&& ((Coordinator) discovery).localNodeCommissioned() == false) { | ||
listener.onFailure(new NodeDecommissionedException("local node is decommissioned")); | ||
} | ||
final int waitCount = getWaitCount(request); | ||
|
||
if (request.waitForEvents() != null) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
|
||
package org.opensearch.action.support.clustermanager; | ||
|
||
import org.opensearch.Version; | ||
import org.opensearch.common.io.stream.StreamInput; | ||
import org.opensearch.common.io.stream.StreamOutput; | ||
|
||
|
@@ -46,18 +47,25 @@ public abstract class ClusterManagerNodeReadRequest<Request extends ClusterManag | |
ClusterManagerNodeRequest<Request> { | ||
|
||
protected boolean local = false; | ||
protected boolean ensureLocalNodeCommissioned = false; | ||
|
||
/**
 * Creates a request that keeps the field defaults: both <code>local</code> and
 * <code>ensureLocalNodeCommissioned</code> stay <code>false</code>.
 */
protected ClusterManagerNodeReadRequest() {
}
|
||
protected ClusterManagerNodeReadRequest(StreamInput in) throws IOException { | ||
super(in); | ||
local = in.readBoolean(); | ||
if (in.getVersion().onOrAfter(Version.CURRENT)) { | ||
ensureLocalNodeCommissioned = in.readBoolean(); | ||
} | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
super.writeTo(out); | ||
out.writeBoolean(local); | ||
if (out.getVersion().onOrAfter(Version.CURRENT)) { | ||
out.writeBoolean(ensureLocalNodeCommissioned); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The build fails in the mixed-cluster test if the version check is put at 2.6 rather than at current. Please suggest the correct way of doing this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We will put 2.6 and then backport. |
||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
|
@@ -66,6 +74,12 @@ public final Request local(boolean local) { | |
return (Request) this; | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
public final Request ensureLocalNodeCommissioned(boolean ensureLocalNodeCommissioned) { | ||
this.ensureLocalNodeCommissioned = ensureLocalNodeCommissioned; | ||
return (Request) this; | ||
} | ||
|
||
/** | ||
* Return local information, do not retrieve the state from cluster-manager node (default: false). | ||
* @return <code>true</code> if local information is to be returned; | ||
|
@@ -74,4 +88,13 @@ public final Request local(boolean local) { | |
public final boolean local() { | ||
return local; | ||
} | ||
|
||
/** | ||
* For a given local request, checks if the local node is commissioned or not (default: false). | ||
* @return <code>true</code> if local information is to be returned only when local node is also commissioned | ||
* <code>false</code> to not check local node if commissioned or not for a local request | ||
*/ | ||
public final boolean ensureLocalNodeCommissioned() { | ||
return ensureLocalNodeCommissioned; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
|
||
import org.opensearch.OpenSearchException; | ||
import org.opensearch.common.io.stream.StreamInput; | ||
import org.opensearch.rest.RestStatus; | ||
|
||
import java.io.IOException; | ||
|
||
|
@@ -28,4 +29,9 @@ public NodeDecommissionedException(String msg, Object... args) { | |
public NodeDecommissionedException(StreamInput in) throws IOException { | ||
super(in); | ||
} | ||
|
||
@Override | ||
public RestStatus status() { | ||
return RestStatus.UNPROCESSABLE_ENTITY; | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The 424 HTTP error code seems more appropriate. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, updated. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put this under the "Unreleased 2.x" section.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ack