Skip to content

Commit

Permalink
[ML] Fix handling of ml.config_version node attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
przemekwitek committed Feb 2, 2024
1 parent f642b8a commit d05af36
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public static Tuple<MlConfigVersion, MlConfigVersion> getMinMaxMlConfigVersion(D
if (mlConfigVersion.after(maxMlConfigVersion)) {
maxMlConfigVersion = mlConfigVersion;
}
} catch (IllegalArgumentException e) {
} catch (IllegalStateException e) {
// This means we encountered a node that is after 8.10.0 but has the ML plugin disabled - ignore it
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.node.VersionInformation;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.utils.MlConfigVersionUtils;
import org.hamcrest.Matchers;
Expand Down Expand Up @@ -149,6 +150,51 @@ public void testGetMinMaxMlConfigVersion() {
assertEquals(MlConfigVersion.V_10, MlConfigVersion.getMaxMlConfigVersion(nodes));
}

public void testGetMinMaxMlConfigVersionWhenMlConfigVersionAttrIsMissing() {
Map<String, String> nodeAttr1 = Map.of(MlConfigVersion.ML_CONFIG_VERSION_NODE_ATTR, MlConfigVersion.V_7_1_0.toString());
Map<String, String> nodeAttr2 = Map.of(MlConfigVersion.ML_CONFIG_VERSION_NODE_ATTR, MlConfigVersion.V_8_2_0.toString());
Map<String, String> nodeAttr3 = Map.of();
DiscoveryNodes nodes = DiscoveryNodes.builder()
.add(
DiscoveryNodeUtils.builder("_node_id1")
.name("_node_name1")
.address(new TransportAddress(InetAddress.getLoopbackAddress(), 9300))
.attributes(nodeAttr1)
.roles(ROLES_WITH_ML)
.version(VersionInformation.inferVersions(Version.fromString("7.2.0")))
.build()
)
.add(
DiscoveryNodeUtils.builder("_node_id2")
.name("_node_name2")
.address(new TransportAddress(InetAddress.getLoopbackAddress(), 9301))
.attributes(nodeAttr2)
.roles(ROLES_WITH_ML)
.version(VersionInformation.inferVersions(Version.fromString("7.1.0")))
.build()
)
.add(
DiscoveryNodeUtils.builder("_node_id3")
.name("_node_name3")
.address(new TransportAddress(InetAddress.getLoopbackAddress(), 9302))
.attributes(nodeAttr3)
.roles(ROLES_WITH_ML)
.version(
new VersionInformation(
Version.V_8_11_0,
IndexVersion.getMinimumCompatibleIndexVersion(Version.V_8_11_0.id),
IndexVersion.fromId(Version.V_8_11_0.id)
)
)
.build()
)
.build();

assertEquals(MlConfigVersion.V_7_1_0, MlConfigVersion.getMinMlConfigVersion(nodes));
// _node_name3 is ignored
assertEquals(MlConfigVersion.V_8_2_0, MlConfigVersion.getMaxMlConfigVersion(nodes));
}

public void testGetMlConfigVersionForNode() {
DiscoveryNode node = DiscoveryNodeUtils.builder("_node_id4")
.name("_node_name4")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,19 @@
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Map;

import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.aMapWithSize;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;

public class MlPluginDisabledIT extends ESRestTestCase {

Expand Down Expand Up @@ -71,7 +76,19 @@ public void testActionsFail() throws Exception {

public void testMlFeatureReset() throws IOException {
Request request = new Request("POST", "/_features/_reset");
Response response = client().performRequest(request);
assertThat(response.getStatusLine().getStatusCode(), equalTo(200));
assertOK(client().performRequest(request));
}

@SuppressWarnings("unchecked")
public void testAllNodesHaveMlConfigVersionAttribute() throws IOException {
Request request = new Request("GET", "/_nodes");
Response response = assertOK(client().performRequest(request));
var nodesMap = (Map<String, Object>) entityAsMap(response).get("nodes");
assertThat(nodesMap, is(aMapWithSize(greaterThanOrEqualTo(1))));
for (var nodeObj : nodesMap.values()) {
var nodeMap = (Map<String, Object>) nodeObj;
// We do not expect any specific version. The only important assertion is that the attribute exists.
assertThat(XContentMapValues.extractValue(nodeMap, "attributes", "ml.config_version"), is(notNullValue()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -829,13 +829,19 @@ public Settings additionalSettings() {
String allocatedProcessorsAttrName = "node.attr." + ALLOCATED_PROCESSORS_NODE_ATTR;
String mlConfigVersionAttrName = "node.attr." + ML_CONFIG_VERSION_NODE_ATTR;

if (enabled == false) {
disallowMlNodeAttributes(maxOpenJobsPerNodeNodeAttrName, machineMemoryAttrName, jvmSizeAttrName, mlConfigVersionAttrName);
return Settings.EMPTY;
}

Settings.Builder additionalSettings = Settings.builder();
if (DiscoveryNode.hasRole(settings, DiscoveryNodeRole.ML_ROLE)) {

// The ML config version is needed for two related reasons even if ML is currently disabled on the node:
// 1. If ML is in use then decisions about minimum node versions need to include this node, and not
// having it available can cause exceptions during cluster state processing
// 2. It could be argued that reason 1 could be fixed by completely ignoring the node, however,
// then there would be a risk that ML is later enabled on an old node that was ignored, and
// some new ML feature that's been used is then incompatible with it
// The only safe approach is to consider which ML code _all_ nodes in the cluster are running, regardless
// of whether they currently have ML enabled.
addMlNodeAttribute(additionalSettings, mlConfigVersionAttrName, MlConfigVersion.CURRENT.toString());

if (enabled && DiscoveryNode.hasRole(settings, DiscoveryNodeRole.ML_ROLE)) {
addMlNodeAttribute(
additionalSettings,
machineMemoryAttrName,
Expand All @@ -859,7 +865,6 @@ public Settings additionalSettings() {
allocatedProcessorsAttrName
);
}
addMlNodeAttribute(additionalSettings, mlConfigVersionAttrName, MlConfigVersion.CURRENT.toString());
return additionalSettings.build();
}

Expand Down

0 comments on commit d05af36

Please sign in to comment.