diff --git a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
index b62701373c17a..eac0a3c83d64e 100644
--- a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
@@ -30,7 +30,8 @@ privileges. See <>, <> and
 This endpoint is designed to be used by a user interface that needs to fully
 understand machine learning configurations where some options are not specified,
 meaning that the defaults should be used. This endpoint may be used to find out
-what those defaults are.
+what those defaults are. It also provides information about the maximum size
+of {ml} jobs that could run in the current cluster configuration.
 
 [[get-ml-info-example]]
 == {api-examples-title}
@@ -115,7 +116,8 @@ This is a possible response:
     "build_hash": "99a07c016d5a73"
   },
   "limits" : {
-    "effective_max_model_memory_limit": "28961mb"
+    "effective_max_model_memory_limit": "28961mb",
+    "total_ml_memory": "86883mb"
   }
 }
 ----
@@ -123,3 +125,4 @@ This is a possible response:
 // TESTRESPONSE[s/"version": "7.0.0",/"version": "$body.native_code.version",/]
 // TESTRESPONSE[s/"build_hash": "99a07c016d5a73"/"build_hash": "$body.native_code.build_hash"/]
 // TESTRESPONSE[s/"effective_max_model_memory_limit": "28961mb"/"effective_max_model_memory_limit": "$body.limits.effective_max_model_memory_limit"/]
+// TESTRESPONSE[s/"total_ml_memory": "86883mb"/"total_ml_memory": "$body.limits.total_ml_memory"/]
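(Editor's illustration, not part of the patch.) The documentation change above says the endpoint now also reports the maximum size of {ml} jobs that could run in the current cluster configuration. A rough sketch of how a caller might interpret the two documented limits; the values are the example values from the docs, and the class name is made up:

```java
import org.elasticsearch.common.unit.ByteSizeValue;

public class MlJobSizeCheckExample {
    public static void main(String[] args) {
        // Values as they would be read from the "limits" section of a GET _ml/info response.
        ByteSizeValue effectiveMaxModelMemoryLimit =
            ByteSizeValue.parseBytesSizeValue("28961mb", "effective_max_model_memory_limit");
        ByteSizeValue totalMlMemory =
            ByteSizeValue.parseBytesSizeValue("86883mb", "total_ml_memory");
        ByteSizeValue requestedModelMemoryLimit =
            ByteSizeValue.parseBytesSizeValue("4gb", "model_memory_limit");

        // A single job's model memory limit has to fit on the largest ML node,
        // which is what effective_max_model_memory_limit describes ...
        boolean fitsOnLargestNode = requestedModelMemoryLimit.compareTo(effectiveMaxModelMemoryLimit) <= 0;
        // ... while total_ml_memory is the ML memory available across all ML nodes combined.
        System.out.println("fits on largest ML node: " + fitsOnLargestNode
            + ", total ML memory: " + totalMlMemory.getStringRep());
    }
}
```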
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
index 065a2dc780908..d3ad54dbac86c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
@@ -15,6 +15,7 @@
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.ClusterSettings;
+import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.env.Environment;
@@ -128,6 +129,23 @@ private Map<String, Object> datafeedsDefaults() {
         return anomalyDetectorsDefaults;
     }
 
+    static ByteSizeValue calculateTotalMlMemory(ClusterSettings clusterSettings, DiscoveryNodes nodes) {
+
+        long totalMlMemory = 0;
+
+        for (DiscoveryNode node : nodes) {
+            OptionalLong limit = NativeMemoryCalculator.allowedBytesForMl(node, clusterSettings);
+            if (limit.isEmpty()) {
+                continue;
+            }
+            totalMlMemory += limit.getAsLong();
+        }
+
+        // Round down to a whole number of megabytes, since we generally deal with model
+        // memory limits in whole megabytes
+        return ByteSizeValue.ofMb(ByteSizeUnit.BYTES.toMB(totalMlMemory));
+    }
+
     static ByteSizeValue calculateEffectiveMaxModelMemoryLimit(ClusterSettings clusterSettings, DiscoveryNodes nodes) {
 
         long maxMlMemory = -1;
@@ -148,7 +166,7 @@ static ByteSizeValue calculateEffectiveMaxModelMemoryLimit(ClusterSettings clust
 
         maxMlMemory -= Math.max(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), DataFrameAnalyticsConfig.PROCESS_MEMORY_OVERHEAD.getBytes());
         maxMlMemory -= MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes();
-        return ByteSizeValue.ofMb(Math.max(0L, maxMlMemory) / 1024 / 1024);
+        return ByteSizeValue.ofMb(ByteSizeUnit.BYTES.toMB(Math.max(0L, maxMlMemory)));
     }
 
     private Map<String, Object> limits() {
@@ -166,6 +184,8 @@ private Map<String, Object> limits() {
         if (effectiveMaxModelMemoryLimit != null) {
             limits.put("effective_max_model_memory_limit", effectiveMaxModelMemoryLimit.getStringRep());
         }
+        limits.put("total_ml_memory",
+            calculateTotalMlMemory(clusterService.getClusterSettings(), clusterService.state().getNodes()).getStringRep());
         return limits;
     }
 }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java
index b55218d7b5f1f..b9a8f7de81220 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java
@@ -24,6 +24,7 @@
 
 import static org.elasticsearch.xpack.ml.MachineLearning.MAX_MACHINE_MEMORY_PERCENT;
 import static org.elasticsearch.xpack.ml.MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT;
+import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.lessThanOrEqualTo;
 import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
@@ -36,7 +37,8 @@ public void testCalculateEffectiveMaxModelMemoryLimit() {
         ClusterSettings clusterSettings = new ClusterSettings(
             Settings.builder().put(MAX_MACHINE_MEMORY_PERCENT.getKey(), mlMemoryPercent).build(),
             Sets.newHashSet(MAX_MACHINE_MEMORY_PERCENT, USE_AUTO_MACHINE_MEMORY_PERCENT));
-        long highestMlMachineMemory = -1;
+        long highestMlMachineMemoryBytes = -1;
+        long totalMlMemoryBytes = 0;
 
         DiscoveryNodes.Builder builder = DiscoveryNodes.builder();
         for (int i = randomIntBetween(1, 10); i > 0; --i) {
@@ -49,7 +51,8 @@ public void testCalculateEffectiveMaxModelMemoryLimit() {
             } else {
                 // ML node
                 long machineMemory = randomLongBetween(2000000000L, 100000000000L);
-                highestMlMachineMemory = Math.max(machineMemory, highestMlMachineMemory);
+                highestMlMachineMemoryBytes = Math.max(machineMemory, highestMlMachineMemoryBytes);
+                totalMlMemoryBytes += machineMemory * mlMemoryPercent / 100;
                 builder.add(new DiscoveryNode(nodeName, nodeId, ta,
                     Collections.singletonMap(MachineLearning.MACHINE_MEMORY_NODE_ATTR, String.valueOf(machineMemory)),
                     Collections.emptySet(), Version.CURRENT));
@@ -59,14 +62,19 @@ public void testCalculateEffectiveMaxModelMemoryLimit() {
         ByteSizeValue effectiveMaxModelMemoryLimit =
             TransportMlInfoAction.calculateEffectiveMaxModelMemoryLimit(clusterSettings, nodes);
 
-        if (highestMlMachineMemory < 0) {
+        if (highestMlMachineMemoryBytes < 0) {
             assertThat(effectiveMaxModelMemoryLimit, nullValue());
         } else {
             assertThat(effectiveMaxModelMemoryLimit, notNullValue());
             assertThat(effectiveMaxModelMemoryLimit.getBytes()
                     + Math.max(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), DataFrameAnalyticsConfig.PROCESS_MEMORY_OVERHEAD.getBytes())
                     + MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes(),
-                lessThanOrEqualTo(highestMlMachineMemory * mlMemoryPercent / 100));
+                lessThanOrEqualTo(highestMlMachineMemoryBytes * mlMemoryPercent / 100));
         }
+
+        ByteSizeValue totalMlMemory = TransportMlInfoAction.calculateTotalMlMemory(clusterSettings, nodes);
+
+        assertThat(totalMlMemory, notNullValue());
+        assertThat(totalMlMemory, is(ByteSizeValue.ofMb(totalMlMemoryBytes / (1024 * 1024))));
     }
 }
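(Editor's illustration, not part of the patch.) Both `calculateTotalMlMemory` and the test assertion above round the summed byte count down to whole megabytes. A minimal, self-contained sketch of that rounding, using arbitrary example numbers:

```java
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;

public class TotalMlMemoryRoundingExample {
    public static void main(String[] args) {
        // Pretend two ML nodes contribute these per-node allowances (arbitrary example numbers).
        long totalMlMemoryBytes = 2_147_483_648L + 3_221_225_472L; // 2 GiB + 3 GiB

        // The expression used by calculateTotalMlMemory: truncate to whole megabytes ...
        ByteSizeValue viaByteSizeUnit = ByteSizeValue.ofMb(ByteSizeUnit.BYTES.toMB(totalMlMemoryBytes));
        // ... which matches the integer division used in the test's assertion.
        ByteSizeValue viaDivision = ByteSizeValue.ofMb(totalMlMemoryBytes / (1024 * 1024));

        System.out.println(viaByteSizeUnit.getStringRep()); // prints "5120mb"
        System.out.println(viaByteSizeUnit.equals(viaDivision)); // prints "true"
    }
}
```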
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
index 3478a0bc8f3c7..78043458a3fd0 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
@@ -17,8 +17,9 @@ teardown:
   - match: { defaults.anomaly_detectors.daily_model_snapshot_retention_after_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - is_false: limits.max_model_memory_limit
-  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  # We cannot assert an exact value for the next two as they will vary depending on the test machine
   - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { limits.total_ml_memory: "/\\d+mb/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -36,8 +37,9 @@ teardown:
   - match: { defaults.anomaly_detectors.daily_model_snapshot_retention_after_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - match: { limits.max_model_memory_limit: "512mb" }
-  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  # We cannot assert an exact value for the next two as they will vary depending on the test machine
   - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { limits.total_ml_memory: "/\\d+mb/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -55,8 +57,9 @@ teardown:
   - match: { defaults.anomaly_detectors.daily_model_snapshot_retention_after_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - match: { limits.max_model_memory_limit: "6gb" }
-  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  # We cannot assert an exact value for the next two as they will vary depending on the test machine
   - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { limits.total_ml_memory: "/\\d+mb/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -74,8 +77,9 @@ teardown:
   - match: { defaults.anomaly_detectors.daily_model_snapshot_retention_after_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - match: { limits.max_model_memory_limit: "6gb" }
-  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  # We cannot assert an exact value for the next two as they will vary depending on the test machine
   - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { limits.total_ml_memory: "/\\d+mb/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -95,4 +99,5 @@ teardown:
   - match: { limits.max_model_memory_limit: "1mb" }
   # This time we can assert an exact value for the next one because the hard limit is so low
   - match: { limits.effective_max_model_memory_limit: "1mb" }
+  - match: { limits.total_ml_memory: "/\\d+mb/" }
   - match: { upgrade_mode: false }
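(Editor's illustration, not part of the patch.) The YAML tests can only match total_ml_memory against /\d+mb/ because the value depends on the test machine. The unit test above derives each ML node's contribution as machineMemory * mlMemoryPercent / 100, while the production path goes through NativeMemoryCalculator.allowedBytesForMl. A hypothetical worked example of that simplified per-node arithmetic (the node size is made up; 30 is the default of xpack.ml.max_machine_memory_percent):

```java
public class PerNodeMlMemoryExample {
    public static void main(String[] args) {
        // Hypothetical ML node with 64 GiB of machine memory.
        long machineMemoryBytes = 64L * 1024 * 1024 * 1024;
        // xpack.ml.max_machine_memory_percent; 30 is the setting's default.
        int maxMachineMemoryPercent = 30;

        // Same simplified arithmetic as the test: this node's contribution to total_ml_memory ...
        long perNodeMlBytes = machineMemoryBytes * maxMachineMemoryPercent / 100;
        // ... which the API reports summed across ML nodes and rounded down to whole megabytes.
        System.out.println((perNodeMlBytes / (1024 * 1024)) + "mb");
    }
}
```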