diff --git a/src/main/java/com/ibm/watson/modelmesh/Metrics.java b/src/main/java/com/ibm/watson/modelmesh/Metrics.java index b246a5c3..5afc9491 100644 --- a/src/main/java/com/ibm/watson/modelmesh/Metrics.java +++ b/src/main/java/com/ibm/watson/modelmesh/Metrics.java @@ -39,14 +39,20 @@ import java.lang.reflect.Array; import java.net.SocketAddress; import java.nio.channels.DatagramChannel; -import java.util.*; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; import static com.ibm.watson.modelmesh.Metric.*; +import static com.ibm.watson.modelmesh.Metric.MetricType.*; import static com.ibm.watson.modelmesh.ModelMesh.M; import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR; import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR; @@ -56,14 +62,14 @@ * */ interface Metrics extends AutoCloseable { + boolean isPerModelMetricsEnabled(); boolean isEnabled(); - void logTimingMetricSince(Metric metric, long prevTime, boolean isNano); - void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano); + void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId); - void logSizeEventMetric(Metric metric, long value); + void logSizeEventMetric(Metric metric, long value, String modelId); void logGaugeMetric(Metric metric, long value); @@ -101,7 +107,7 @@ default void logInstanceStats(final InstanceRecord ir) { * @param respPayloadSize response payload size in bytes (or -1 if not applicable) */ void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code, - int reqPayloadSize, int respPayloadSize); + int reqPayloadSize, int respPayloadSize, String modelId, String vModelId); default void registerGlobals() {} @@ -111,6 +117,11 @@ default void unregisterGlobals() {} default void close() {} Metrics NO_OP_METRICS = new Metrics() { + @Override + public boolean isPerModelMetricsEnabled() { + return false; + } + @Override public boolean isEnabled() { return false; @@ -120,10 +131,10 @@ public boolean isEnabled() { public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {} @Override - public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {} + public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId){} @Override - public void logSizeEventMetric(Metric metric, long value) {} + public void logSizeEventMetric(Metric metric, long value, String modelId){} @Override public void logGaugeMetric(Metric metric, long value) {} @@ -136,7 +147,7 @@ public void logInstanceStats(InstanceRecord ir) {} @Override public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code, - int reqPayloadSize, int respPayloadSize) {} + int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {} }; final class PrometheusMetrics implements Metrics { @@ -154,12 +165,14 @@ final class PrometheusMetrics implements Metrics { private final CollectorRegistry registry; private final NettyServer metricServer; private final boolean shortNames; + private final boolean enablePerModelMetrics; private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class); public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception { int port = 2112; boolean shortNames = true; boolean https = true; + boolean enablePerModelMetrics = true; String memMetrics = "all"; // default to all for (Entry<String, String> ent : params.entrySet()) { switch (ent.getKey()) { @@ -170,6 +183,9 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet throw new Exception("Invalid metrics port: " + ent.getValue()); } break; + case "per_model_metrics": + enablePerModelMetrics = "true".equalsIgnoreCase(ent.getValue()); + break; case "fq_names": shortNames = !"true".equalsIgnoreCase(ent.getValue()); break; @@ -188,6 +204,7 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet throw new Exception("Unrecognized metrics config parameter: " + ent.getKey()); } } + this.enablePerModelMetrics = enablePerModelMetrics; registry = new CollectorRegistry(); for (Metric m : Metric.values()) { @@ -220,10 +237,15 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet } if (m == API_REQUEST_TIME || m == API_REQUEST_COUNT || m == INVOKE_MODEL_TIME - || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) { - builder.labelNames("method", "code"); + || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) { + if (this.enablePerModelMetrics && m.type != COUNTER_WITH_HISTO) { + builder.labelNames("method", "code", "modelId"); + } else { + builder.labelNames("method", "code"); + } + } else if (this.enablePerModelMetrics && m.type != GAUGE && m.type != COUNTER && m.type != COUNTER_WITH_HISTO) { + builder.labelNames("modelId"); } - Collector collector = builder.name(m.promName).help(m.description).create(); metricsMap.put(m, collector); if (!m.global) { @@ -251,7 +273,6 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet this.metricServer = new NettyServer(registry, port, https); this.shortNames = shortNames; - logger.info("Will expose " + (https ? "https" : "http") + " Prometheus metrics on port " + port + " using " + (shortNames ? "short" : "fully-qualified") + " method names"); @@ -330,6 +351,11 @@ public void close() { this.metricServer.close(); } + @Override + public boolean isPerModelMetricsEnabled() { + return enablePerModelMetrics; + } + @Override public boolean isEnabled() { return true; @@ -342,13 +368,21 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) { } @Override - public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) { - ((Histogram) metricsMap.get(metric)).observe(isNano ? elapsed / M : elapsed); + public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) { + if (enablePerModelMetrics) { + ((Histogram) metricsMap.get(metric)).labels(modelId).observe(isNano ? elapsed / M : elapsed); + } else { + ((Histogram) metricsMap.get(metric)).observe(isNano ? elapsed / M : elapsed); + } } @Override - public void logSizeEventMetric(Metric metric, long value) { - ((Histogram) metricsMap.get(metric)).observe(value * metric.newMultiplier); + public void logSizeEventMetric(Metric metric, long value, String modelId) { + if (enablePerModelMetrics) { + ((Histogram) metricsMap.get(metric)).labels(modelId).observe(value * metric.newMultiplier); + } else { + ((Histogram) metricsMap.get(metric)).observe(value * metric.newMultiplier); + } } @Override @@ -365,23 +399,35 @@ public void logCounterMetric(Metric metric) { @Override public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code, - int reqPayloadSize, int respPayloadSize) { + int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) { final long elapsedMillis = elapsedNanos / M; final Histogram timingHisto = (Histogram) metricsMap .get(external ? API_REQUEST_TIME : INVOKE_MODEL_TIME); - + String mId = vModelId == null ? modelId : vModelId; int idx = shortNames ? name.indexOf('/') : -1; - final String methodName = idx == -1 ? name : name.substring(idx + 1); - - timingHisto.labels(methodName, code.name()).observe(elapsedMillis); - + String methodName = idx == -1 ? name : name.substring(idx + 1); + if (enablePerModelMetrics) { + timingHisto.labels(methodName, code.name(), mId).observe(elapsedMillis); + } else { + timingHisto.labels(methodName, code.name()).observe(elapsedMillis); + } if (reqPayloadSize != -1) { - ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE)) - .labels(methodName, code.name()).observe(reqPayloadSize); + if (enablePerModelMetrics) { + ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE)) + .labels(methodName, code.name(), mId).observe(reqPayloadSize); + } else { + ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE)) + .labels(methodName, code.name()).observe(reqPayloadSize); + } } if (respPayloadSize != -1) { - ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE)) - .labels(methodName, code.name()).observe(respPayloadSize); + if (enablePerModelMetrics) { + ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE)) + .labels(methodName, code.name(), mId).observe(respPayloadSize); + } else { + ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE)) + .labels(methodName, code.name()).observe(respPayloadSize); + } } } @@ -437,6 +483,11 @@ protected StatsDSender createSender(Callable<SocketAddress> addressLookup, int q + (shortNames ? "short" : "fully-qualified") + " method names"); } + @Override + public boolean isPerModelMetricsEnabled() { + return false; + } + @Override public boolean isEnabled() { return true; @@ -454,12 +505,12 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) { } @Override - public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) { + public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) { client.recordExecutionTime(name(metric), isNano ? elapsed / M : elapsed); } @Override - public void logSizeEventMetric(Metric metric, long value) { + public void logSizeEventMetric(Metric metric, long value, String modelId) { if (!legacy) { value *= metric.newMultiplier; } @@ -497,7 +548,7 @@ static String[] getOkTags(String method, boolean shortName) { @Override public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code, - int reqPayloadSize, int respPayloadSize) { + int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) { final StatsDClient client = this.client; final long elapsedMillis = elapsedNanos / M; final String countName = name(external ? API_REQUEST_COUNT : INVOKE_MODEL_COUNT); diff --git a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java index 9755df49..53b7b918 100644 --- a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java +++ b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java @@ -1966,7 +1966,7 @@ final synchronized boolean doRemove(final boolean evicted, // "unload" event if explicit unloading isn't enabled. // Otherwise, this gets recorded in a callback set in the // CacheEntry.unload(int) method - metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, 0L, false); + metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, 0L, false, modelId); metrics.logCounterMetric(Metric.UNLOAD_MODEL); } } @@ -2037,7 +2037,7 @@ public void onSuccess(Boolean reallyHappened) { //TODO probably only log if took longer than a certain time long tookMillis = msSince(beforeNanos); logger.info("Unload of " + modelId + " completed in " + tookMillis + "ms"); - metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, tookMillis, false); + metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, tookMillis, false, modelId); metrics.logCounterMetric(Metric.UNLOAD_MODEL); } // else considered trivially succeeded because the corresponding @@ -2158,7 +2158,7 @@ public final void run() { long queueStartTimeNanos = getAndResetLoadingQueueStartTimeNanos(); if (queueStartTimeNanos > 0) { long queueDelayMillis = (nanoTime() - queueStartTimeNanos) / M; - metrics.logSizeEventMetric(Metric.LOAD_MODEL_QUEUE_DELAY, queueDelayMillis); + metrics.logSizeEventMetric(Metric.LOAD_MODEL_QUEUE_DELAY, queueDelayMillis, modelId); // Only log if the priority value is "in the future" which indicates // that there is or were runtime requests waiting for this load. // Otherwise we don't care about arbitrary delays here @@ -2228,7 +2228,7 @@ public final void run() { loadingTimeStats(modelType).recordTime(tookMillis); logger.info("Load of model " + modelId + " type=" + modelType + " completed in " + tookMillis + "ms"); - metrics.logTimingMetricDuration(Metric.LOAD_MODEL_TIME, tookMillis, false); + metrics.logTimingMetricDuration(Metric.LOAD_MODEL_TIME, tookMillis, false, modelId); metrics.logCounterMetric(Metric.LOAD_MODEL); } catch (Throwable t) { loadFuture = null; @@ -2388,7 +2388,7 @@ protected final void complete(LoadedRuntime<T> result, Throwable error) { if (size > 0) { long sizeBytes = size * UNIT_SIZE; logger.info("Model " + modelId + " size = " + size + " units" + ", ~" + mb(sizeBytes)); - metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes); + metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes, modelId); } else { try { long before = nanoTime(); @@ -2397,9 +2397,9 @@ protected final void complete(LoadedRuntime<T> result, Throwable error) { long took = msSince(before), sizeBytes = size * UNIT_SIZE; logger.info("Model " + modelId + " size = " + size + " units" + ", ~" + mb(sizeBytes) + " sizing took " + took + "ms"); - metrics.logTimingMetricDuration(Metric.MODEL_SIZING_TIME, took, false); + metrics.logTimingMetricDuration(Metric.MODEL_SIZING_TIME, took, false, modelId); // this is actually a size (bytes), not a "time" - metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes); + metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes, modelId); } } catch (Exception e) { if (!isInterruption(e) && state == SIZING) { @@ -2722,7 +2722,7 @@ protected void beforeInvoke(int requestWeight) //noinspection ThrowFromFinallyBlock throw new ModelNotHereException(instanceId, modelId); } - metrics.logTimingMetricDuration(Metric.QUEUE_DELAY, tookMillis, false); + metrics.logTimingMetricDuration(Metric.QUEUE_DELAY, tookMillis, false, modelId); } } } @@ -2901,7 +2901,7 @@ public void onEviction(String key, CacheEntry<?> ce, long lastUsed) { logger.info("Evicted " + (failed ? "failed model record" : "model") + " " + key + " from local cache, last used " + readableTime(millisSinceLastUsed) + " ago (" + lastUsed + "ms), invoked " + ce.getTotalInvocationCount() + " times"); - metrics.logTimingMetricDuration(Metric.AGE_AT_EVICTION, millisSinceLastUsed, false); + metrics.logTimingMetricDuration(Metric.AGE_AT_EVICTION, millisSinceLastUsed, false, ce.modelId); metrics.logCounterMetric(Metric.EVICT_MODEL); } @@ -3989,9 +3989,10 @@ else if (mr.getInstanceIds().containsKey(instanceId)) { throw t; } finally { if (methodStartNanos > 0L && metrics.isEnabled()) { + String[] extraLabels = new String[]{modelId}; // only logged here in non-grpc (legacy) mode metrics.logRequestMetrics(true, getRequestMethodName(method, args), - nanoTime() - methodStartNanos, metricStatusCode, -1, -1); + nanoTime() - methodStartNanos, metricStatusCode, -1, -1, modelId, ""); } curThread.setName(threadNameBefore); } @@ -4450,7 +4451,7 @@ private Object invokeLocalModel(CacheEntry<?> ce, Method method, Object[] args) long delayMillis = msSince(beforeNanos); logger.info("Cache miss for model invocation, held up " + delayMillis + "ms"); metrics.logCounterMetric(Metric.CACHE_MISS); - metrics.logTimingMetricDuration(Metric.CACHE_MISS_DELAY, delayMillis, false); + metrics.logTimingMetricDuration(Metric.CACHE_MISS_DELAY, delayMillis, false, ce.modelId); } } } else { @@ -4528,7 +4529,7 @@ private Object invokeLocalModel(CacheEntry<?> ce, Method method, Object[] args) ce.afterInvoke(weight, tookNanos); if (code != null && metrics.isEnabled()) { metrics.logRequestMetrics(false, getRequestMethodName(method, args), - tookNanos, code, -1, -1); + tookNanos, code, -1, -1, ce.modelId, ""); } } } diff --git a/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java b/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java index ff143ac6..5db750f1 100644 --- a/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java +++ b/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java @@ -87,6 +87,7 @@ import io.netty.util.ReferenceCountUtil; import io.netty.util.concurrent.FastThreadLocalThread; import org.apache.thrift.TException; +import org.checkerframework.checker.units.qual.A; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -783,8 +784,13 @@ public void onHalfClose() { call.close(status, emptyMeta()); Metrics metrics = delegate.metrics; if (metrics.isEnabled()) { - metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos, - status.getCode(), reqSize, respSize); + if (isVModel) { + metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos, + status.getCode(), reqSize, respSize, "", Iterables.toString(modelIds)); + } else { + metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos, + status.getCode(), reqSize, respSize, Iterables.toString(modelIds), ""); + } } } } diff --git a/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java b/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java index ffca070b..c7b25c1f 100644 --- a/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java +++ b/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java @@ -161,7 +161,7 @@ private static int nextIdx(int i, int len) { private void validateCount(int count) { if (count != labelCount) { - throw new IllegalArgumentException("Incorrect number of labels."); + throw new IllegalArgumentException("Incorrect number of labels. Expected: " + labelCount + ", got: " + count); } } diff --git a/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java b/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java index dc6ee35e..4ca4f05e 100644 --- a/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java +++ b/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java @@ -32,6 +32,7 @@ import io.grpc.ManagedChannel; import io.grpc.netty.NettyChannelBuilder; import io.netty.handler.ssl.util.InsecureTrustManagerFactory; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import javax.net.ssl.SSLContext; @@ -76,10 +77,11 @@ protected int requestCount() { @Override protected Map<String, String> extraEnvVars() { - return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME); + return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME + + ";per_model_metrics=true"); } - @Test + @BeforeAll public void metricsTest() throws Exception { ManagedChannel channel = NettyChannelBuilder.forAddress("localhost", 9000).usePlaintext().build(); @@ -150,8 +152,9 @@ public void metricsTest() throws Exception { channel.shutdown(); } } + protected Map<String,Double> metrics; - public void verifyMetrics() throws Exception { + protected void prepareMetrics() throws Exception { // Insecure trust manager - skip TLS verification SSLContext sslContext = SSLContext.getInstance("TLS"); sslContext.init(null, InsecureTrustManagerFactory.INSTANCE.getTrustManagers(), null); @@ -168,33 +171,40 @@ public void verifyMetrics() throws Exception { final Pattern line = Pattern.compile("([^\\s{]+(?:\\{.+\\})?)\\s+(\\S+)"); - Map<String,Double> metrics = resp.body().filter(s -> !s.startsWith("#")).map(s -> line.matcher(s)) + metrics = resp.body().filter(s -> !s.startsWith("#")).map(s -> line.matcher(s)) .filter(Matcher::matches) .collect(Collectors.toMap(m -> m.group(1), m -> Double.parseDouble(m.group(2)))); + } + + @Test + public void verifyMetrics() throws Exception { + // Insecure trust manager - skip TLS verification + prepareMetrics(); + System.out.println(metrics.size() + " metrics scraped"); // Spot check some expected metrics and values // External response time should all be < 2000ms (includes cache hit loading time) - assertEquals(40.0, metrics.get("modelmesh_api_request_milliseconds_bucket{method=\"predict\",code=\"OK\",le=\"2000.0\",}")); + assertEquals(40.0, metrics.get("modelmesh_api_request_milliseconds_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2000.0\",}")); // External response time should all be < 200ms (includes cache hit loading time) - assertEquals(40.0, metrics.get("modelmesh_invoke_model_milliseconds_bucket{method=\"predict\",code=\"OK\",le=\"200.0\",}")); + assertEquals(40.0, + metrics.get("modelmesh_invoke_model_milliseconds_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"120000.0\",}")); // Simulated model sizing time is < 200ms - assertEquals(1.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{le=\"200.0\",}")); + assertEquals(1.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{modelId=\"myModel\",le=\"60000.0\",}")); // Simulated model sizing time is > 50ms - assertEquals(0.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{le=\"50.0\",}")); + assertEquals(0.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{modelId=\"myModel\",le=\"50.0\",}")); // Simulated model size is between 64MiB and 256MiB - assertEquals(0.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{le=\"6.7108864E7\",}")); - assertEquals(1.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{le=\"2.68435456E8\",}")); + assertEquals(0.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{modelId=\"myModel\",le=\"6.7108864E7\",}")); + assertEquals(1.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{modelId=\"myModel\",le=\"2.68435456E8\",}")); // One model is loaded - assertEquals(1.0, metrics.get("modelmesh_models_loaded_total")); assertEquals(1.0, metrics.get("modelmesh_instance_models_total")); // Histogram counts should reflect the two payload sizes (30 small, 10 large) - assertEquals(30.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"128.0\",}")); - assertEquals(40.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"2097152.0\",}")); - assertEquals(30.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"128.0\",}")); - assertEquals(40.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"2097152.0\",}")); + assertEquals(30.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"128.0\",}")); + assertEquals(40.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2097152.0\",}")); + assertEquals(30.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"128.0\",}")); + assertEquals(40.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2097152.0\",}")); // Memory metrics assertTrue(metrics.containsKey("netty_pool_mem_allocated_bytes{area=\"direct\",}"));