diff --git a/src/main/java/com/ibm/watson/modelmesh/Metrics.java b/src/main/java/com/ibm/watson/modelmesh/Metrics.java
index b246a5c3..5afc9491 100644
--- a/src/main/java/com/ibm/watson/modelmesh/Metrics.java
+++ b/src/main/java/com/ibm/watson/modelmesh/Metrics.java
@@ -39,14 +39,20 @@
 import java.lang.reflect.Array;
 import java.net.SocketAddress;
 import java.nio.channels.DatagramChannel;
-import java.util.*;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Stream;
 
 import static com.ibm.watson.modelmesh.Metric.*;
+import static com.ibm.watson.modelmesh.Metric.MetricType.*;
 import static com.ibm.watson.modelmesh.ModelMesh.M;
 import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR;
 import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR;
@@ -56,14 +62,14 @@
  *
  */
 interface Metrics extends AutoCloseable {
+    boolean isPerModelMetricsEnabled();
 
     boolean isEnabled();
-
     void logTimingMetricSince(Metric metric, long prevTime, boolean isNano);
 
-    void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano);
+    void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId);
 
-    void logSizeEventMetric(Metric metric, long value);
+    void logSizeEventMetric(Metric metric, long value, String modelId);
 
     void logGaugeMetric(Metric metric, long value);
 
@@ -101,7 +107,7 @@ default void logInstanceStats(final InstanceRecord ir) {
      * @param respPayloadSize response payload size in bytes (or -1 if not applicable)
      */
     void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
-                           int reqPayloadSize, int respPayloadSize);
+                           int reqPayloadSize, int respPayloadSize, String modelId, String vModelId);
 
     default void registerGlobals() {}
 
@@ -111,6 +117,11 @@ default void unregisterGlobals() {}
     default void close() {}
 
     Metrics NO_OP_METRICS = new Metrics() {
+        @Override
+        public boolean isPerModelMetricsEnabled() {
+            return false;
+        }
+
         @Override
         public boolean isEnabled() {
             return false;
@@ -120,10 +131,10 @@ public boolean isEnabled() {
         public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {}
 
         @Override
-        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {}
+        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId){}
 
         @Override
-        public void logSizeEventMetric(Metric metric, long value) {}
+        public void logSizeEventMetric(Metric metric, long value, String modelId){}
 
         @Override
         public void logGaugeMetric(Metric metric, long value) {}
@@ -136,7 +147,7 @@ public void logInstanceStats(InstanceRecord ir) {}
 
         @Override
         public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
-                                      int reqPayloadSize, int respPayloadSize) {}
+                                      int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {}
     };
 
     final class PrometheusMetrics implements Metrics {
@@ -154,12 +165,14 @@ final class PrometheusMetrics implements Metrics {
         private final CollectorRegistry registry;
         private final NettyServer metricServer;
         private final boolean shortNames;
+        private final boolean enablePerModelMetrics;
         private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class);
 
         public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception {
             int port = 2112;
             boolean shortNames = true;
             boolean https = true;
+            boolean enablePerModelMetrics = true;
             String memMetrics = "all"; // default to all
             for (Entry<String, String> ent : params.entrySet()) {
                 switch (ent.getKey()) {
@@ -170,6 +183,9 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
                         throw new Exception("Invalid metrics port: " + ent.getValue());
                     }
                     break;
+                case "per_model_metrics":
+                    enablePerModelMetrics = "true".equalsIgnoreCase(ent.getValue());
+                    break;
                 case "fq_names":
                     shortNames = !"true".equalsIgnoreCase(ent.getValue());
                     break;
@@ -188,6 +204,7 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
                     throw new Exception("Unrecognized metrics config parameter: " + ent.getKey());
                 }
             }
+            this.enablePerModelMetrics = enablePerModelMetrics;
 
             registry = new CollectorRegistry();
             for (Metric m : Metric.values()) {
@@ -220,10 +237,15 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
                 }
 
                 if (m == API_REQUEST_TIME || m == API_REQUEST_COUNT || m == INVOKE_MODEL_TIME
-                    || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) {
-                    builder.labelNames("method", "code");
+                        || m == INVOKE_MODEL_COUNT || m == REQUEST_PAYLOAD_SIZE || m == RESPONSE_PAYLOAD_SIZE) {
+                    if (this.enablePerModelMetrics && m.type != COUNTER_WITH_HISTO) {
+                        builder.labelNames("method", "code", "modelId");
+                    } else {
+                        builder.labelNames("method", "code");
+                    }
+                } else if (this.enablePerModelMetrics && m.type != GAUGE && m.type != COUNTER && m.type != COUNTER_WITH_HISTO) {
+                    builder.labelNames("modelId");
                 }
-
                 Collector collector = builder.name(m.promName).help(m.description).create();
                 metricsMap.put(m, collector);
                 if (!m.global) {
@@ -251,7 +273,6 @@ public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMet
 
             this.metricServer = new NettyServer(registry, port, https);
             this.shortNames = shortNames;
-
             logger.info("Will expose " + (https ? "https" : "http") + " Prometheus metrics on port " + port
                         + " using " + (shortNames ? "short" : "fully-qualified") + " method names");
 
@@ -330,6 +351,11 @@ public void close() {
             this.metricServer.close();
         }
 
+        @Override
+        public boolean isPerModelMetricsEnabled() {
+            return enablePerModelMetrics;
+        }
+
         @Override
         public boolean isEnabled() {
             return true;
@@ -342,13 +368,21 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
         }
 
         @Override
-        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {
-            ((Histogram) metricsMap.get(metric)).observe(isNano ? elapsed / M : elapsed);
+        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) {
+            if (enablePerModelMetrics) {
+                ((Histogram) metricsMap.get(metric)).labels(modelId).observe(isNano ? elapsed / M : elapsed);
+            } else {
+                ((Histogram) metricsMap.get(metric)).observe(isNano ? elapsed / M : elapsed);
+            }
         }
 
         @Override
-        public void logSizeEventMetric(Metric metric, long value) {
-            ((Histogram) metricsMap.get(metric)).observe(value * metric.newMultiplier);
+        public void logSizeEventMetric(Metric metric, long value, String modelId) {
+            if (enablePerModelMetrics) {
+                ((Histogram) metricsMap.get(metric)).labels(modelId).observe(value * metric.newMultiplier);
+            } else {
+                ((Histogram) metricsMap.get(metric)).observe(value * metric.newMultiplier);
+            }
         }
 
         @Override
@@ -365,23 +399,35 @@ public void logCounterMetric(Metric metric) {
 
         @Override
         public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
-                                      int reqPayloadSize, int respPayloadSize) {
+                                      int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {
             final long elapsedMillis = elapsedNanos / M;
             final Histogram timingHisto = (Histogram) metricsMap
                     .get(external ? API_REQUEST_TIME : INVOKE_MODEL_TIME);
-
+            String mId = vModelId == null ? modelId : vModelId; // NOTE(review): every call site in this patch passes "" (never null) for vModelId, so modelId is never selected here - confirm intent
             int idx = shortNames ? name.indexOf('/') : -1;
-            final String methodName = idx == -1 ? name : name.substring(idx + 1);
-
-            timingHisto.labels(methodName, code.name()).observe(elapsedMillis);
-
+            String methodName = idx == -1 ? name : name.substring(idx + 1);
+            if (enablePerModelMetrics) { // number of label values must match the labelNames(...) chosen in the constructor
+                timingHisto.labels(methodName, code.name(), mId).observe(elapsedMillis);
+            } else {
+                timingHisto.labels(methodName, code.name()).observe(elapsedMillis);
+            }
             if (reqPayloadSize != -1) {
-                ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE))
-                    .labels(methodName, code.name()).observe(reqPayloadSize);
+                if (enablePerModelMetrics) { // -1 means "not applicable", so nothing is recorded in that case
+                    ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE))
+                            .labels(methodName, code.name(), mId).observe(reqPayloadSize);
+                } else {
+                    ((Histogram) metricsMap.get(REQUEST_PAYLOAD_SIZE))
+                            .labels(methodName, code.name()).observe(reqPayloadSize);
+                }
             }
             if (respPayloadSize != -1) {
-                ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE))
-                        .labels(methodName, code.name()).observe(respPayloadSize);
+                if (enablePerModelMetrics) { // same labelling rule as the request-size histogram above
+                    ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE))
+                            .labels(methodName, code.name(), mId).observe(respPayloadSize);
+                } else {
+                    ((Histogram) metricsMap.get(RESPONSE_PAYLOAD_SIZE))
+                            .labels(methodName, code.name()).observe(respPayloadSize);
+                }
             }
         }
 
@@ -437,6 +483,11 @@ protected StatsDSender createSender(Callable<SocketAddress> addressLookup, int q
                         + (shortNames ? "short" : "fully-qualified") + " method names");
         }
 
+        @Override
+        public boolean isPerModelMetricsEnabled() {
+            return false;
+        }
+
         @Override
         public boolean isEnabled() {
             return true;
@@ -454,12 +505,12 @@ public void logTimingMetricSince(Metric metric, long prevTime, boolean isNano) {
         }
 
         @Override
-        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano) {
+        public void logTimingMetricDuration(Metric metric, long elapsed, boolean isNano, String modelId) {
             client.recordExecutionTime(name(metric), isNano ? elapsed / M : elapsed);
         }
 
         @Override
-        public void logSizeEventMetric(Metric metric, long value) {
+        public void logSizeEventMetric(Metric metric, long value, String modelId) {
             if (!legacy) {
                 value *= metric.newMultiplier;
             }
@@ -497,7 +548,7 @@ static String[] getOkTags(String method, boolean shortName) {
 
         @Override
         public void logRequestMetrics(boolean external, String name, long elapsedNanos, Code code,
-                                      int reqPayloadSize, int respPayloadSize) {
+                                      int reqPayloadSize, int respPayloadSize, String modelId, String vModelId) {
             final StatsDClient client = this.client;
             final long elapsedMillis = elapsedNanos / M;
             final String countName = name(external ? API_REQUEST_COUNT : INVOKE_MODEL_COUNT);
diff --git a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java
index 9755df49..53b7b918 100644
--- a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java
+++ b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java
@@ -1966,7 +1966,7 @@ final synchronized boolean doRemove(final boolean evicted,
                     // "unload" event if explicit unloading isn't enabled.
                     // Otherwise, this gets recorded in a callback set in the
                     // CacheEntry.unload(int) method
-                    metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, 0L, false);
+                    metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, 0L, false, modelId);
                     metrics.logCounterMetric(Metric.UNLOAD_MODEL);
                 }
             }
@@ -2037,7 +2037,7 @@ public void onSuccess(Boolean reallyHappened) {
                         //TODO probably only log if took longer than a certain time
                         long tookMillis = msSince(beforeNanos);
                         logger.info("Unload of " + modelId + " completed in " + tookMillis + "ms");
-                        metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, tookMillis, false);
+                        metrics.logTimingMetricDuration(Metric.UNLOAD_MODEL_TIME, tookMillis, false, modelId);
                         metrics.logCounterMetric(Metric.UNLOAD_MODEL);
                     }
                     // else considered trivially succeeded because the corresponding
@@ -2158,7 +2158,7 @@ public final void run() {
                     long queueStartTimeNanos = getAndResetLoadingQueueStartTimeNanos();
                     if (queueStartTimeNanos > 0) {
                         long queueDelayMillis = (nanoTime() - queueStartTimeNanos) / M;
-                        metrics.logSizeEventMetric(Metric.LOAD_MODEL_QUEUE_DELAY, queueDelayMillis);
+                        metrics.logSizeEventMetric(Metric.LOAD_MODEL_QUEUE_DELAY, queueDelayMillis, modelId);
                         // Only log if the priority value is "in the future" which indicates
                         // that there is or were runtime requests waiting for this load.
                         // Otherwise we don't care about arbitrary delays here
@@ -2228,7 +2228,7 @@ public final void run() {
                         loadingTimeStats(modelType).recordTime(tookMillis);
                         logger.info("Load of model " + modelId + " type=" + modelType + " completed in " + tookMillis
                                     + "ms");
-                        metrics.logTimingMetricDuration(Metric.LOAD_MODEL_TIME, tookMillis, false);
+                        metrics.logTimingMetricDuration(Metric.LOAD_MODEL_TIME, tookMillis, false, modelId);
                         metrics.logCounterMetric(Metric.LOAD_MODEL);
                     } catch (Throwable t) {
                         loadFuture = null;
@@ -2388,7 +2388,7 @@ protected final void complete(LoadedRuntime<T> result, Throwable error) {
                     if (size > 0) {
                         long sizeBytes = size * UNIT_SIZE;
                         logger.info("Model " + modelId + " size = " + size + " units" + ", ~" + mb(sizeBytes));
-                        metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes);
+                        metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes, modelId);
                     } else {
                         try {
                             long before = nanoTime();
@@ -2397,9 +2397,9 @@ protected final void complete(LoadedRuntime<T> result, Throwable error) {
                                 long took = msSince(before), sizeBytes = size * UNIT_SIZE;
                                 logger.info("Model " + modelId + " size = " + size + " units" + ", ~" + mb(sizeBytes)
                                             + " sizing took " + took + "ms");
-                                metrics.logTimingMetricDuration(Metric.MODEL_SIZING_TIME, took, false);
+                                metrics.logTimingMetricDuration(Metric.MODEL_SIZING_TIME, took, false, modelId);
                                 // this is actually a size (bytes), not a "time"
-                                metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes);
+                                metrics.logSizeEventMetric(Metric.LOADED_MODEL_SIZE, sizeBytes, modelId);
                             }
                         } catch (Exception e) {
                             if (!isInterruption(e) && state == SIZING) {
@@ -2722,7 +2722,7 @@ protected void beforeInvoke(int requestWeight)
                         //noinspection ThrowFromFinallyBlock
                         throw new ModelNotHereException(instanceId, modelId);
                     }
-                    metrics.logTimingMetricDuration(Metric.QUEUE_DELAY, tookMillis, false);
+                    metrics.logTimingMetricDuration(Metric.QUEUE_DELAY, tookMillis, false, modelId);
                 }
             }
         }
@@ -2901,7 +2901,7 @@ public void onEviction(String key, CacheEntry<?> ce, long lastUsed) {
                 logger.info("Evicted " + (failed ? "failed model record" : "model") + " " + key
                     + " from local cache, last used " + readableTime(millisSinceLastUsed) + " ago (" + lastUsed
                     + "ms), invoked " + ce.getTotalInvocationCount() + " times");
-                metrics.logTimingMetricDuration(Metric.AGE_AT_EVICTION, millisSinceLastUsed, false);
+                metrics.logTimingMetricDuration(Metric.AGE_AT_EVICTION, millisSinceLastUsed, false, ce.modelId);
                 metrics.logCounterMetric(Metric.EVICT_MODEL);
             }
 
@@ -3989,9 +3989,10 @@ else if (mr.getInstanceIds().containsKey(instanceId)) {
             throw t;
         } finally {
             if (methodStartNanos > 0L && metrics.isEnabled()) {
+                // modelId is forwarded as the per-model metrics label; this call passes "" for vModelId
                 // only logged here in non-grpc (legacy) mode
                 metrics.logRequestMetrics(true, getRequestMethodName(method, args),
-                        nanoTime() - methodStartNanos, metricStatusCode, -1, -1);
+                        nanoTime() - methodStartNanos, metricStatusCode, -1, -1, modelId, "");
             }
             curThread.setName(threadNameBefore);
         }
@@ -4450,7 +4451,7 @@ private Object invokeLocalModel(CacheEntry<?> ce, Method method, Object[] args)
                         long delayMillis = msSince(beforeNanos);
                         logger.info("Cache miss for model invocation, held up " + delayMillis + "ms");
                         metrics.logCounterMetric(Metric.CACHE_MISS);
-                        metrics.logTimingMetricDuration(Metric.CACHE_MISS_DELAY, delayMillis, false);
+                        metrics.logTimingMetricDuration(Metric.CACHE_MISS_DELAY, delayMillis, false, ce.modelId);
                     }
                 }
             } else {
@@ -4528,7 +4529,7 @@ private Object invokeLocalModel(CacheEntry<?> ce, Method method, Object[] args)
             ce.afterInvoke(weight, tookNanos);
             if (code != null && metrics.isEnabled()) {
                 metrics.logRequestMetrics(false, getRequestMethodName(method, args),
-                        tookNanos, code, -1, -1);
+                        tookNanos, code, -1, -1, ce.modelId, "");
             }
         }
     }
diff --git a/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java b/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java
index ff143ac6..5db750f1 100644
--- a/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java
+++ b/src/main/java/com/ibm/watson/modelmesh/ModelMeshApi.java
@@ -783,8 +783,13 @@ public void onHalfClose() {
                     call.close(status, emptyMeta());
                     Metrics metrics = delegate.metrics;
                     if (metrics.isEnabled()) {
-                        metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos,
-                                status.getCode(), reqSize, respSize);
+                        if (isVModel) { // vModel requests report the ids in the vModelId argument, others in modelId
+                            metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos,
+                                    status.getCode(), reqSize, respSize, "", Iterables.toString(modelIds));
+                        } else {
+                            metrics.logRequestMetrics(true, methodName, nanoTime() - startNanos,
+                                    status.getCode(), reqSize, respSize, Iterables.toString(modelIds), "");
+                        }
                     }
                 }
             }
diff --git a/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java b/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java
index ffca070b..c7b25c1f 100644
--- a/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java
+++ b/src/main/java/com/ibm/watson/prometheus/SimpleCollector.java
@@ -161,7 +161,7 @@ private static int nextIdx(int i, int len) {
 
   private void validateCount(int count) {
     if (count != labelCount) {
-      throw new IllegalArgumentException("Incorrect number of labels.");
+      throw new IllegalArgumentException("Incorrect number of labels. Expected: " + labelCount + ", got: " + count);
     }
   }
 
diff --git a/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java b/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java
index dc6ee35e..4ca4f05e 100644
--- a/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java
+++ b/src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java
@@ -32,6 +32,7 @@
 import io.grpc.ManagedChannel;
 import io.grpc.netty.NettyChannelBuilder;
 import io.netty.handler.ssl.util.InsecureTrustManagerFactory;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
 import javax.net.ssl.SSLContext;
@@ -76,10 +77,11 @@ protected int requestCount() {
 
     @Override
     protected Map<String, String> extraEnvVars() {
-        return  ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME);
+        return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME +
+                ";per_model_metrics=true");
     }
 
-    @Test
+    @BeforeAll
     public void metricsTest() throws Exception {
 
         ManagedChannel channel = NettyChannelBuilder.forAddress("localhost", 9000).usePlaintext().build();
@@ -150,8 +152,9 @@ public void metricsTest() throws Exception {
             channel.shutdown();
         }
     }
+    protected Map<String,Double> metrics;
 
-    public void verifyMetrics() throws Exception {
+    protected void prepareMetrics() throws Exception {
         // Insecure trust manager - skip TLS verification
         SSLContext sslContext = SSLContext.getInstance("TLS");
         sslContext.init(null, InsecureTrustManagerFactory.INSTANCE.getTrustManagers(), null);
@@ -168,33 +171,40 @@ public void verifyMetrics() throws Exception {
 
         final Pattern line = Pattern.compile("([^\\s{]+(?:\\{.+\\})?)\\s+(\\S+)");
 
-        Map<String,Double> metrics = resp.body().filter(s -> !s.startsWith("#")).map(s -> line.matcher(s))
+        metrics = resp.body().filter(s -> !s.startsWith("#")).map(s -> line.matcher(s))
                 .filter(Matcher::matches)
                 .collect(Collectors.toMap(m -> m.group(1), m -> Double.parseDouble(m.group(2))));
 
+    }
+
+    @Test
+    public void verifyMetrics() throws Exception {
+        // Populate the metrics map from the scraped endpoint response
+        prepareMetrics();
+
         System.out.println(metrics.size() + " metrics scraped");
 
         // Spot check some expected metrics and values
 
         // External response time should all be < 2000ms (includes cache hit loading time)
-        assertEquals(40.0, metrics.get("modelmesh_api_request_milliseconds_bucket{method=\"predict\",code=\"OK\",le=\"2000.0\",}"));
+        assertEquals(40.0, metrics.get("modelmesh_api_request_milliseconds_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2000.0\",}"));
-        // External response time should all be < 200ms (includes cache hit loading time)
+        // Model invocation time should all be < 120000ms (includes cache hit loading time)
-        assertEquals(40.0, metrics.get("modelmesh_invoke_model_milliseconds_bucket{method=\"predict\",code=\"OK\",le=\"200.0\",}"));
+        assertEquals(40.0,
+                metrics.get("modelmesh_invoke_model_milliseconds_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"120000.0\",}"));
-        // Simulated model sizing time is < 200ms
+        // Simulated model sizing time is < 60000ms
-        assertEquals(1.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{le=\"200.0\",}"));
+        assertEquals(1.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{modelId=\"myModel\",le=\"60000.0\",}"));
         // Simulated model sizing time is > 50ms
-        assertEquals(0.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{le=\"50.0\",}"));
+        assertEquals(0.0, metrics.get("modelmesh_model_sizing_milliseconds_bucket{modelId=\"myModel\",le=\"50.0\",}"));
         // Simulated model size is between 64MiB and 256MiB
-        assertEquals(0.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{le=\"6.7108864E7\",}"));
-        assertEquals(1.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{le=\"2.68435456E8\",}"));
+        assertEquals(0.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{modelId=\"myModel\",le=\"6.7108864E7\",}"));
+        assertEquals(1.0, metrics.get("modelmesh_loaded_model_size_bytes_bucket{modelId=\"myModel\",le=\"2.68435456E8\",}"));
         // One model is loaded
-        assertEquals(1.0, metrics.get("modelmesh_models_loaded_total"));
         assertEquals(1.0, metrics.get("modelmesh_instance_models_total"));
         // Histogram counts should reflect the two payload sizes (30 small, 10 large)
-        assertEquals(30.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"128.0\",}"));
-        assertEquals(40.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"2097152.0\",}"));
-        assertEquals(30.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"128.0\",}"));
-        assertEquals(40.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",le=\"2097152.0\",}"));
+        assertEquals(30.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"128.0\",}"));
+        assertEquals(40.0, metrics.get("modelmesh_request_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2097152.0\",}"));
+        assertEquals(30.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"128.0\",}"));
+        assertEquals(40.0, metrics.get("modelmesh_response_size_bytes_bucket{method=\"predict\",code=\"OK\",modelId=\"\",le=\"2097152.0\",}"));
 
         // Memory metrics
         assertTrue(metrics.containsKey("netty_pool_mem_allocated_bytes{area=\"direct\",}"));