diff --git a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java index fb54921a..763b0336 100644 --- a/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java +++ b/src/main/java/com/ibm/watson/modelmesh/ModelMesh.java @@ -5488,6 +5488,12 @@ private Runnable rateTrackingTask() { final int secondCopyMaxAgeIters = (int) ((SECOND_COPY_MAX_AGE_SECS * 1000L) / RATE_CHECK_INTERVAL_MS); final int secondCopyMinAgeIters = (int) ((SECOND_COPY_MIN_AGE_SECS * 1000L) / RATE_CHECK_INTERVAL_MS); + // Minimum LRU when cache is close to full for permitting redundant model copies. + // This aims to avoid flip-flopping by ensuring that the scale-down lastUsed cutoff + // (10% of LRU) is at least 3x the SECOND_COPY_MAX_AGE_SECS scale-up threshold. + // Impose an absolute minimum of 6 hours. + final int secondCopyLruThresholdMillis = Math.max(SECOND_COPY_MAX_AGE_SECS * 3 * 10 * 1000, 6 * 3600_000); + // this is incremented each iteration (~ every RATE_CHECK_INTERVAL_MS) int iterationCounter; @@ -5594,10 +5600,6 @@ public void run() { + ": target range [" + lower + ", " + upper + "], I1=" + i1 + ", I2=" + i2 + ", curIteration=" + iterationCounter); } - System.out.println("Second copy trigger evaluation for model " + modelId - + ": target range [" + lower + ", " + upper + "], I1=" - + i1 + ", I2=" + i2 + ", curIteration=" + iterationCounter); - boolean i1inRange = false, i2inRange = false; if (i2 >= lower && i1 <= upper) { i1inRange = i1 >= lower; @@ -5612,9 +5614,9 @@ public void run() { // Model was used within the target range [MIN_AGE, MAX_AGE] iterations ago // so trigger loading of a second copy - // Don't do it if > 90% full and cache is younger than 6 hours + // Don't do it if > 90% full and cache is younger than secondCopyLruThresholdMillis if ((10 * clusterStats.totalFree) / clusterStats.totalCapacity >= 1 - || (now - clusterStats.globalLru) > (6 * 3600_000)) { + || (now - clusterStats.globalLru) > secondCopyLruThresholdMillis) { logger.info("Attempting to add second copy of model " + modelId + " in another instance since \"regular\" usage was detected"); ensureLoadedInternalAsync(modelId, lastTime, ce.getWeight(), excludeThisInstance, 0); @@ -6508,7 +6510,7 @@ void triggerProactiveLoadsForInstanceSubset(ClusterStats stats, // last-used time more recent than at least ~33% of the loaded models (since // they will be replacing older loaded models) long proactiveLastUsedCutoff = stats.globalLru == Long.MAX_VALUE ? 0L - // minimum of 10 minutes here to avoid churn in pathological cases + // minimum of 20 minutes here to avoid churn in pathological cases : stats.globalLru + Math.max(age(stats.globalLru) / 3L, 1_200_000L); if (logger.isDebugEnabled()) { logger.debug(excludeTypes == null ? "" :