system_health hazardMetric

The current SH metrics, Responsiveness and Efficiency, only measure aspects of the user experience that actually happened: was this interaction actually slow? However, we know of certain types of behaviors that sites can engage in that are risky, and might have impacted responsiveness if the user had behaved differently. For example, if sites spend longer than 50ms on a single given JavaScript task, then that would impact responsiveness if the user attempted to interact with the page during it. It's like speeding: it's hazardous even if it doesn't cause a crash. We have identified other types of hazards, but we're starting with long idle tasks. This CL produces a single 'hazard' metric for a given trace, with diagnostic values for each IdleExpectation. This CL scores the risk of each long idle task as if it had impacted an ideal Scroll Response that had been minding its own business. Longer idle tasks are given worse (higher) hazard scores than relatively shorter idle tasks. Extending the analogy, the faster you speed, the higher your ticket fine. The per-UE hazard scores are perceptually blended like responsiveness scores, but require a different weighting function since they are smallerIsBetter. test_data/trace_amp-obama-2.json is a good manual test case. This metric may eventually be renamed to something biggerIsBetter like "readiness" or "availability" per GoogleChrome/lighthouse#27. BUG=catapult:#1242 Review URL: https://codereview.chromium.org/1823323002
catapult-project · Apr 4, 2016 · 8e524c6 · 8e524c6
1 parent 6704711
commit 8e524c6
Show file tree

Hide file tree

Showing 7 changed files with 298 additions and 4 deletions.
diff --git a/tracing/trace_viewer.gypi b/tracing/trace_viewer.gypi
@@ -161,6 +161,7 @@
       'tracing/metrics/system_health/animation_smoothness_metric.html',
       'tracing/metrics/system_health/animation_throughput_metric.html',
       'tracing/metrics/system_health/efficiency_metric.html',
+      'tracing/metrics/system_health/hazard_metric.html',
       'tracing/metrics/system_health/responsiveness_metric.html',
       'tracing/metrics/system_health/system_health_metrics.html',
       'tracing/metrics/system_health/utils.html',

diff --git a/tracing/tracing/core/test_utils.html b/tracing/tracing/core/test_utils.html
@@ -251,6 +251,8 @@
         sd.duration,
         cpuSD.start, cpuSD.duration);
 
+    if (options.isTopLevel)
+      slice.isTopLevel = true;
 
     return slice;
   };

diff --git a/tracing/tracing/metrics/all_metrics.html b/tracing/tracing/metrics/all_metrics.html
@@ -7,6 +7,7 @@
 
 <link rel="import" href="/tracing/metrics/sample_metric.html">
 <link rel="import" href="/tracing/metrics/system_health/efficiency_metric.html">
+<link rel="import" href="/tracing/metrics/system_health/hazard_metric.html">
 <link rel="import"
     href="/tracing/metrics/system_health/responsiveness_metric.html">
 <link rel="import"

diff --git a/tracing/tracing/metrics/system_health/hazard_metric.html b/tracing/tracing/metrics/system_health/hazard_metric.html
@@ -0,0 +1,169 @@
+<!DOCTYPE html>
+<!--
+Copyright (c) 2015 The Chromium Authors. All rights reserved.
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+-->
+
+<link rel="import" href="/tracing/metrics/metric_registry.html">
+<link rel="import"
+      href="/tracing/metrics/system_health/responsiveness_metric.html">
+<link rel="import" href="/tracing/model/user_model/idle_expectation.html">
+<link rel="import" href="/tracing/value/numeric.html">
+<link rel="import" href="/tracing/value/value.html">
+
+<script>
+'use strict';
+
+tr.exportTo('tr.metrics.sh', function() {
+  var LONG_TASK_MS = 50;
+
+  var normalizedPercentage_smallerIsBetter =
+    tr.v.Unit.byName.normalizedPercentage_smallerIsBetter;
+  var timeDurationInMs_smallerIsBetter =
+    tr.v.Unit.byName.timeDurationInMs_smallerIsBetter;
+
+  function findLongTasks(ue) {
+    var longTasks = [];
+    // NB: This misses tasks that were associated with another UE,
+    // since only unassociated events are vacuumed up into IdleExpectations.
+    ue.associatedEvents.forEach(function(event) {
+      if ((event instanceof tr.model.ThreadSlice) &&
+          (event.duration > LONG_TASK_MS) &&
+          event.isTopLevel)
+        longTasks.push(event);
+    });
+    return longTasks;
+  }
+
+  function computeResponsivenessRisk(durationMs) {
+    // Returns 0 when the risk of impacting responsiveness is minimal.
+    // Returns 1 when it is maximal.
+    // durationMs is the duration of a long idle task.
+    // It is at least DEFAULT_LONG_TASK_MS.
+    // The FAST_RESPONSE_HISTOGRAM was designed to permit both a 50ms idle task
+    // when a Scroll Response begins, plus 16ms latency between the idle task
+    // and the first frame of the scroll, without impacting the responsiveness
+    // score.
+    // Add 16ms to durationMs to simulate the standard (maximum ideal) scroll
+    // response latency, and use the FAST_RESPONSE_HISTOGRAM to punish every ms
+    // that the long idle task exceeds DEFAULT_LONG_TASK_MS.
+
+    durationMs += 16;
+
+    // computeDurationResponsiveness returns a normalized percentage that
+    // represents the fraction of users that would be satisfied with a
+    // Scroll Response that takes durationMs to respond.
+    // The risk of impacting responsiveness is approximated as the long task's
+    // impact on a hypothetical Scroll Response that starts when the long task
+    // starts, and then takes the standard 16ms to respond after the long task
+    // finishes.
+    // We imagine a Scroll Response instead of a Load or another type of
+    // Response because the Scroll Response carries the strictest expectation.
+    // The risk of impacting responsiveness is framed as the fraction of users
+    // that would be *un*satisifed with the responsiveness of that hypothetical
+    // Scroll Response. The fraction of users who are unsatisfied with something
+    // is equal to 1 - the fraction of users who are satisfied with it.
+    return 1 - tr.metrics.sh.computeDurationResponsiveness(
+        tr.metrics.sh.FAST_RESPONSE_HISTOGRAM, durationMs);
+  }
+
+  // This weighting function is similar to tr.metrics.sh.perceptualBlend,
+  // but this version is appropriate for SmallerIsBetter metrics, whereas
+  // that version is for BiggerIsBetter metrics.
+  // (This would not be necessary if hazard were reframed as a BiggerIsBetter
+  // metric such as "stability".)
+  // Also, that version assumes that the 'ary' will be UserExpectations, whereas
+  // this version assumes that the 'ary' will be scores.
+  function perceptualBlendSmallerIsBetter(hazardScore) {
+    return Math.exp(hazardScore);
+  }
+
+  // This metric requires only the 'toplevel' tracing category,
+  // in addition to whatever categories are required to compute the
+  // IdleExpectations (or rather the R/A/Ls that delineate the Idles).
+  // This metric computes a hazard score for each Idle independently (instead of
+  // grouping all long idle tasks together) because each Idle is different:
+  // the Idle before a Load is usually very empty, whereas the Idle immediately
+  // after a Load is usually still very active, since Loads end at a very early
+  // end point (the first contentful paint) while many parts of the page are
+  // still loading. (There may not necessarily be an Idle after a Load in
+  // real-world traces, but there almost always is in telemetry.)
+  function computeLongIdleTaskHazard(hazardScores, valueList, ue) {
+    var longTaskScores = [];
+    var durationValues = new tr.metrics.ValueList();
+
+    findLongTasks(ue).forEach(function(longTask) {
+      longTaskScores.push(computeResponsivenessRisk(longTask.duration));
+      durationValues.addValue(new tr.v.NumericValue(
+          ue.parentModel.canonicalUrlThatCreatedThisTrace,
+          'long idle task duration',
+          new tr.v.ScalarNumeric(
+              timeDurationInMs_smallerIsBetter, longTask.duration),
+          {description: 'Duration of a long idle task'}));
+    });
+
+    var options = {description: 'Risk of impacting responsiveness'};
+    var groupingKeys = {};
+    groupingKeys.userExpectationStableId = ue.stableId;
+    groupingKeys.userExpectationStageTitle = ue.stageTitle;
+    groupingKeys.userExpectationInitiatorTitle = ue.initiatorTitle;
+    var diagnostics = {values: durationValues.valueDicts};
+
+    var hazardScore = tr.b.Statistics.weightedMean(
+        longTaskScores, perceptualBlendSmallerIsBetter);
+
+    if (hazardScore === undefined)
+      hazardScore = 0;
+
+    hazardScores.push(hazardScore);
+
+    valueList.addValue(new tr.v.NumericValue(
+        ue.parentModel.canonicalUrlThatCreatedThisTrace,
+        'long idle tasks hazard',
+        new tr.v.ScalarNumeric(
+            normalizedPercentage_smallerIsBetter, hazardScore),
+        options, groupingKeys, diagnostics));
+  }
+
+  function hazardMetric(valueList, model) {
+    var hazardScores = [];
+    var hazardValues = new tr.metrics.ValueList();
+
+    model.userModel.expectations.forEach(function(ue) {
+      // Add normalized metrics to diagnostics.values.
+      // TODO(memory): Add memory here.
+
+      if (ue instanceof tr.model.um.IdleExpectation)
+        computeLongIdleTaskHazard(hazardScores, hazardValues, ue);
+    });
+
+    var options = {description: 'Risk of impacting responsiveness'};
+    var groupingKeys = {};
+    var diagnostics = {values: hazardValues.valueDicts};
+
+    var overallHazard = tr.b.Statistics.weightedMean(
+        hazardScores, perceptualBlendSmallerIsBetter);
+
+    if (overallHazard === undefined)
+      overallHazard = 0;
+
+    valueList.addValue(new tr.v.NumericValue(
+        model.canonicalUrlThatCreatedThisTrace, 'hazard',
+        new tr.v.ScalarNumeric(
+            normalizedPercentage_smallerIsBetter, overallHazard),
+        options, groupingKeys, diagnostics));
+  }
+
+  hazardMetric.prototype = {
+    __proto__: Function.prototype
+  };
+
+  tr.metrics.MetricRegistry.register(hazardMetric);
+
+  return {
+    hazardMetric: hazardMetric,
+    computeLongIdleTaskHazard: computeLongIdleTaskHazard
+  };
+});
+</script>
diff --git a/tracing/tracing/metrics/system_health/hazard_metric_test.html b/tracing/tracing/metrics/system_health/hazard_metric_test.html
@@ -0,0 +1,117 @@
+<!DOCTYPE html>
+<!--
+Copyright (c) 2015 The Chromium Authors. All rights reserved.
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+-->
+
+<link rel="import" href="/tracing/core/test_utils.html">
+<link rel="import"
+    href="/tracing/metrics/system_health/hazard_metric.html">
+<link rel="import" href="/tracing/metrics/value_list.html">
+<link rel="import" href="/tracing/model/user_model/idle_expectation.html">
+
+<script>
+'use strict';
+
+tr.b.unittest.testSuite(function() {
+  function computeHazardValue(customizeModelCallback) {
+    var model = tr.c.TestUtils.newModel(function(model) {
+      customizeModelCallback(model);
+    });
+    var valueList = new tr.metrics.ValueList();
+    tr.metrics.sh.hazardMetric(valueList, model);
+    var valueDicts = valueList.valueDicts;
+    assert.equal(1, valueDicts.length);
+    return valueDicts[0];
+  }
+
+  test('minimalHazard', function() {
+    var duration = 50 + 1e-4;
+    var value = computeHazardValue(function(model) {
+      var idle = new tr.model.um.IdleExpectation(
+          model, 'test', 0, 10000);
+      model.userModel.expectations.push(idle);
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: duration
+      }));
+    });
+    assert.notEqual(0, value.numeric.value);
+    assert.closeTo(value.numeric.value, 0, 1e-5);
+    assert.equal(1, value.diagnostics.values.length);
+    assert.equal(1, value.diagnostics.values[0].diagnostics.values.length);
+    assert.equal(duration,
+        value.diagnostics.values[0].diagnostics.values[0].numeric.value);
+  });
+
+  test('maximalHazard', function() {
+    var value = computeHazardValue(function(model) {
+      var idle = new tr.model.um.IdleExpectation(
+          model, 'test', 0, 10000);
+      model.userModel.expectations.push(idle);
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: 2200
+      }));
+    });
+    assert.equal(value.numeric.value, 1);
+    assert.equal(1, value.diagnostics.values.length);
+    assert.equal(1, value.diagnostics.values[0].diagnostics.values.length);
+    assert.equal(2200,
+        value.diagnostics.values[0].diagnostics.values[0].numeric.value);
+  });
+
+  test('blendedHazards', function() {
+    var value = computeHazardValue(function(model) {
+      var idle = new tr.model.um.IdleExpectation(
+          model, 'test', 0, 10000);
+      model.userModel.expectations.push(idle);
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: 100
+      }));
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: 200
+      }));
+      idle = new tr.model.um.IdleExpectation(
+          model, 'test', 0, 10000);
+      model.userModel.expectations.push(idle);
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: 300
+      }));
+      idle.associatedEvents.push(tr.c.TestUtils.newSliceEx({
+        type: tr.model.ThreadSlice,
+        isTopLevel: true,
+        start: 0,
+        duration: 400
+      }));
+    });
+    assert.closeTo(0.5454, value.numeric.value, 1e-3);
+    assert.equal(2, value.diagnostics.values.length);
+    assert.closeTo(0.279, value.diagnostics.values[0].numeric.value, 1e-3);
+    assert.closeTo(0.717, value.diagnostics.values[1].numeric.value, 1e-3);
+    assert.equal(2, value.diagnostics.values[0].diagnostics.values.length);
+    assert.equal(100,
+        value.diagnostics.values[0].diagnostics.values[0].numeric.value);
+    assert.equal(200,
+        value.diagnostics.values[0].diagnostics.values[1].numeric.value);
+    assert.equal(300,
+        value.diagnostics.values[1].diagnostics.values[0].numeric.value);
+    assert.equal(400,
+        value.diagnostics.values[1].diagnostics.values[1].numeric.value);
+  });
+});
+</script>
diff --git a/tracing/tracing/metrics/system_health/responsiveness_metric.html b/tracing/tracing/metrics/system_health/responsiveness_metric.html
@@ -106,7 +106,7 @@
       'For Animation, perceptual blend of Mean Opinion Scores of ' +
       'throughput and smoothness');
 
-  function getDurationScore(histogram, duration) {
+  function computeDurationResponsiveness(histogram, duration) {
     return histogram.getInterpolatedCountAt(duration) / histogram.maxCount;
   }
 
@@ -138,13 +138,13 @@
         // Responsiveness is not defined for Idle.
         return;
       } else if (ue instanceof tr.model.um.LoadExpectation) {
-        score = getDurationScore(LOAD_HISTOGRAM, ue.duration);
+        score = computeDurationResponsiveness(LOAD_HISTOGRAM, ue.duration);
       } else if (ue instanceof tr.model.um.ResponseExpectation) {
         var histogram = RESPONSE_HISTOGRAM;
         if (ue.isAnimationBegin)
           histogram = FAST_RESPONSE_HISTOGRAM;
 
-        score = getDurationScore(histogram, ue.duration);
+        score = computeDurationResponsiveness(histogram, ue.duration);
       } else if (ue instanceof tr.model.um.AnimationExpectation) {
         var throughput = throughputForAnimation[ue.stableId];
         var smoothness = smoothnessForAnimation[ue.stableId];
@@ -204,7 +204,9 @@
   tr.metrics.MetricRegistry.register(ResponsivenessMetric);
 
   return {
-    ResponsivenessMetric: ResponsivenessMetric
+    ResponsivenessMetric: ResponsivenessMetric,
+    computeDurationResponsiveness: computeDurationResponsiveness,
+    FAST_RESPONSE_HISTOGRAM: FAST_RESPONSE_HISTOGRAM
   };
 });
 </script>
diff --git a/tracing/tracing/metrics/system_health/system_health_metrics.html b/tracing/tracing/metrics/system_health/system_health_metrics.html
@@ -6,6 +6,7 @@
 -->
 
 <link rel="import" href="/tracing/metrics/system_health/efficiency_metric.html">
+<link rel="import" href="/tracing/metrics/system_health/hazard_metric.html">
 <link rel="import"
       href="/tracing/metrics/system_health/responsiveness_metric.html">
 
@@ -16,6 +17,7 @@
   function SystemHealthMetrics(valueList, model) {
     tr.metrics.sh.ResponsivenessMetric(valueList, model);
     tr.metrics.sh.EfficiencyMetric(valueList, model);
+    tr.metrics.sh.hazardMetric(valueList, model);
   }
 
   SystemHealthMetrics.prototype = {