google · manugarg · Nov 3, 2020 · Nov 3, 2020 · Nov 3, 2020
diff --git a/surfacers/stackdriver/stackdriver.go b/surfacers/stackdriver/stackdriver.go
@@ -365,6 +365,45 @@ func processLabels(em *metrics.EventMetrics) (labels map[string]string, labelsKe
 	return
 }
 
+// ignoreMetric reports whether the metric with the given name should be skipped.
+func (s *SDSurfacer) ignoreMetric(name string) bool {
+	// Names not matching allowedMetricsRegex are skipped without logging.
+	if re := s.allowedMetricsRegex; re != nil && !re.MatchString(name) {
+		return true
+	}
+
+	if validMetricLength(name, s.c.GetMonitoringUrl()) {
+		return false
+	}
+
+	s.l.Warningf("Message name %q is greater than the 100 character limit, skipping write", name)
+	return true
+}
+
+// failureCountForDefaultMetrics returns (true, total-success) when em has numeric
+// "total" and "success" but no "failure" metric and name is not ignored.
+func (s *SDSurfacer) failureCountForDefaultMetrics(em *metrics.EventMetrics, name string) (bool, float64) {
+	tv, sv := em.Metric("total"), em.Metric("success")
+	if em.Metric("failure") != nil || tv == nil || sv == nil {
+		return false, 0
+	}
+
+	// Filter by name only now, so ignoreMetric's length warning is not logged
+	// for EventMetrics that would never yield a failure metric.
+	if s.ignoreMetric(name) {
+		return false, 0
+	}
+
+	total, totalOk := tv.(metrics.NumValue)
+	success, successOk := sv.(metrics.NumValue)
+	if !totalOk || !successOk {
+		s.l.Errorf("total (%v) and success (%v) values are not numeric, this should never happen", tv, sv)
+		return false, 0
+	}
+
+	return true, total.Float64() - success.Float64()
+}
+
 // recordEventMetrics processes the incoming EventMetrics objects and builds
 // TimeSeries from it.
 //
@@ -380,6 +419,13 @@ func (s *SDSurfacer) recordEventMetrics(em *metrics.EventMetrics) (ts []*monitor
 
 	emLabels, cacheKey, metricPrefix := processLabels(em)
 
+	// Compute failure count for default metrics.
+	fName := metricPrefix + "failure"
+	creatFailureMetric, fVal := s.failureCountForDefaultMetrics(em, fName)
+	if creatFailureMetric {
+		ts = append(ts, s.recordTimeSeries(metricKind, fName, "DOUBLE", emLabels, em.Timestamp, &monitoring.TypedValue{DoubleValue: &fVal}, "1", cacheKey))
+	}
+
 	for _, k := range em.MetricsKeys() {
 		// Create a copy of emLabels for use in timeseries object.
 		mLabels := make(map[string]string)
@@ -388,14 +434,7 @@ func (s *SDSurfacer) recordEventMetrics(em *metrics.EventMetrics) (ts []*monitor
 		}
 		name := metricPrefix + k
 
-		if s.allowedMetricsRegex != nil {
-			if !s.allowedMetricsRegex.MatchString(name) {
-				continue
-			}
-		}
-
-		if !validMetricLength(name, s.c.GetMonitoringUrl()) {
-			s.l.Warningf("Message name %q is greater than the 100 character limit, skipping write", name)
+		if s.ignoreMetric(name) {
 			continue
 		}
 

diff --git a/surfacers/stackdriver/stackdriver_test.go b/surfacers/stackdriver/stackdriver_test.go
@@ -16,6 +16,7 @@ package stackdriver
 
 import (
 	"context"
+	"fmt"
 	"testing"
 	"time"
 
@@ -217,3 +218,69 @@ func TestTimeSeries(t *testing.T) {
 		}
 	}
 }
+
+// TestFailureCountForDefaultMetrics checks when and how a failure count is derived.
+func TestFailureCountForDefaultMetrics(t *testing.T) {
+	s := &SDSurfacer{}
+	var tests = []struct {
+		total, success, failure metrics.Value
+		failureCount            float64
+		wantFailure             bool
+	}{
+		{
+			total:        metrics.NewInt(1000),
+			success:      metrics.NewInt(990),
+			failureCount: float64(10),
+			wantFailure:  true,
+		},
+		{
+			total:       metrics.NewInt(1000),
+			success:     metrics.NewInt(990),
+			failure:     metrics.NewInt(10),
+			wantFailure: false,
+		},
+		{
+			total:        metrics.NewInt(1000),
+			success:      metrics.NewInt(1000),
+			failureCount: float64(0),
+			wantFailure:  true,
+		},
+		{
+			total:       nil,
+			success:     metrics.NewInt(990),
+			wantFailure: false,
+		},
+		{
+			total:       metrics.NewInt(1000),
+			success:     nil,
+			wantFailure: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(fmt.Sprintf("%+v", test), func(t *testing.T) {
+			em := metrics.NewEventMetrics(time.Now()).
+				AddMetric("total", test.total).
+				AddMetric("success", test.success)
+			if test.failure != nil {
+				em.AddMetric("failure", test.failure)
+			}
+
+			// Pass the full metric name (prefix + "failure"), as recordEventMetrics does.
+			const expectedMetricName = "custom/failure"
+			createFailureMetric, fVal := s.failureCountForDefaultMetrics(em, expectedMetricName)
+
+			if !test.wantFailure && createFailureMetric {
+				t.Errorf("Unexpected failure count metric (with value: %v)", fVal)
+			}
+
+			if test.wantFailure && !createFailureMetric {
+				t.Errorf("Not creating failure count metric; expected to create metric %s with value %v", expectedMetricName, test.failureCount)
+			}
+
+			if test.failureCount != fVal {
+				t.Errorf("Failure count=%v, want=%v", fVal, test.failureCount)
+			}
+		})
+	}
+}