Skip to content

Commit

Permalink
Cherry-pick to 7.0: Ignore prometheus metrics when their values are N…
Browse files Browse the repository at this point in the history
…aN or Inf (elastic#12084) (elastic#12286)

* Ignore prometheus metrics when their values are NaN or Inf (elastic#12084)

* Ignore prometheus metrics when their values are NaN or Inf
* Avoid NaN/Inf in prometheus helper
* Add checks on Gauge, Summary and Counter
* Add NaN/Inf check on histogram values

(cherry picked from commit 6810e31)

* Fix changelog
  • Loading branch information
kaiyan-sheng authored May 27, 2019
1 parent fe9961a commit 1953317
Show file tree
Hide file tree
Showing 7 changed files with 411 additions and 40 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ https://github.com/elastic/beats/compare/v7.0.0...7.0[Check the HEAD diff]

- Update documentation with cloudwatch:ListMetrics permission. {pull}11987[11987]
- Change some field type from scaled_float to long in aws module. {pull}11982[11982]
- Ignore prometheus metrics when their values are NaN or Inf. {pull}12084[12084] {issue}10849[10849]

*Packetbeat*

Expand Down
31 changes: 20 additions & 11 deletions metricbeat/helper/prometheus/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,28 +128,33 @@ func (m *commonMetric) GetField() string {
func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
counter := metric.GetCounter()
if counter != nil {
return int64(counter.GetValue())
if !math.IsNaN(counter.GetValue()) && !math.IsInf(counter.GetValue(), 0) {
return int64(counter.GetValue())
}
}

gauge := metric.GetGauge()
if gauge != nil {
return gauge.GetValue()
if !math.IsNaN(gauge.GetValue()) && !math.IsInf(gauge.GetValue(), 0) {
return gauge.GetValue()
}
}

summary := metric.GetSummary()
if summary != nil {
value := common.MapStr{}
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
if !math.IsNaN(summary.GetSampleSum()) && !math.IsInf(summary.GetSampleSum(), 0) {
value["sum"] = summary.GetSampleSum()
value["count"] = summary.GetSampleCount()
}

quantiles := summary.GetQuantile()
percentileMap := common.MapStr{}
for _, quantile := range quantiles {
if !math.IsNaN(quantile.GetValue()) {
key := strconv.FormatFloat((100 * quantile.GetQuantile()), 'f', -1, 64)
if !math.IsNaN(quantile.GetValue()) && !math.IsInf(quantile.GetValue(), 0) {
key := strconv.FormatFloat(100*quantile.GetQuantile(), 'f', -1, 64)
percentileMap[key] = quantile.GetValue()
}

}

if len(percentileMap) != 0 {
Expand All @@ -162,14 +167,18 @@ func (m *commonMetric) GetValue(metric *dto.Metric) interface{} {
histogram := metric.GetHistogram()
if histogram != nil {
value := common.MapStr{}
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
if !math.IsNaN(histogram.GetSampleSum()) && !math.IsInf(histogram.GetSampleSum(), 0) {
value["sum"] = histogram.GetSampleSum()
value["count"] = histogram.GetSampleCount()
}

buckets := histogram.GetBucket()
bucketMap := common.MapStr{}
for _, bucket := range buckets {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
if bucket.GetCumulativeCount() != uint64(math.NaN()) && bucket.GetCumulativeCount() != uint64(math.Inf(0)) {
key := strconv.FormatFloat(bucket.GetUpperBound(), 'f', -1, 64)
bucketMap[key] = bucket.GetCumulativeCount()
}
}

if len(bucketMap) != 0 {
Expand Down
6 changes: 5 additions & 1 deletion metricbeat/helper/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,11 @@ func (p *prometheus) GetProcessedMetrics(mapping *MetricsMapping) ([]common.MapS
if field != "" {
// Put it in the event if it's a common metric
event := getEvent(eventsMap, keyLabels)
event.Put(field, value)
update := common.MapStr{}
update.Put(field, value)
// value may be a mapstr (for histograms and summaries), do a deep update to avoid smashing existing fields
event.DeepUpdate(update)

event.DeepUpdate(labels)
}
}
Expand Down
93 changes: 92 additions & 1 deletion metricbeat/helper/prometheus/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,22 @@ const promMetrics = `
first_metric{label1="value1",label2="value2",label3="Value3",label4="FOO"} 1
# TYPE second_metric gauge
second_metric{label1="value1",label3="othervalue"} 0
# TYPE metrics_one_count_total counter
metrics_one_count_total{name="jane",surname="foster"} NaN
metrics_one_count_total{name="john",surname="williams"} +Inf
metrics_one_count_total{name="jahn",surname="baldwin",age="30"} 3
# TYPE summary_metric summary
summary_metric{quantile="0.5"} 29735
summary_metric{quantile="0.9"} 47103
summary_metric{quantile="0.99"} 50681
summary_metric_sum 234892394
summary_metric_count 44000
# TYPE summary_metric_with_nan_inf summary
summary_metric_with_nan_inf{quantile="0.5"} NaN
summary_metric_with_nan_inf{quantile="0.9"} +Inf
summary_metric_with_nan_inf{quantile="0.99"} 50681
summary_metric_with_nan_inf_sum 50681
summary_metric_with_nan_inf_count 44000
# TYPE histogram_metric histogram
histogram_metric_bucket{le="1000"} 1
histogram_metric_bucket{le="10000"} 1
Expand All @@ -52,7 +62,16 @@ histogram_metric_bucket{le="1e+09"} 1
histogram_metric_bucket{le="+Inf"} 1
histogram_metric_sum 117
histogram_metric_count 1
# TYPE histogram_metric_with_nan_inf histogram
histogram_metric_with_nan_inf_bucket{le="1000"} NaN
histogram_metric_with_nan_inf_bucket{le="10000"} +Inf
histogram_metric_with_nan_inf_bucket{le="100000"} -Inf
histogram_metric_with_nan_inf_bucket{le="1e+06"} 1
histogram_metric_with_nan_inf_bucket{le="1e+08"} 1
histogram_metric_with_nan_inf_bucket{le="1e+09"} 1
histogram_metric_with_nan_inf_bucket{le="+Inf"} 1
histogram_metric_with_nan_inf_sum 117
histogram_metric_with_nan_inf_count 1
`

type mockFetcher struct{}
Expand Down Expand Up @@ -279,6 +298,33 @@ func TestPrometheus(t *testing.T) {
},
expected: []common.MapStr{},
},
{
msg: "Counter metric with NaN Inf",
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"metrics_one_count_total": Metric("metrics.one.count"),
},
Labels: map[string]LabelMap{
"name": Label("metrics.one.labels.name"),
"surname": Label("metrics.one.labels.surname"),
"age": Label("metrics.one.labels.age"),
},
},
expected: []common.MapStr{
common.MapStr{
"metrics": common.MapStr{
"one": common.MapStr{
"count": int64(3.0),
"labels": common.MapStr{
"name": "jahn",
"surname": "baldwin",
"age": "30",
},
},
},
},
},
},
{
msg: "Summary metric",
mapping: &MetricsMapping{
Expand All @@ -302,6 +348,27 @@ func TestPrometheus(t *testing.T) {
},
},
},
{
msg: "Summary metric with NaN Inf",
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"summary_metric_with_nan_inf": Metric("summary.metric"),
},
},
expected: []common.MapStr{
common.MapStr{
"summary": common.MapStr{
"metric": common.MapStr{
"sum": 50681.0,
"count": uint64(44000),
"percentile": common.MapStr{
"99": 50681.0,
},
},
},
},
},
},
{
msg: "Histogram metric",
mapping: &MetricsMapping{
Expand Down Expand Up @@ -329,6 +396,30 @@ func TestPrometheus(t *testing.T) {
},
},
},
{
msg: "Histogram metric with NaN Inf",
mapping: &MetricsMapping{
Metrics: map[string]MetricMap{
"histogram_metric_with_nan_inf": Metric("histogram.metric"),
},
},
expected: []common.MapStr{
common.MapStr{
"histogram": common.MapStr{
"metric": common.MapStr{
"count": uint64(1),
"bucket": common.MapStr{
"1000000000": uint64(1),
"+Inf": uint64(1),
"1000000": uint64(1),
"100000000": uint64(1),
},
"sum": 117.0,
},
},
},
},
},
}

for _, test := range tests {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# HELP kafka_consumer_records_lag_records The latest lag of the partition
# TYPE kafka_consumer_records_lag_records gauge
kafka_consumer_records_lag_records{client_id="consumer1",} NaN
kafka_consumer_records_lag_records{client_id="consumer2",} +Inf
kafka_consumer_records_lag_records{client_id="consumer3",} -Inf
kafka_consumer_records_lag_records{client_id="consumer4",} 5
# HELP http_failures Total number of http request failures
# TYPE http_failures counter
http_failures{method="GET"} 2
http_failures{method="POST"} NaN
http_failures{method="DELETE"} +Inf
# HELP go_gc_duration_seconds A summary of the GC invocation durations.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0",} NaN
go_gc_duration_seconds{quantile="0.25",} +Inf
go_gc_duration_seconds{quantile="0.5",} -Inf
go_gc_duration_seconds{quantile="0.75"} 9.8154e-05
go_gc_duration_seconds{quantile="1",} 0.011689149
go_gc_duration_seconds_sum 3.451780079
go_gc_duration_seconds_count 13118
# HELP http_request_duration_seconds request duration histogram
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{le="0.1"} +Inf
http_request_duration_seconds_bucket{le="0.2"} -Inf
http_request_duration_seconds_bucket{le="0.5"} NaN
http_request_duration_seconds_bucket{le="1"} 1
http_request_duration_seconds_bucket{le="2"} 2
http_request_duration_seconds_bucket{le="3"} 3
http_request_duration_seconds_bucket{le="5"} 3
http_request_duration_seconds_bucket{le="+Inf"} 3
http_request_duration_seconds_sum 6
http_request_duration_seconds_count 3
Loading

0 comments on commit 1953317

Please sign in to comment.