Skip to content

Commit

Permalink
fix cache lookups and name sensing for recorded metrics
Browse files Browse the repository at this point in the history
A fix for most of the issues noted in Stackdriver#104

- update targets.targetMatch() to match labels correctly in
  the case of a recorded metric (thank you @StevenYCChou)

- update metadata.Cache.Get() to treat metrics with names prefixed with
  the value of the `--stackdriver.recorded-metric-prefix` flag (default:
  `recorded_`) as recorded metrics and log an error if a colon is found in
  the metric name (stackdriver metric names cannot have those)

- update README with specific notes for forwarding recorded metrics
  • Loading branch information
n-oden committed Jan 15, 2021
1 parent 799f9c5 commit c7dfcfa
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 17 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,18 @@ static_metadata:
# - ...
```

<<<<<<< HEAD
* All `static_metadata` entries must have `type` specified. This specifies the Stackdriver metric type and overrides the metric type chosen by the Prometheus client.
* If `value_type` is specified, it will override the default value type for counters and gauges. All Prometheus metrics have a default type of double.
=======
#### Forwarding recorded metrics

The [suggested format](https://prometheus.io/docs/practices/rules/) for Prometheus metrics generated by recording rules is `level:metric:operation`, but that name format is not compatible with Stackdriver: Stackdriver's [metric name rules](https://cloud.google.com/monitoring/api/v3/metrics-details#label_names) specify that only upper and lowercase letters, digits and underscores may be used in metric names.

The sidecar will, therefore, treat any Prometheus metric name prefixed with the value of the `--stackdriver.recorded-metric-prefix` flag (by default, `recorded_`) as a recorded metric, which will be created as a gauge on the Stackdriver side.

Note also that it is not currently possible to forward recorded metrics that lack an `instance` and `job` label, as those tags are used as cache keys.
>>>>>>> 49660fc (fix cache lookups and name sensing for recorded metrics)
#### Counter Aggregator

Expand Down
6 changes: 5 additions & 1 deletion cmd/stackdriver-prometheus-sidecar/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ type mainConfig struct {
GenericLabels genericConfig
StackdriverAddress *url.URL
MetricsPrefix string
RecordedMetricPrefix string
UseGKEResource bool
StoreInFilesDirectory string
WALDirectory string
Expand Down Expand Up @@ -230,6 +231,9 @@ func main() {
a.Flag("stackdriver.metrics-prefix", "Customized prefix for Stackdriver metrics. If not set, external.googleapis.com/prometheus will be used").
StringVar(&cfg.MetricsPrefix)

a.Flag("stackdriver.recorded-metric-prefix", "Prometheus metric name prefix used to detect recorded metrics. If not set, 'recorded_' will be used.").
StringVar(&cfg.RecordedMetricPrefix)

a.Flag("stackdriver.use-gke-resource",
"Whether to use the legacy gke_container MonitoredResource type instead of k8s_container").
Default("false").BoolVar(&cfg.UseGKEResource)
Expand Down Expand Up @@ -404,7 +408,7 @@ func main() {
if err != nil {
panic(err)
}
metadataCache := metadata.NewCache(httpClient, metadataURL, cfg.StaticMetadata)
metadataCache := metadata.NewCache(httpClient, metadataURL, cfg.RecordedMetricPrefix, cfg.StaticMetadata)

tailer, err := tail.Tail(ctx, cfg.WALDirectory)
if err != nil {
Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ require (
github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect
github.com/golang/lint v0.0.0-20180702182130-06c8688daad7 // indirect
github.com/golang/protobuf v1.3.2
github.com/google/btree v1.0.0 // indirect
github.com/google/go-cmp v0.3.1
github.com/google/pprof v0.0.0-20191105193234-27840fff0d09 // indirect
github.com/googleapis/gnostic v0.3.0 // indirect
Expand Down
37 changes: 25 additions & 12 deletions metadata/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ type Cache struct {
promURL *url.URL
client *http.Client

metadata map[string]*cacheEntry
seenJobs map[string]struct{}
staticMetadata map[string]*Entry
metadata map[string]*cacheEntry
seenJobs map[string]struct{}
staticMetadata map[string]*Entry
recordedMetricPrefix string
}

// DefaultEndpointPath is the default HTTP path on which Prometheus serves
Expand All @@ -60,16 +61,17 @@ type Entry struct {
// NewCache returns a new cache that gets populated by the metadata endpoint
// at the given URL.
// It uses the default endpoint path if no specific path is provided.
func NewCache(client *http.Client, promURL *url.URL, staticMetadata []*Entry) *Cache {
func NewCache(client *http.Client, promURL *url.URL, recordedMetricPrefix string, staticMetadata []*Entry) *Cache {
if client == nil {
client = http.DefaultClient
}
c := &Cache{
promURL: promURL,
client: client,
staticMetadata: map[string]*Entry{},
metadata: map[string]*cacheEntry{},
seenJobs: map[string]struct{}{},
promURL: promURL,
client: client,
recordedMetricPrefix: recordedMetricPrefix,
staticMetadata: map[string]*Entry{},
metadata: map[string]*cacheEntry{},
seenJobs: map[string]struct{}{},
}
for _, m := range staticMetadata {
c.staticMetadata[m.Metric] = m
Expand Down Expand Up @@ -128,13 +130,24 @@ func (c *Cache) Get(ctx context.Context, job, instance, metric string) (*Entry,
if md != nil && md.found {
return md.Entry, nil
}
// The metric might also be produced by a recording rule, which by convention
// contain at least one `:` character. In that case we can generally assume that
// it is a gauge. We leave the help text empty.

// The suggested format for recorded metric names is `level:metric:operation`,
// but stackdriver metric names cannot have colon characters in them, so
// return an error.
if strings.Contains(metric, ":") {
entry := &Entry{Metric: metric, MetricType: textparse.MetricTypeGauge}
return entry, nil
}

// Treat metric names prefixed with the flagged prefix as recorded metrics. In that
// case we can generally assume that it is a gauge. We leave the help text
// empty.
if strings.HasPrefix(metric, c.recordedMetricPrefix) {
return &Entry{
Metric: metric,
MetricType: textparse.MetricTypeGauge,
}, nil
}
return nil, nil
}

Expand Down
20 changes: 18 additions & 2 deletions metadata/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func TestCache_Get(t *testing.T) {
&Entry{Metric: "static_metric2", MetricType: textparse.MetricTypeCounter, ValueType: metric_pb.MetricDescriptor_DOUBLE, Help: "help_static2"},
&Entry{Metric: "metric_with_override", MetricType: textparse.MetricTypeCounter, ValueType: metric_pb.MetricDescriptor_INT64, Help: "help_metric_override"},
}
c := NewCache(nil, u, staticMetadata)
c := NewCache(nil, u, "recorded_", staticMetadata)

// First get for the job, we expect an initial batch request.
handler = func(qMetric, qMatch string) *apiResponse {
Expand Down Expand Up @@ -230,6 +230,22 @@ func TestCache_Get(t *testing.T) {
handler = func(qMetric, qMatch string) *apiResponse {
return nil
}
md, err = c.Get(ctx, "prometheus", "localhost:9090", "recorded_some_rule")
if err != nil {
t.Fatal(err)
}
want = &Entry{
Metric: "recorded_some_rule",
MetricType: textparse.MetricTypeGauge,
}
if !reflect.DeepEqual(md, want) {
t.Errorf("expected metadata %v but got %v", want, md)
}

// Test prometheus-style recording rule
handler = func(qMetric, qMatch string) *apiResponse {
return nil
}
md, err = c.Get(ctx, "prometheus", "localhost:9090", "some:recording:rule")
if err != nil {
t.Fatal(err)
Expand All @@ -248,7 +264,7 @@ func TestNewCache(t *testing.T) {
&Entry{Metric: "a", Help: "a"},
&Entry{Metric: "b", Help: "b"},
}
c := NewCache(nil, nil, static)
c := NewCache(nil, nil, "recorded_", static)

want := map[string]*Entry{
"a": &Entry{Metric: "a", Help: "a"},
Expand Down
3 changes: 2 additions & 1 deletion targets/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ func targetMatch(targets []*Target, lset labels.Labels) (*Target, bool) {
Outer:
for _, t := range targets {
for _, tl := range t.Labels {
if lset.Get(tl.Name) != tl.Value {
v := lset.Get(tl.Name)
if v != "" && v != tl.Value {
continue Outer
}
}
Expand Down

0 comments on commit c7dfcfa

Please sign in to comment.