Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix cache lookups and name sensing for recorded metrics #159

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ static_metadata:
# - ...
```

#### Forwarding recorded metrics

The [suggested format](https://prometheus.io/docs/practices/rules/) for Prometheus metrics generated by recording rules is `level:metric:operation`, but that name format is not compatible with Stackdriver: Stackdriver's [metric name rules](https://cloud.google.com/monitoring/api/v3/metrics-details#label_names) specify that only uppercase and lowercase letters, digits, and underscores may be used in metric names.

The sidecar will, therefore, treat any Prometheus metric name prefixed with the value of the `--stackdriver.recorded-metric-prefix` flag (by default, `recorded_`) as a recorded metric, which will be created as a gauge on the Stackdriver side.

Note also that it is not currently possible to forward recorded metrics that lack the `instance` and `job` labels, as those labels are used as cache keys.

#### Counter Aggregator

Counter Aggregator is an advanced feature of the sidecar that can be used to export a sum of multiple Prometheus counters to Stackdriver as a single CUMULATIVE metric.
Expand Down
6 changes: 5 additions & 1 deletion cmd/stackdriver-prometheus-sidecar/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ type mainConfig struct {
GenericLabels genericConfig
StackdriverAddress *url.URL
MetricsPrefix string
RecordedMetricPrefix string
UseGKEResource bool
StoreInFilesDirectory string
WALDirectory string
Expand Down Expand Up @@ -240,6 +241,9 @@ func main() {
a.Flag("stackdriver.metrics-prefix", "Customized prefix for Stackdriver metrics. If not set, external.googleapis.com/prometheus will be used").
StringVar(&cfg.MetricsPrefix)

a.Flag("stackdriver.recorded-metric-prefix", "Prometheus metric name prefix used to detect recorded metrics. If not set, 'recorded_' will be used.").
StringVar(&cfg.RecordedMetricPrefix)

a.Flag("stackdriver.use-gke-resource",
"Whether to use the legacy gke_container MonitoredResource type instead of k8s_container").
Default("false").BoolVar(&cfg.UseGKEResource)
Expand Down Expand Up @@ -383,7 +387,7 @@ func main() {
if err != nil {
panic(err)
}
metadataCache := metadata.NewCache(httpClient, metadataURL, cfg.StaticMetadata)
metadataCache := metadata.NewCache(httpClient, metadataURL, cfg.RecordedMetricPrefix, cfg.StaticMetadata)

// We instantiate a context here since the tailer is used by two other components.
// The context will be used in the lifecycle of prometheusReader further down.
Expand Down
34 changes: 22 additions & 12 deletions metadata/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ type Cache struct {
promURL *url.URL
client *http.Client

metadata map[string]*metadataEntry
seenJobs map[string]struct{}
staticMetadata map[string]scrape.MetricMetadata
metadata map[string]*metadataEntry
seenJobs map[string]struct{}
staticMetadata map[string]scrape.MetricMetadata
recordedMetricPrefix string
}

// DefaultEndpointPath is the default HTTP path on which Prometheus serves
Expand All @@ -53,16 +54,17 @@ const MetricTypeUntyped = "untyped"
// NewCache returns a new cache that gets populated by the metadata endpoint
// at the given URL.
// It uses the default endpoint path if no specific path is provided.
func NewCache(client *http.Client, promURL *url.URL, staticMetadata []scrape.MetricMetadata) *Cache {
func NewCache(client *http.Client, promURL *url.URL, recordedMetricPrefix string, staticMetadata []scrape.MetricMetadata) *Cache {
if client == nil {
client = http.DefaultClient
}
c := &Cache{
promURL: promURL,
client: client,
staticMetadata: map[string]scrape.MetricMetadata{},
metadata: map[string]*metadataEntry{},
seenJobs: map[string]struct{}{},
promURL: promURL,
client: client,
recordedMetricPrefix: recordedMetricPrefix,
staticMetadata: map[string]scrape.MetricMetadata{},
metadata: map[string]*metadataEntry{},
seenJobs: map[string]struct{}{},
}
for _, m := range staticMetadata {
c.staticMetadata[m.Metric] = m
Expand Down Expand Up @@ -121,10 +123,18 @@ func (c *Cache) Get(ctx context.Context, job, instance, metric string) (*scrape.
if md != nil && md.found {
return &md.MetricMetadata, nil
}
// The metric might also be produced by a recording rule, which by convention
// contain at least one `:` character. In that case we can generally assume that
// it is a gauge. We leave the help text empty.

// The suggested format for recorded metric names is `level:metric:operation`,
// but stackdriver metric names cannot have colon characters in them, so
// return an error.
if strings.Contains(metric, ":") {
return nil, errors.New(fmt.Sprintf("metric name '%s' cannot be forwarded due to illegal characters", metric))
}

// Treat metric names prefixed with the flagged prefix as recorded metrics. In that
// case we can generally assume that it is a gauge. We leave the help text
// empty.
if strings.HasPrefix(metric, c.recordedMetricPrefix) {
return &scrape.MetricMetadata{
Metric: metric,
Type: textparse.MetricTypeGauge,
Expand Down
17 changes: 13 additions & 4 deletions metadata/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func TestCache_Get(t *testing.T) {
t.Fatal(err)
}
// Create cache with static metadata.
c := NewCache(nil, u, []scrape.MetricMetadata{
c := NewCache(nil, u, "recorded_", []scrape.MetricMetadata{
{Metric: "static_metric1", Type: textparse.MetricTypeCounter, Help: "help_static1"},
{Metric: "static_metric2", Type: textparse.MetricTypeCounter, Help: "help_static2"},
{Metric: "metric_with_override", Type: textparse.MetricTypeCounter, Help: "help_metric_override"},
Expand Down Expand Up @@ -237,25 +237,34 @@ func TestCache_Get(t *testing.T) {
handler = func(qMetric, qMatch string) *apiResponse {
return nil
}
md, err = c.Get(ctx, "prometheus", "localhost:9090", "some:recording:rule")
md, err = c.Get(ctx, "prometheus", "localhost:9090", "recorded_some_rule")
if err != nil {
t.Fatal(err)
}
want = &scrape.MetricMetadata{
Metric: "some:recording:rule",
Metric: "recorded_some_rule",
Type: textparse.MetricTypeGauge,
}
if !reflect.DeepEqual(md, want) {
t.Fatalf("expected metadata %v but got %v", want, md)
}

// Test prometheus-style recording rule
handler = func(qMetric, qMatch string) *apiResponse {
return nil
}
md, err = c.Get(ctx, "prometheus", "localhost:9090", "some:recording:rule")
if err == nil {
t.Fatal(err)
}
}

func TestNewCache(t *testing.T) {
static := []scrape.MetricMetadata{
{Metric: "a", Help: "a"},
{Metric: "b", Help: "b"},
}
c := NewCache(nil, nil, static)
c := NewCache(nil, nil, "recorded_", static)

want := map[string]scrape.MetricMetadata{
"a": {Metric: "a", Help: "a"},
Expand Down
3 changes: 2 additions & 1 deletion targets/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ func targetMatch(targets []*Target, lset labels.Labels) (*Target, bool) {
Outer:
for _, t := range targets {
for _, tl := range t.Labels {
if lset.Get(tl.Name) != tl.Value {
v := lset.Get(tl.Name)
if v != "" && v != tl.Value {
continue Outer
}
}
Expand Down