Skip to content

Commit

Permalink
runtime/metrics: Delete metrics on object delete
Browse files Browse the repository at this point in the history
Delete the object metrics when the object is deleted. This ensures
that stale metrics about a deleted object is no longer exported.

As a result, the `ConditionDelete` is no longer needed. Another reason
to not have `ConditionDelete` is that a condition can only be one of
True, False or Unknown.

This introduces new delete methods in the low level metrics Recorder. In
the high level controller metrics, a list of owned finalizers is
introduced which is used to determine if an object is being deleted.
The existing Record*() methods are updated to check if the given object
is deleted, and call record or delete based on that. This helps make
this change transparent. The user of this API has to pass in the
finalizer they write on object they maintain and record the metrics at
the very end of the reconciliation so that the final object state can be
used to determine if the metrics can be deleted safely.

Signed-off-by: Sunny <darkowlzz@protonmail.com>
  • Loading branch information
darkowlzz committed Aug 10, 2023
1 parent ff3e247 commit e5ac39e
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 27 deletions.
54 changes: 51 additions & 3 deletions runtime/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/reference"
"k8s.io/utils/strings/slices"
ctrl "sigs.k8s.io/controller-runtime"
crtlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

Expand All @@ -50,23 +51,36 @@ import (
type Metrics struct {
Scheme *runtime.Scheme
MetricsRecorder *metrics.Recorder
ownedFinalizers []string
}

// MustMakeMetrics creates a new Metrics with a new metrics.Recorder, and the Metrics.Scheme set to that of the given
// mgr.
// mgr, along with an optional set of owned finalizers which is used to determine when an object is being deleted.
// It attempts to register the metrics collectors in the controller-runtime metrics registry, which panics upon the
// first registration that causes an error. Which usually happens if you try to initialise a Metrics value twice for
// your controller.
func MustMakeMetrics(mgr ctrl.Manager) Metrics {
func MustMakeMetrics(mgr ctrl.Manager, finalizers ...string) Metrics {
metricsRecorder := metrics.NewRecorder()
crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...)

return Metrics{
Scheme: mgr.GetScheme(),
MetricsRecorder: metricsRecorder,
ownedFinalizers: finalizers,
}
}

// IsDelete returns if the object is deleted by checking for deletion timestamp
// and owned finalizers in the object.
func (m Metrics) IsDelete(obj conditions.Getter) bool {
for _, f := range m.ownedFinalizers {
if slices.Contains(obj.GetFinalizers(), f) {
return false
}
}
return !obj.GetDeletionTimestamp().IsZero()
}

// RecordDuration records the duration of a reconcile attempt for the given obj based on the given startTime.
func (m Metrics) RecordDuration(ctx context.Context, obj conditions.Getter, startTime time.Time) {
if m.MetricsRecorder != nil {
Expand All @@ -75,6 +89,10 @@ func (m Metrics) RecordDuration(ctx context.Context, obj conditions.Getter, star
logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to record duration")
return
}
if m.IsDelete(obj) {
m.MetricsRecorder.DeleteDuration(*ref)
return
}
m.MetricsRecorder.RecordDuration(*ref, startTime)
}
}
Expand All @@ -87,22 +105,38 @@ func (m Metrics) RecordSuspend(ctx context.Context, obj conditions.Getter, suspe
logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to record suspend")
return
}
if m.IsDelete(obj) {
m.MetricsRecorder.DeleteSuspend(*ref)
return
}
m.MetricsRecorder.RecordSuspend(*ref, suspend)
}
}

// RecordReadiness records the meta.ReadyCondition status for the given obj.
func (m Metrics) RecordReadiness(ctx context.Context, obj conditions.Getter) {
if m.IsDelete(obj) {
m.DeleteCondition(ctx, obj, meta.ReadyCondition)
return
}
m.RecordCondition(ctx, obj, meta.ReadyCondition)
}

// RecordReconciling records the meta.ReconcilingCondition status for the given obj.
func (m Metrics) RecordReconciling(ctx context.Context, obj conditions.Getter) {
if m.IsDelete(obj) {
m.DeleteCondition(ctx, obj, meta.ReconcilingCondition)
return
}
m.RecordCondition(ctx, obj, meta.ReconcilingCondition)
}

// RecordStalled records the meta.StalledCondition status for the given obj.
func (m Metrics) RecordStalled(ctx context.Context, obj conditions.Getter) {
if m.IsDelete(obj) {
m.DeleteCondition(ctx, obj, meta.StalledCondition)
return
}
m.RecordCondition(ctx, obj, meta.StalledCondition)
}

Expand All @@ -120,5 +154,19 @@ func (m Metrics) RecordCondition(ctx context.Context, obj conditions.Getter, con
if rc == nil {
rc = conditions.UnknownCondition(conditionType, "", "")
}
m.MetricsRecorder.RecordCondition(*ref, *rc, !obj.GetDeletionTimestamp().IsZero())
m.MetricsRecorder.RecordCondition(*ref, *rc)
}

// DeleteCondition deletes the condition metrics of the given conditionType for
// the given object.
func (m Metrics) DeleteCondition(ctx context.Context, obj conditions.Getter, conditionType string) {
if m.MetricsRecorder == nil {
return
}
ref, err := reference.GetReference(m.Scheme, obj)
if err != nil {
logr.FromContextOrDiscard(ctx).Error(err, "unable to get object reference to delete condition metric")
return
}
m.MetricsRecorder.DeleteCondition(*ref, conditionType)
}
61 changes: 61 additions & 0 deletions runtime/controller/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
Copyright 2023 The Flux authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"testing"
"time"

. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/fluxcd/pkg/runtime/conditions/testdata"
)

func TestMetrics_IsDelete(t *testing.T) {
testFinalizers := []string{"finalizers.fluxcd.io", "finalizers.foo.bar"}
timenow := metav1.NewTime(time.Now())

tests := []struct {
name string
finalizers []string
deleteTimestamp *metav1.Time
ownedFinalizers []string
want bool
}{
{"equal finalizers, no delete timestamp", testFinalizers, nil, testFinalizers, false},
{"partial finalizers, no delete timestamp", []string{"finalizers.fluxcd.io"}, nil, testFinalizers, false},
{"unknown finalizers, no delete timestamp", []string{"foo"}, nil, testFinalizers, false},
{"unknown finalizers, delete timestamp", []string{"foo"}, &timenow, testFinalizers, true},
{"no finalizers, no delete timestamp", []string{}, nil, testFinalizers, false},
{"no owned finalizers, no delete timestamp", []string{"foo"}, nil, nil, false},
{"no finalizers, delete timestamp", []string{}, &timenow, testFinalizers, true},
{"no finalizers, no delete timestamp", nil, nil, nil, false},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

metrics := Metrics{ownedFinalizers: tt.ownedFinalizers}
obj := &testdata.Fake{}
obj.SetFinalizers(tt.finalizers)
obj.SetDeletionTimestamp(tt.deleteTimestamp)
g.Expect(metrics.IsDelete(obj)).To(Equal(tt.want))
})
}
}
2 changes: 1 addition & 1 deletion runtime/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
k8s.io/client-go v0.27.4
k8s.io/component-base v0.27.4
k8s.io/klog/v2 v2.100.1
k8s.io/utils v0.0.0-20230209194617-a36077c30491
sigs.k8s.io/cli-utils v0.35.0
sigs.k8s.io/controller-runtime v0.15.1
sigs.k8s.io/yaml v1.3.0
Expand Down Expand Up @@ -104,7 +105,6 @@ require (
k8s.io/cli-runtime v0.26.0 // indirect
k8s.io/kube-openapi v0.0.0-20230501164219-8b0f38b5fd1f // indirect
k8s.io/kubectl v0.26.0 // indirect
k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.12.1 // indirect
sigs.k8s.io/kustomize/kyaml v0.13.9 // indirect
Expand Down
37 changes: 22 additions & 15 deletions runtime/metrics/recorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
ConditionDeleted = "Deleted"
)

// Recorder is a struct for recording GitOps Toolkit metrics for a controller.
//
// Use NewRecorder to initialise it with properly configured metric names.
Expand Down Expand Up @@ -77,19 +73,20 @@ func (r *Recorder) Collectors() []prometheus.Collector {
}

// RecordCondition records the condition as given for the ref.
func (r *Recorder) RecordCondition(ref corev1.ObjectReference, condition metav1.Condition, deleted bool) {
for _, status := range []string{string(metav1.ConditionTrue), string(metav1.ConditionFalse), string(metav1.ConditionUnknown), ConditionDeleted} {
func (r *Recorder) RecordCondition(ref corev1.ObjectReference, condition metav1.Condition) {
for _, status := range []metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown} {
var value float64
if deleted {
if status == ConditionDeleted {
value = 1
}
} else {
if status == string(condition.Status) {
value = 1
}
if status == condition.Status {
value = 1
}
r.conditionGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace, condition.Type, status).Set(value)
r.conditionGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace, condition.Type, string(status)).Set(value)
}
}

// DeleteCondition deletes the condition metrics for the ref.
func (r *Recorder) DeleteCondition(ref corev1.ObjectReference, conditionType string) {
for _, status := range []metav1.ConditionStatus{metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown} {
r.conditionGauge.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace, conditionType, string(status))
}
}

Expand All @@ -102,7 +99,17 @@ func (r *Recorder) RecordSuspend(ref corev1.ObjectReference, suspend bool) {
r.suspendGauge.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Set(value)
}

// DeleteSuspend deletes the suspend metric for the ref.
func (r *Recorder) DeleteSuspend(ref corev1.ObjectReference) {
r.suspendGauge.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace)
}

// RecordDuration records the duration since start for the given ref.
func (r *Recorder) RecordDuration(ref corev1.ObjectReference, start time.Time) {
r.durationHistogram.WithLabelValues(ref.Kind, ref.Name, ref.Namespace).Observe(time.Since(start).Seconds())
}

// DeleteDuration deletes the duration metric for the ref.
func (r *Recorder) DeleteDuration(ref corev1.ObjectReference) {
r.durationHistogram.DeleteLabelValues(ref.Kind, ref.Name, ref.Namespace)
}
68 changes: 60 additions & 8 deletions runtime/metrics/recorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,13 @@ func TestRecorder_RecordCondition(t *testing.T) {
Status: metav1.ConditionTrue,
}

rec.RecordCondition(ref, cond, false)
rec.RecordCondition(ref, cond)

metricFamilies, err := reg.Gather()
if err != nil {
require.NoError(t, err)
}
require.NoError(t, err)

require.Equal(t, len(metricFamilies), 1)
require.Equal(t, len(metricFamilies[0].Metric), 4)
require.Equal(t, len(metricFamilies[0].Metric), 3)

var conditionTrueValue float64
for _, m := range metricFamilies[0].Metric {
Expand All @@ -69,6 +67,13 @@ func TestRecorder_RecordCondition(t *testing.T) {
}

require.Equal(t, conditionTrueValue, float64(1))

// Delete metrics.
rec.DeleteCondition(ref, cond.Type)

metricFamilies, err = reg.Gather()
require.NoError(t, err)
require.Equal(t, len(metricFamilies), 0)
}

func TestRecorder_RecordDuration(t *testing.T) {
Expand All @@ -86,9 +91,7 @@ func TestRecorder_RecordDuration(t *testing.T) {
rec.RecordDuration(ref, reconcileStart)

metricFamilies, err := reg.Gather()
if err != nil {
require.NoError(t, err)
}
require.NoError(t, err)

require.Equal(t, len(metricFamilies), 1)
require.Equal(t, len(metricFamilies[0].Metric), 1)
Expand All @@ -110,4 +113,53 @@ func TestRecorder_RecordDuration(t *testing.T) {
t.Errorf("expected namespace label to be %s, got %s", ref.Namespace, *pair.Value)
}
}

// Delete metrics.
rec.DeleteDuration(ref)

metricFamilies, err = reg.Gather()
require.NoError(t, err)
require.Equal(t, len(metricFamilies), 0)
}

func TestRecorder_RecordSuspend(t *testing.T) {
rec := NewRecorder()
reg := prometheus.NewRegistry()
reg.MustRegister(rec.suspendGauge)

ref := corev1.ObjectReference{
Kind: "GitRepository",
Namespace: "default",
Name: "test",
}

rec.RecordSuspend(ref, true)

metricFamilies, err := reg.Gather()
require.NoError(t, err)

require.Equal(t, len(metricFamilies), 1)
require.Equal(t, len(metricFamilies[0].Metric), 1)

value := *metricFamilies[0].Metric[0].GetGauge().Value
require.EqualValues(t, value, 1, "expected gauge value")

for _, pair := range metricFamilies[0].Metric[0].GetLabel() {
if *pair.Name == "kind" {
require.Equal(t, *pair.Value, ref.Kind, "unexpected kind")
}
if *pair.Name == "name" {
require.Equal(t, *pair.Value, ref.Name, "unexpected name")
}
if *pair.Name == "namespace" {
require.Equal(t, *pair.Value, ref.Namespace, "unexpected namespace")
}
}

// Delete metrics.
rec.DeleteSuspend(ref)

metricFamilies, err = reg.Gather()
require.NoError(t, err)
require.Equal(t, len(metricFamilies), 0)
}

0 comments on commit e5ac39e

Please sign in to comment.