diff --git a/pkg/neg/metrics/metrics.go b/pkg/neg/metrics/metrics.go
index 572ec33438..7265060908 100644
--- a/pkg/neg/metrics/metrics.go
+++ b/pkg/neg/metrics/metrics.go
@@ -31,6 +31,7 @@ const (
 	negOpLatencyKey      = "neg_operation_duration_seconds"
 	negOpEndpointsKey    = "neg_operation_endpoints"
 	lastSyncTimestampKey = "sync_timestamp"
+	syncerStalenessKey   = "syncer_staleness"
 
 	resultSuccess = "success"
 	resultError   = "error"
@@ -127,6 +128,17 @@ var (
 			Help:      "The timestamp of the last execution of NEG controller sync loop.",
 		},
 	)
+
+	// SyncerStaleness tracks, for every syncer, how long it has been since the syncer last synced.
+	SyncerStaleness = prometheus.NewHistogram(
+		prometheus.HistogramOpts{
+			Subsystem: negControllerSubsystem,
+			Name:      syncerStalenessKey,
+			Help:      "The duration of a syncer since it last syncs",
+			// custom buckets - [1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(~4min), 512s(~8min), 1024s(~17min), 2048s(~34min), 4096s(~68min), 8192s(~136min), +Inf]
+			Buckets: prometheus.ExponentialBuckets(1, 2, 14),
+		},
+	)
 )
 
 var register sync.Once
@@ -139,6 +151,7 @@ func RegisterMetrics() {
 		prometheus.MustRegister(SyncerSyncLatency)
 		prometheus.MustRegister(LastSyncTimestamp)
 		prometheus.MustRegister(InitializationLatency)
+		prometheus.MustRegister(SyncerStaleness)
 
 		RegisterSyncerMetrics()
 	})
@@ -170,6 +183,12 @@ func PublishNegInitializationMetrics(latency time.Duration) {
 	InitializationLatency.Observe(latency.Seconds())
 }
 
+// PublishNegSyncerStalenessMetrics observes syncerStaleness, converted to
+// seconds, in the SyncerStaleness histogram.
+func PublishNegSyncerStalenessMetrics(syncerStaleness time.Duration) {
+	SyncerStaleness.Observe(syncerStaleness.Seconds())
+}
+
 func getResult(err error) string {
 	if err != nil {
 		return resultError
diff --git a/pkg/neg/syncers/transaction.go b/pkg/neg/syncers/transaction.go
index c966cdd506..cc4614ae3d 100644
--- a/pkg/neg/syncers/transaction.go
+++ b/pkg/neg/syncers/transaction.go
@@ -710,6 +710,11 @@ func (s *transactionSyncer) updateStatus(syncErr error) {
 	if _, _, exists := findCondition(neg.Status.Conditions, negv1beta1.Initialized); !exists {
 		s.needInit = true
 	}
+	// A zero LastSyncTime means no previous sync has been recorded; ts.Sub would
+	// then saturate at the maximum Duration and skew the staleness histogram.
+	if !neg.Status.LastSyncTime.Time.IsZero() {
+		metrics.PublishNegSyncerStalenessMetrics(ts.Sub(neg.Status.LastSyncTime.Time))
+	}
 
 	ensureCondition(neg, getSyncedCondition(syncErr))
 	neg.Status.LastSyncTime = ts