Skip to content

Commit

Permalink
Add metrics to track endpointslice staleness
Browse files Browse the repository at this point in the history
Added metrics to track the sync staleness of endpoint slices, where
staleness is defined as how long it has been since a slice was last processed.
  • Loading branch information
sawsa307 committed Feb 15, 2023
1 parent b68db51 commit 0824e66
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
17 changes: 17 additions & 0 deletions pkg/neg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const (
negOpLatencyKey = "neg_operation_duration_seconds"
negOpEndpointsKey = "neg_operation_endpoints"
lastSyncTimestampKey = "sync_timestamp"
epsStalenessKey = "endpointslice_staleness"

resultSuccess = "success"
resultError = "error"
Expand Down Expand Up @@ -127,6 +128,17 @@ var (
Help: "The timestamp of the last execution of NEG controller sync loop.",
},
)

// EPSStaleness is a histogram of endpoint slice staleness: for each endpoint
// slice, how long it has been since the slice was last processed by the syncer.
// Observations are recorded in seconds (see PublishNegEPSStalenessMetrics).
EPSStaleness = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: negControllerSubsystem,
Name: epsStalenessKey,
Help: "The duration for an endpoint slice since it was last processed by syncer",
// Custom exponential buckets (upper bounds, in seconds): 1, 2, 4, 8, 16, 32, 64, 128, 256 (~4min), 512 (~8min), 1024 (~17min), 2048 (~34min), 4096 (~68min), 8192 (~136min), plus the implicit +Inf bucket.
Buckets: prometheus.ExponentialBuckets(1, 2, 14),
},
)
)

var register sync.Once
Expand All @@ -139,6 +151,7 @@ func RegisterMetrics() {
prometheus.MustRegister(SyncerSyncLatency)
prometheus.MustRegister(LastSyncTimestamp)
prometheus.MustRegister(InitializationLatency)
prometheus.MustRegister(EPSStaleness)

RegisterSyncerMetrics()
})
Expand Down Expand Up @@ -170,6 +183,10 @@ func PublishNegInitializationMetrics(latency time.Duration) {
InitializationLatency.Observe(latency.Seconds())
}

// PublishNegEPSStalenessMetrics records a single endpoint slice staleness
// observation, converted to seconds, in the EPSStaleness histogram.
func PublishNegEPSStalenessMetrics(epsStaleness time.Duration) {
	seconds := epsStaleness.Seconds()
	EPSStaleness.Observe(seconds)
}

func getResult(err error) string {
if err != nil {
return resultError
Expand Down
20 changes: 19 additions & 1 deletion pkg/neg/syncers/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,25 @@ func (s *transactionSyncer) syncInternalImpl() error {
}
endpointSlices := make([]*discovery.EndpointSlice, len(slices))
for i, slice := range slices {
endpointSlices[i] = slice.(*discovery.EndpointSlice)
endpointslice := slice.(*discovery.EndpointSlice)
endpointSlices[i] = endpointslice

negCR, err := getNegFromStore(s.svcNegLister, s.Namespace, s.NegSyncerKey.NegName)
if err != nil {
s.logger.Error(err, "unable to retrieve neg from the store", "neg", klog.KRef(s.Namespace, s.NegName))
} else {
var epsStaleness time.Duration
lastSyncTimestamp := negCR.Status.LastSyncTime
epsCreationTimestamp := endpointslice.ObjectMeta.CreationTimestamp

epsStaleness = time.Since(lastSyncTimestamp.Time)
// if this endpoint slice is newly created/created after last sync
if lastSyncTimestamp.Before(&epsCreationTimestamp) {
epsStaleness = time.Since(epsCreationTimestamp.Time)
}
metrics.PublishNegEPSStalenessMetrics(epsStaleness)
s.logger.V(3).Info("Endpoint slice syncs", "Namespace", endpointslice.Namespace, "Name", endpointslice.Name, "staleness", epsStaleness)
}
}
endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices)
targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap)
Expand Down

0 comments on commit 0824e66

Please sign in to comment.