Skip to content

Commit

Permalink
Add metrics to track endpointslice staleness
Browse files Browse the repository at this point in the history
Added metrics to track the sync staleness of endpointslices, where
staleness is defined as how long since it has been last processed.
  • Loading branch information
sawsa307 committed Feb 15, 2023
1 parent 009e314 commit 8669f63
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
15 changes: 15 additions & 0 deletions pkg/neg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
negOpEndpointsKey = "neg_operation_endpoints"
lastSyncTimestampKey = "sync_timestamp"
syncerStalenessKey = "syncer_staleness"
epsStalenessKey = "endpointslice_staleness"

resultSuccess = "success"
resultError = "error"
Expand Down Expand Up @@ -135,6 +136,15 @@ var (
Subsystem: negControllerSubsystem,
Name: syncerStalenessKey,
Help: "The duration of a syncer since it last syncs",
},
)

// EPSStaleness tracks for every endpoint slice, how long since it was last processed
EPSStaleness = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: negControllerSubsystem,
Name: epsStalenessKey,
Help: "The duration for an endpoint slice since it was last processed by syncer",
// custom buckets - [1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(~4min), 512s(~8min), 1024s(~17min), 2048 (~34min), 4096(~68min), 8192(~136min), +Inf]
Buckets: prometheus.ExponentialBuckets(1, 2, 14),
},
Expand All @@ -152,6 +162,7 @@ func RegisterMetrics() {
prometheus.MustRegister(LastSyncTimestamp)
prometheus.MustRegister(InitializationLatency)
prometheus.MustRegister(SyncerStaleness)
prometheus.MustRegister(EPSStaleness)

RegisterSyncerMetrics()
})
Expand Down Expand Up @@ -187,6 +198,10 @@ func PublishNegSyncerStalenessMetrics(syncerStaleness time.Duration) {
SyncerStaleness.Observe(syncerStaleness.Seconds())
}

func PublishNegEPSStalenessMetrics(epsStaleness time.Duration) {
EPSStaleness.Observe(epsStaleness.Seconds())
}

func getResult(err error) string {
if err != nil {
return resultError
Expand Down
19 changes: 18 additions & 1 deletion pkg/neg/syncers/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,25 @@ func (s *transactionSyncer) syncInternalImpl() error {
return nil
}
endpointSlices := make([]*discovery.EndpointSlice, len(slices))
negCR, err := getNegFromStore(s.svcNegLister, s.Namespace, s.NegSyncerKey.NegName)
for i, slice := range slices {
endpointSlices[i] = slice.(*discovery.EndpointSlice)
endpointslice := slice.(*discovery.EndpointSlice)
endpointSlices[i] = endpointslice
if err != nil {
s.logger.Error(err, "unable to retrieve neg from the store", "neg", klog.KRef(s.Namespace, s.NegName))
continue
}
lastSyncTimestamp := negCR.Status.LastSyncTime
epsCreationTimestamp := endpointslice.ObjectMeta.CreationTimestamp

epsStaleness := time.Since(lastSyncTimestamp.Time)
// if this endpoint slice is newly created/created after last sync
if lastSyncTimestamp.Before(&epsCreationTimestamp) {
epsStaleness = time.Since(epsCreationTimestamp.Time)
}
metrics.PublishNegEPSStalenessMetrics(epsStaleness)
s.logger.V(3).Info("Endpoint slice syncs", "Namespace", endpointslice.Namespace, "Name", endpointslice.Name, "staleness", epsStaleness)

}
endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices)
targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap)
Expand Down

0 comments on commit 8669f63

Please sign in to comment.