Skip to content

Commit

Permalink
Merge pull request #1930 from sawsa307/neg-eps-staleness-metrics
Browse files Browse the repository at this point in the history
Add metrics to track endpointslice staleness
  • Loading branch information
k8s-ci-robot authored Feb 16, 2023
2 parents 009e314 + 8669f63 commit 98aa2cf
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
15 changes: 15 additions & 0 deletions pkg/neg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
negOpEndpointsKey = "neg_operation_endpoints"
lastSyncTimestampKey = "sync_timestamp"
syncerStalenessKey = "syncer_staleness"
epsStalenessKey = "endpointslice_staleness"

resultSuccess = "success"
resultError = "error"
Expand Down Expand Up @@ -135,6 +136,15 @@ var (
Subsystem: negControllerSubsystem,
Name: syncerStalenessKey,
Help: "The duration of a syncer since it last syncs",
},
)

// EPSStaleness tracks, for every endpoint slice, how long it has been since the slice was last processed.
EPSStaleness = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: negControllerSubsystem,
Name: epsStalenessKey,
Help: "The duration for an endpoint slice since it was last processed by syncer",
// custom buckets - [1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(~4min), 512s(~8min), 1024s(~17min), 2048s(~34min), 4096s(~68min), 8192s(~136min), +Inf]
Buckets: prometheus.ExponentialBuckets(1, 2, 14),
},
Expand All @@ -152,6 +162,7 @@ func RegisterMetrics() {
prometheus.MustRegister(LastSyncTimestamp)
prometheus.MustRegister(InitializationLatency)
prometheus.MustRegister(SyncerStaleness)
prometheus.MustRegister(EPSStaleness)

RegisterSyncerMetrics()
})
Expand Down Expand Up @@ -187,6 +198,10 @@ func PublishNegSyncerStalenessMetrics(syncerStaleness time.Duration) {
SyncerStaleness.Observe(syncerStaleness.Seconds())
}

// PublishNegEPSStalenessMetrics records the given endpoint slice staleness,
// converted to seconds, as a single observation on the EPSStaleness histogram.
func PublishNegEPSStalenessMetrics(epsStaleness time.Duration) {
	stalenessSeconds := epsStaleness.Seconds()
	EPSStaleness.Observe(stalenessSeconds)
}

func getResult(err error) string {
if err != nil {
return resultError
Expand Down
19 changes: 18 additions & 1 deletion pkg/neg/syncers/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,25 @@ func (s *transactionSyncer) syncInternalImpl() error {
return nil
}
endpointSlices := make([]*discovery.EndpointSlice, len(slices))
negCR, err := getNegFromStore(s.svcNegLister, s.Namespace, s.NegSyncerKey.NegName)
for i, slice := range slices {
endpointSlices[i] = slice.(*discovery.EndpointSlice)
endpointslice := slice.(*discovery.EndpointSlice)
endpointSlices[i] = endpointslice
if err != nil {
s.logger.Error(err, "unable to retrieve neg from the store", "neg", klog.KRef(s.Namespace, s.NegName))
continue
}
lastSyncTimestamp := negCR.Status.LastSyncTime
epsCreationTimestamp := endpointslice.ObjectMeta.CreationTimestamp

epsStaleness := time.Since(lastSyncTimestamp.Time)
// if this endpoint slice is newly created/created after last sync
if lastSyncTimestamp.Before(&epsCreationTimestamp) {
epsStaleness = time.Since(epsCreationTimestamp.Time)
}
metrics.PublishNegEPSStalenessMetrics(epsStaleness)
s.logger.V(3).Info("Endpoint slice syncs", "Namespace", endpointslice.Namespace, "Name", endpointslice.Name, "staleness", epsStaleness)

}
endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices)
targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap)
Expand Down

0 comments on commit 98aa2cf

Please sign in to comment.