Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics for syncer states #1912

Merged
merged 1 commit into from
Feb 28, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions pkg/neg/metrics/neg_metrics_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ var (
syncResultLabel = "result"
syncResultKey = "sync_result"

syncerStateLabel = "state"
syncerStateKey = "syncer_state"

// syncerSyncResult tracks the count for each sync result
syncerSyncResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Expand All @@ -39,6 +42,16 @@ var (
},
[]string{syncResultLabel},
)

// syncerSyncerState tracks the count of syncers in each state
syncerSyncerState = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: negControllerSubsystem,
Name: syncerStateKey,
Help: "Current count of syncers in each state",
},
[]string{syncerStateLabel},
)
)

type SyncerMetricsCollector interface {
Expand All @@ -47,8 +60,8 @@ type SyncerMetricsCollector interface {
}

type SyncerMetrics struct {
// syncerStatusMap tracks the status of each syncer
syncerStatusMap map[negtypes.NegSyncerKey]string
// syncerStateMap tracks the state of each syncer
syncerStateMap map[negtypes.NegSyncerKey]string
// syncerEndpointStateMap is a map between syncer and endpoint state counts
syncerEndpointStateMap map[negtypes.NegSyncerKey]negtypes.StateCountMap
// syncerEPSStateMap is a map between syncer and endpoint slice state counts
Expand All @@ -64,7 +77,7 @@ type SyncerMetrics struct {
// NewNegMetricsCollector initializes SyncerMetrics and starts a goroutine to compute and export metrics periodically.
func NewNegMetricsCollector(exportInterval time.Duration, logger klog.Logger) *SyncerMetrics {
return &SyncerMetrics{
syncerStatusMap: make(map[negtypes.NegSyncerKey]string),
syncerStateMap: make(map[negtypes.NegSyncerKey]string),
syncerEndpointStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
syncerEPSStateMap: make(map[negtypes.NegSyncerKey]negtypes.StateCountMap),
metricsInterval: exportInterval,
Expand All @@ -80,6 +93,7 @@ func FakeSyncerMetrics() *SyncerMetrics {
// RegisterSyncerMetrics registers the syncer collectors (the sync result
// counter and the syncer state gauge) with the default Prometheus registry.
// MustRegister panics if any collector fails to register.
func RegisterSyncerMetrics() {
	prometheus.MustRegister(
		syncerSyncResult,
		syncerSyncerState,
	)
}

func (sm *SyncerMetrics) Run(stopCh <-chan struct{}) {
Expand All @@ -94,9 +108,14 @@ func (sm *SyncerMetrics) Run(stopCh <-chan struct{}) {

// export publishes the current per-state syncer counts: it asks
// computeSyncerStateMetrics for the tally, logs the total number of syncers,
// and sets the syncerSyncerState gauge for each state label to its count.
func (sm *SyncerMetrics) export() {
	countsByState, totalSyncers := sm.computeSyncerStateMetrics()
	sm.logger.V(3).Info("Exporting syncer state metrics.", "Syncer count", totalSyncers)

	for state, n := range countsByState {
		gauge := syncerSyncerState.WithLabelValues(state)
		gauge.Set(float64(n))
	}
}

// UpdateSyncer update the status of corresponding syncer based on the syncResult.
// UpdateSyncer updates the state of the corresponding syncer based on the syncResult.
func (sm *SyncerMetrics) UpdateSyncer(key negtypes.NegSyncerKey, syncResult *negtypes.NegSyncResult) {
if syncResult.Result == negtypes.ResultInProgress {
return
Expand All @@ -105,11 +124,11 @@ func (sm *SyncerMetrics) UpdateSyncer(key negtypes.NegSyncerKey, syncResult *neg

sm.mu.Lock()
defer sm.mu.Unlock()
if sm.syncerStatusMap == nil {
sm.syncerStatusMap = make(map[negtypes.NegSyncerKey]string)
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerStatusMap: %v", sm.syncerStatusMap)
if sm.syncerStateMap == nil {
sm.syncerStateMap = make(map[negtypes.NegSyncerKey]string)
sm.logger.V(3).Info("Syncer Metrics failed to initialize correctly, reinitializing syncerStateMap: %v", sm.syncerStateMap)
}
sm.syncerStatusMap[key] = syncResult.Result
sm.syncerStateMap[key] = syncResult.Result
}

// SetSyncerEPMetrics updates the endpoint count based on the endpointStat
Expand All @@ -128,3 +147,32 @@ func (sm *SyncerMetrics) SetSyncerEPMetrics(key negtypes.NegSyncerKey, endpointS
}
sm.syncerEPSStateMap[key] = endpointStat.EndpointSliceStateCount
}

// computeSyncerStateMetrics tallies, under sm.mu, how many syncers are
// recorded in each state in syncerStateMap.
//
// Every state listed below is present in the returned map even when no syncer
// is in it (count 0). A state found in syncerStateMap that is not listed is
// still counted under its own key.
//
// It returns the per-state counts and the total number of tracked syncers.
func (sm *SyncerMetrics) computeSyncerStateMetrics() (map[string]int, int) {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	sm.logger.V(3).Info("computing syncer state metrics")

	// States that must always be reported, even with a zero count.
	knownStates := []string{
		negtypes.ResultEPCountsDiffer,
		negtypes.ResultEPMissingNodeName,
		negtypes.ResultNodeNotFound,
		negtypes.ResultEPMissingZone,
		negtypes.ResultEPSEndpointCountZero,
		negtypes.ResultEPCalculationCountZero,
		negtypes.ResultInvalidAPIResponse,
		negtypes.ResultInvalidEPAttach,
		negtypes.ResultInvalidEPDetach,
		negtypes.ResultNegNotFound,
		negtypes.ResultCurrentEPNotFound,
		negtypes.ResultEPSNotFound,
		negtypes.ResultOtherError,
		negtypes.ResultSuccess,
	}

	tally := make(map[string]int, len(knownStates))
	for _, state := range knownStates {
		tally[state] = 0
	}
	for _, state := range sm.syncerStateMap {
		tally[state]++
	}

	// One map entry per syncer, so the map length is the syncer total.
	return tally, len(sm.syncerStateMap)
}