Skip to content

Commit

Permalink
improve L4FailedHealthCheckCount metric (do not report multiple times …
Browse files Browse the repository at this point in the history
…the same case)
  • Loading branch information
cezarygerard committed Feb 15, 2023
1 parent 41b2aa7 commit b5cbe60
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
6 changes: 5 additions & 1 deletion pkg/l4lb/l4controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,14 @@ func (l4c *L4Controller) checkHealth() error {
// This indicates that the controller was stuck handling a previous update, or sync function did not get invoked.
syncTimeLatest := lastEnqueueTime.Add(enqueueToSyncDelayThreshold)
if lastSyncTime.After(syncTimeLatest) {
msg := fmt.Sprintf("L4 ILB Sync happened at time %v - %v after enqueue time, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
msg := fmt.Sprintf("L4 ILB Sync happened at time %v, last enqueue time %v- %v after enqueue time, threshold is %v", lastSyncTime, lastEnqueueTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
// Log here; the context/http handler does not log the error.
klog.Error(msg)
l4metrics.PublishL4FailedHealthCheckCount(l4ILBControllerName)
// Reset trackers. Otherwise, if there is nothing in the queue, it would report FailedHealthCheckCount every time checkHealth is called.
// If checkHealth returned an error (as it is meant to), the container would be restarted and the trackers would be reset anyway.
l4c.enqueueTracker.Track()
l4c.syncTracker.Track()
}
return nil
}
Expand Down
8 changes: 6 additions & 2 deletions pkg/l4lb/l4netlbcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,14 @@ func (lc *L4NetLBController) checkHealth() error {
// This indicates that the controller was stuck handling a previous update, or sync function did not get invoked.
syncTimeLatest := lastEnqueueTime.Add(enqueueToSyncDelayThreshold)
if lastSyncTime.After(syncTimeLatest) {
msg := fmt.Sprintf("L4 External LoadBalancer Sync happened at time %v - %v after enqueue time, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
msg := fmt.Sprintf("L4 ILB Sync happened at time %v, last enqueue time %v- %v after enqueue time, threshold is %v", lastSyncTime, lastEnqueueTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
// Log here; the context/http handler does not log the error.
klog.Error(msg)
metrics.PublishL4FailedHealthCheckCount(l4NetLBControllerName)
metrics.PublishL4FailedHealthCheckCount(l4ILBControllerName)
// Reset trackers. Otherwise, if there is nothing in the queue, it would report FailedHealthCheckCount every time checkHealth is called.
// If checkHealth returned an error (as it is meant to), the container would be restarted and the trackers would be reset anyway.
lc.enqueueTracker.Track()
lc.syncTracker.Track()
}
return nil
}
Expand Down

0 comments on commit b5cbe60

Please sign in to comment.