Skip to content

Commit

Permalink
Merge pull request #1955 from cezarygerard/master
Browse files Browse the repository at this point in the history
improve L4FailedHealthCheckCount metric
  • Loading branch information
k8s-ci-robot authored Feb 17, 2023
2 parents 95cebd3 + b1304fb commit b387d39
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
6 changes: 5 additions & 1 deletion pkg/l4lb/l4controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,15 @@ func (l4c *L4Controller) checkHealth() error {
syncTimeLatest := lastEnqueueTime.Add(enqueueToSyncDelayThreshold)
controllerHealth := l4metrics.ControllerHealthyStatus
if lastSyncTime.After(syncTimeLatest) {
msg := fmt.Sprintf("L4 ILB Sync happened at time %v - %v after enqueue time, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
msg := fmt.Sprintf("L4 ILB Sync happened at time %v, %v after enqueue time, last enqueue time %v, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), lastEnqueueTime, enqueueToSyncDelayThreshold)
// Log here; the context/http handler does not log the error.
klog.Error(msg)
l4metrics.PublishL4FailedHealthCheckCount(l4ILBControllerName)
controllerHealth = l4metrics.ControllerUnhealthyStatus
// Reset trackers. Otherwise, if there is nothing in the queue, the FailedHealthCheckCount would be reported every time checkHealth is called.
// If checkHealth returned an error (as it is meant to), the container would be restarted and the trackers would be reset anyway.
l4c.enqueueTracker.Track()
l4c.syncTracker.Track()
}
if l4c.enableDualStack {
l4metrics.PublishL4ControllerHealthCheckStatus(l4ILBDualStackControllerName, controllerHealth)
Expand Down
6 changes: 5 additions & 1 deletion pkg/l4lb/l4netlbcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,15 @@ func (lc *L4NetLBController) checkHealth() error {
syncTimeLatest := lastEnqueueTime.Add(enqueueToSyncDelayThreshold)
controllerHealth := metrics.ControllerHealthyStatus
if lastSyncTime.After(syncTimeLatest) {
msg := fmt.Sprintf("L4 External LoadBalancer Sync happened at time %v - %v after enqueue time, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), enqueueToSyncDelayThreshold)
msg := fmt.Sprintf("L4 NetLB Sync happened at time %v, %v after enqueue time, last enqueue time %v, threshold is %v", lastSyncTime, lastSyncTime.Sub(lastEnqueueTime), lastEnqueueTime, enqueueToSyncDelayThreshold)
// Log here; the context/http handler does not log the error.
klog.Error(msg)
metrics.PublishL4FailedHealthCheckCount(l4NetLBControllerName)
controllerHealth = metrics.ControllerUnhealthyStatus
// Reset trackers. Otherwise, if there is nothing in the queue, the FailedHealthCheckCount would be reported every time checkHealth is called.
// If checkHealth returned an error (as it is meant to), the container would be restarted and the trackers would be reset anyway.
lc.enqueueTracker.Track()
lc.syncTracker.Track()
}
if lc.enableDualStack {
metrics.PublishL4ControllerHealthCheckStatus(l4NetLBDualStackControllerName, controllerHealth)
Expand Down

0 comments on commit b387d39

Please sign in to comment.