From bbc4208d1cc0f12ab145c3d857322c0974d8b471 Mon Sep 17 00:00:00 2001 From: Max Jonas Werner Date: Thu, 24 Jun 2021 17:57:57 +0200 Subject: [PATCH] feat: make it easier to reason about health check failures Whenever a health check times out, the most recently collected error for each resource is now printed as part of the error message. This excludes errors for those resources for which no error was reported in the last update. This is because whenever a timeout occurs, an error is reported on ALL resources, even those that have been seen as healthy before. Also, this commit causes all successfully checked resources to be omitted from the error event. Signed-off-by: Max Jonas Werner --- controllers/kustomization_healthcheck.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/controllers/kustomization_healthcheck.go b/controllers/kustomization_healthcheck.go index e6d3f13c..7e269d0a 100644 --- a/controllers/kustomization_healthcheck.go +++ b/controllers/kustomization_healthcheck.go @@ -59,10 +59,14 @@ func (hc *KustomizeHealthCheck) Assess(pollInterval time.Duration) error { opts := polling.Options{PollInterval: pollInterval, UseCache: true} eventsChan := hc.statusPoller.Poll(ctx, objMetadata, opts) coll := collector.NewResourceStatusCollector(objMetadata) + lastStatus := make(map[object.ObjMetadata]*event.ResourceStatus) done := coll.ListenWithObserver(eventsChan, collector.ObserverFunc( func(statusCollector *collector.ResourceStatusCollector, e event.Event) { var rss []*event.ResourceStatus for _, rs := range statusCollector.ResourceStatuses { + if rs.Error == nil { + lastStatus[rs.Identifier] = rs + } rss = append(rss, rs) } desired := status.CurrentStatus @@ -81,14 +85,19 @@ func (hc *KustomizeHealthCheck) Assess(pollInterval time.Duration) error { } if ctx.Err() == context.DeadlineExceeded { - ids := []string{} + errors := []string{} for _, rs := range coll.ResourceStatuses { - if rs.Status != status.CurrentStatus { + if 
lastStatus[rs.Identifier].Status != status.CurrentStatus { id := hc.objMetadataToString(rs.Identifier) - ids = append(ids, id) + var bld strings.Builder + bld.WriteString(fmt.Sprintf("%s (status '%s')", id, lastStatus[rs.Identifier].Status)) + if rs.Error != nil { + bld.WriteString(fmt.Sprintf(": %s", rs.Error)) + } + errors = append(errors, bld.String()) } } - return fmt.Errorf("Health check timed out for [%v]", strings.Join(ids, ", ")) + return fmt.Errorf("Health check failed for [%s]", strings.Join(errors, ", ")) } return nil