Skip to content

Commit

Permalink
feat: make it easier to reason about health check failures
Browse files Browse the repository at this point in the history
Whenever a health check times out, the most recently collected
error for each resource (if any) is now printed as part of the error
message. This excludes errors for those resources for which no error
was reported in the last update. This is necessary because, whenever a
timeout occurs, an error is reported on ALL resources, even those that
were previously seen as healthy.

Signed-off-by: Max Jonas Werner <mail@makk.es>
  • Loading branch information
Max Jonas Werner committed Jul 4, 2021
1 parent 57f6610 commit f73621b
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions controllers/kustomization_healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,14 @@ func (hc *KustomizeHealthCheck) Assess(pollInterval time.Duration) error {
opts := polling.Options{PollInterval: pollInterval, UseCache: true}
eventsChan := hc.statusPoller.Poll(ctx, objMetadata, opts)
coll := collector.NewResourceStatusCollector(objMetadata)
lastStatus := make(map[object.ObjMetadata]*event.ResourceStatus)
done := coll.ListenWithObserver(eventsChan, collector.ObserverFunc(
func(statusCollector *collector.ResourceStatusCollector, e event.Event) {
var rss []*event.ResourceStatus
for _, rs := range statusCollector.ResourceStatuses {
if rs.Error == nil {
lastStatus[rs.Identifier] = rs
}
rss = append(rss, rs)
}
desired := status.CurrentStatus
Expand All @@ -81,14 +85,19 @@ func (hc *KustomizeHealthCheck) Assess(pollInterval time.Duration) error {
}

if ctx.Err() == context.DeadlineExceeded {
ids := []string{}
errors := []string{}
for _, rs := range coll.ResourceStatuses {
if rs.Status != status.CurrentStatus {
if lastStatus[rs.Identifier].Status != status.CurrentStatus {
id := hc.objMetadataToString(rs.Identifier)
ids = append(ids, id)
var bld strings.Builder
bld.WriteString(fmt.Sprintf("%s (status '%s')", id, lastStatus[rs.Identifier].Status))
if rs.Error != nil {
bld.WriteString(fmt.Sprintf(": %s", rs.Error))
}
errors = append(errors, bld.String())
}
}
return fmt.Errorf("Health check timed out for [%v]", strings.Join(ids, ", "))
return fmt.Errorf("Health check failed for [%s]", strings.Join(errors, ", "))
}

return nil
Expand Down

0 comments on commit f73621b

Please sign in to comment.