From 168bcb7cf409c38ffe31f36fb2310850d6da1e3f Mon Sep 17 00:00:00 2001 From: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> Date: Wed, 26 Jul 2023 21:03:03 +0000 Subject: [PATCH] fix(controller): log failed attempts to update operation state (#14273) * fix(controller): log failed attempts to update operation state Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> * new package name Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> * Update controller/appcontroller_test.go Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> --------- Signed-off-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com> --- controller/appcontroller.go | 114 +++++++++++++++++-------------- controller/appcontroller_test.go | 37 ++++++++++ 2 files changed, 98 insertions(+), 53 deletions(-) diff --git a/controller/appcontroller.go b/controller/appcontroller.go index 052dd35d094cc..0ce5e8b558a90 100644 --- a/controller/appcontroller.go +++ b/controller/appcontroller.go @@ -1240,40 +1240,44 @@ func (ctrl *ApplicationController) processRequestedAppOperation(app *appv1.Appli } func (ctrl *ApplicationController) setOperationState(app *appv1.Application, state *appv1.OperationState) { - kube.RetryUntilSucceed(context.Background(), updateOperationStateTimeout, "Update application operation state", logutils.NewLogrusLogger(logutils.NewWithCurrentConfig()), func() error { - if state.Phase == "" { - // expose any bugs where we neglect to set phase - panic("no phase was set") - } - if state.Phase.Completed() { - now := metav1.Now() - state.FinishedAt = &now - } - patch := map[string]interface{}{ - "status": map[string]interface{}{ - "operationState": state, - }, - } - if state.Phase.Completed() { - // If operation is completed, clear the operation field to indicate no operation is - // in progress. - patch["operation"] = nil - } - if reflect.DeepEqual(app.Status.OperationState, state) { - log.Infof("No operation updates necessary to '%s'. Skipping patch", app.QualifiedName()) - return nil - } - patchJSON, err := json.Marshal(patch) + logCtx := log.WithFields(log.Fields{"application": app.Name, "appNamespace": app.Namespace, "project": app.Spec.Project}) + + if state.Phase == "" { + // expose any bugs where we neglect to set phase + panic("no phase was set") + } + if state.Phase.Completed() { + now := metav1.Now() + state.FinishedAt = &now + } + patch := map[string]interface{}{ + "status": map[string]interface{}{ + "operationState": state, + }, + } + if state.Phase.Completed() { + // If operation is completed, clear the operation field to indicate no operation is + // in progress. + patch["operation"] = nil + } + if reflect.DeepEqual(app.Status.OperationState, state) { + logCtx.Infof("No operation updates necessary to '%s'. Skipping patch", app.QualifiedName()) + return + } + patchJSON, err := json.Marshal(patch) + if err != nil { + logCtx.Errorf("error marshaling json: %v", err) + return + } + if app.Status.OperationState != nil && app.Status.OperationState.FinishedAt != nil && state.FinishedAt == nil { + patchJSON, err = jsonpatch.MergeMergePatches(patchJSON, []byte(`{"status": {"operationState": {"finishedAt": null}}}`)) if err != nil { - return fmt.Errorf("error marshaling json: %w", err) - } - if app.Status.OperationState != nil && app.Status.OperationState.FinishedAt != nil && state.FinishedAt == nil { - patchJSON, err = jsonpatch.MergeMergePatches(patchJSON, []byte(`{"status": {"operationState": {"finishedAt": null}}}`)) - if err != nil { - return fmt.Errorf("error merging operation state patch: %w", err) - } + logCtx.Errorf("error merging operation state patch: %v", err) + return } + } + kube.RetryUntilSucceed(context.Background(), updateOperationStateTimeout, "Update application operation state", logutils.NewLogrusLogger(logutils.NewWithCurrentConfig()), func() error { appClient := ctrl.applicationClientset.ArgoprojV1alpha1().Applications(app.Namespace) _, err = appClient.Patch(context.Background(), app.Name, types.MergePatchType, patchJSON, metav1.PatchOptions{}) if err != nil { @@ -1281,32 +1285,36 @@ func (ctrl *ApplicationController) setOperationState(app *appv1.Application, sta if apierr.IsNotFound(err) { return nil } + // kube.RetryUntilSucceed logs failed attempts at "debug" level, but we want to know if this fails. Log a + // warning. + logCtx.Warnf("error patching application with operation state: %v", err) return fmt.Errorf("error patching application with operation state: %w", err) } - log.Infof("updated '%s' operation (phase: %s)", app.QualifiedName(), state.Phase) - if state.Phase.Completed() { - eventInfo := argo.EventInfo{Reason: argo.EventReasonOperationCompleted} - var messages []string - if state.Operation.Sync != nil && len(state.Operation.Sync.Resources) > 0 { - messages = []string{"Partial sync operation"} - } else { - messages = []string{"Sync operation"} - } - if state.SyncResult != nil { - messages = append(messages, "to", state.SyncResult.Revision) - } - if state.Phase.Successful() { - eventInfo.Type = v1.EventTypeNormal - messages = append(messages, "succeeded") - } else { - eventInfo.Type = v1.EventTypeWarning - messages = append(messages, "failed:", state.Message) - } - ctrl.auditLogger.LogAppEvent(app, eventInfo, strings.Join(messages, " "), "") - ctrl.metricsServer.IncSync(app, state) - } return nil }) + + logCtx.Infof("updated '%s' operation (phase: %s)", app.QualifiedName(), state.Phase) + if state.Phase.Completed() { + eventInfo := argo.EventInfo{Reason: argo.EventReasonOperationCompleted} + var messages []string + if state.Operation.Sync != nil && len(state.Operation.Sync.Resources) > 0 { + messages = []string{"Partial sync operation"} + } else { + messages = []string{"Sync operation"} + } + if state.SyncResult != nil { + messages = append(messages, "to", state.SyncResult.Revision) + } + if state.Phase.Successful() { + eventInfo.Type = v1.EventTypeNormal + messages = append(messages, "succeeded") + } else { + eventInfo.Type = v1.EventTypeWarning + messages = append(messages, "failed:", state.Message) + } + ctrl.auditLogger.LogAppEvent(app, eventInfo, strings.Join(messages, " "), "") + ctrl.metricsServer.IncSync(app, state) + } } func (ctrl *ApplicationController) processAppRefreshQueueItem() (processNext bool) { diff --git a/controller/appcontroller_test.go b/controller/appcontroller_test.go index 04a5b421f75ad..a43dcfd643062 100644 --- a/controller/appcontroller_test.go +++ b/controller/appcontroller_test.go @@ -3,9 +3,11 @@ package controller import ( "context" "encoding/json" + "errors" "testing" "time" + "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/api/resource" clustercache "github.com/argoproj/gitops-engine/pkg/cache" @@ -927,6 +929,41 @@ func TestSetOperationStateOnDeletedApp(t *testing.T) { assert.True(t, patched) } +type logHook struct { + entries []logrus.Entry +} + +func (h *logHook) Levels() []logrus.Level { + return []logrus.Level{logrus.WarnLevel} +} + +func (h *logHook) Fire(entry *logrus.Entry) error { + h.entries = append(h.entries, *entry) + return nil +} + +func TestSetOperationStateLogRetries(t *testing.T) { + hook := logHook{} + logrus.AddHook(&hook) + t.Cleanup(func() { + logrus.StandardLogger().ReplaceHooks(logrus.LevelHooks{}) + }) + ctrl := newFakeController(&fakeData{apps: []runtime.Object{}}) + fakeAppCs := ctrl.applicationClientset.(*appclientset.Clientset) + fakeAppCs.ReactionChain = nil + patched := false + fakeAppCs.AddReactor("patch", "*", func(action kubetesting.Action) (handled bool, ret runtime.Object, err error) { + if !patched { + patched = true + return true, nil, errors.New("fake error") + } + return true, nil, nil + }) + ctrl.setOperationState(newFakeApp(), &v1alpha1.OperationState{Phase: synccommon.OperationSucceeded}) + assert.True(t, patched) + assert.Contains(t, hook.entries[0].Message, "fake error") +} + func TestNeedRefreshAppStatus(t *testing.T) { testCases := []struct { name string