Skip to content

Commit

Permalink
Merge pull request #848 from fluxcd/remediation-stall
Browse files Browse the repository at this point in the history
Early stall condition detection after remediation
  • Loading branch information
hiddeco authored Dec 14, 2023
2 parents abcdfef + fa5e284 commit 603d321
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
10 changes: 9 additions & 1 deletion internal/reconcile/atomic_release.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,18 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
"instructed to stop after running %s action reconciler %s", next.Type(), next.Name()),
)

if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
remediation := req.Object.GetActiveRemediation()
if remediation == nil || !remediation.RetriesExhausted(req.Object) {
conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
return ErrMustRequeue
}
// Check if retries have exhausted after remediation for early
// stall condition detection.
if remediation != nil && remediation.RetriesExhausted(req.Object) {
conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)",
req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object))
return ErrExceededMaxRetries
}

conditions.Delete(req.Object, meta.ReconcilingCondition)
return nil
Expand Down
54 changes: 54 additions & 0 deletions internal/reconcile/atomic_release_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with remediation",
Expand All @@ -654,6 +655,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
snap,
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with test ignore",
Expand Down Expand Up @@ -759,6 +761,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade failure with uninstall remediation",
Expand Down Expand Up @@ -799,6 +802,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with remediation",
Expand Down Expand Up @@ -841,6 +845,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with test ignore",
Expand Down Expand Up @@ -928,6 +933,55 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade remediation results in exhausted retries",
releases: func(namespace string) []*helmrelease.Release {
return []*helmrelease.Release{
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 1,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 2,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 3,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusFailed,
}),
}
},
spec: func(spec *v2.HelmReleaseSpec) {
spec.Upgrade = &v2.Upgrade{
Remediation: &v2.UpgradeRemediation{
Retries: 1,
},
}
},
status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus {
return v2.HelmReleaseStatus{
History: v2.Snapshots{
release.ObservedToSnapshot(release.ObserveRelease(releases[2])),
release.ObservedToSnapshot(release.ObserveRelease(releases[1])),
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
},
LastAttemptedReleaseAction: v2.ReleaseActionUpgrade,
Failures: 2,
UpgradeFailures: 2,
}
},
chart: testutil.BuildChart(),
wantErr: ErrExceededMaxRetries,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 603d321

Please sign in to comment.