From 041e68ff29b8e99a0298ad62d76b26d519714452 Mon Sep 17 00:00:00 2001 From: Zach Aller Date: Mon, 5 Feb 2024 13:59:33 -0600 Subject: [PATCH] fix: do not require pod readiness when switching desired service selector on abort (#3338) * do not switch service selectors back when using alb due to race between two controllers with pod readiness gates Signed-off-by: Zach Aller * update tests for alb Signed-off-by: Zach Aller * lets not check for readiness instead Signed-off-by: Zach Aller * clean up notes Signed-off-by: Zach Aller * fix / Signed-off-by: Zach Aller --------- Signed-off-by: Zach Aller --- rollout/trafficrouting.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rollout/trafficrouting.go b/rollout/trafficrouting.go index a87e31a9e8..df914e2df0 100644 --- a/rollout/trafficrouting.go +++ b/rollout/trafficrouting.go @@ -179,13 +179,15 @@ func (c *rolloutContext) reconcileTrafficRouting() error { desiredWeight = c.calculateDesiredWeightOnAbortOrStableRollback() if (c.rollout.Spec.Strategy.Canary.DynamicStableScale && desiredWeight == 0) || !c.rollout.Spec.Strategy.Canary.DynamicStableScale { // If we are using dynamic stable scale we need to also make sure that desiredWeight=0 aka we are completely - // done with aborting before resetting the canary service selectors back to stable - err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, true) + // done with aborting before resetting the canary service selectors back to stable. 
For non-dynamic scale we do not check for availability because we are + // fully aborted and the stable pods will be there. Checking for availability causes issues with ALB readiness gates: if all stable pods + // have the desired readiness gate on them during an abort, we get stuck in a loop because all the stable pods go unready and rollouts won't be able + // to switch the desired services because there are no ready pods, which causes pods to get stuck progressing forever waiting for readiness. + err = c.ensureSVCTargets(c.rollout.Spec.Strategy.Canary.CanaryService, c.stableRS, false) if err != nil { return err } } - err := reconciler.RemoveManagedRoutes() if err != nil { return err