diff --git a/pkg/controllers/deprovisioning/consolidation.go b/pkg/controllers/deprovisioning/consolidation.go index f690683367..4e117a395e 100644 --- a/pkg/controllers/deprovisioning/consolidation.go +++ b/pkg/controllers/deprovisioning/consolidation.go @@ -80,8 +80,17 @@ func (c *consolidation) sortAndFilterCandidates(ctx context.Context, nodes []Can // filter out nodes that can't be terminated nodes = lo.Filter(nodes, func(cn CandidateNode, _ int) bool { - if reason, canTerminate := canBeTerminated(cn, pdbs); !canTerminate { - c.recorder.Publish(deprovisioningevents.UnconsolidatableReason(cn.Node, reason)) + if !cn.DeletionTimestamp.IsZero() { + c.recorder.Publish(deprovisioningevents.UnconsolidatableReason(cn.Node, "in the process of deletion")) + return false + } + if pdb, ok := pdbs.CanEvictPods(cn.pods); !ok { + c.recorder.Publish(deprovisioningevents.UnconsolidatableReason(cn.Node, fmt.Sprintf("pdb %s prevents pod evictions", pdb))) + return false + } + if p, ok := hasDoNotEvictPod(cn); ok { + c.recorder.Publish(deprovisioningevents.UnconsolidatableReason(cn.Node, + fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name))) return false } return true diff --git a/pkg/controllers/deprovisioning/controller.go b/pkg/controllers/deprovisioning/controller.go index 1a0c44ce3e..f5be95099a 100644 --- a/pkg/controllers/deprovisioning/controller.go +++ b/pkg/controllers/deprovisioning/controller.go @@ -81,9 +81,9 @@ func NewController(clk clock.Clock, kubeClient client.Client, provisioner *provi cloudProvider: cp, deprovisioners: []Deprovisioner{ // Expire any nodes that must be deleted, allowing their pods to potentially land on currently - NewExpiration(clk, kubeClient, cluster, provisioner), + NewExpiration(clk, kubeClient, cluster, provisioner, recorder), // Terminate any nodes that have drifted from provisioning specifications, allowing the pods to reschedule. - NewDrift(kubeClient, cluster, provisioner), + NewDrift(kubeClient, cluster, provisioner, recorder), // Delete any remaining empty nodes as there is zero cost in terms of dirsuption. Emptiness and // emptyNodeConsolidation are mutually exclusive, only one of these will operate NewEmptiness(clk), diff --git a/pkg/controllers/deprovisioning/drift.go b/pkg/controllers/deprovisioning/drift.go index 7998d27361..a96464416b 100644 --- a/pkg/controllers/deprovisioning/drift.go +++ b/pkg/controllers/deprovisioning/drift.go @@ -26,8 +26,10 @@ import ( "github.com/aws/karpenter-core/pkg/apis/settings" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + deprovisioningevents "github.com/aws/karpenter-core/pkg/controllers/deprovisioning/events" "github.com/aws/karpenter-core/pkg/controllers/provisioning" "github.com/aws/karpenter-core/pkg/controllers/state" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter-core/pkg/metrics" ) @@ -37,13 +39,15 @@ type Drift struct { kubeClient client.Client cluster *state.Cluster provisioner *provisioning.Provisioner + recorder events.Recorder } -func NewDrift(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner) *Drift { +func NewDrift(kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Drift { return &Drift{ kubeClient: kubeClient, cluster: cluster, provisioner: provisioner, + recorder: recorder, } } @@ -62,9 +66,21 @@ func (d *Drift) ComputeCommand(ctx context.Context, candidates ...CandidateNode) if err != nil { return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err) } - candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool { - _, canTerminate := canBeTerminated(n, pdbs) - return canTerminate + // filter out nodes that can't be terminated + candidates = lo.Filter(candidates, func(cn CandidateNode, _ int) bool { + if !cn.DeletionTimestamp.IsZero() { + return false + } + if pdb, ok := pdbs.CanEvictPods(cn.pods); !ok { + d.recorder.Publish(deprovisioningevents.BlockedDeprovisioning(cn.Node, fmt.Sprintf("pdb %s prevents pod evictions", pdb))) + return false + } + if p, ok := hasDoNotEvictPod(cn); ok { + d.recorder.Publish(deprovisioningevents.BlockedDeprovisioning(cn.Node, + fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name))) + return false + } + return true }) for _, candidate := range candidates { diff --git a/pkg/controllers/deprovisioning/emptiness.go b/pkg/controllers/deprovisioning/emptiness.go index df02d95673..f35f3cbeed 100644 --- a/pkg/controllers/deprovisioning/emptiness.go +++ b/pkg/controllers/deprovisioning/emptiness.go @@ -66,8 +66,7 @@ func (e *Emptiness) ShouldDeprovision(ctx context.Context, n *state.Node, provis // ComputeCommand generates a deprovisioning command given deprovisionable nodes func (e *Emptiness) ComputeCommand(_ context.Context, nodes ...CandidateNode) (Command, error) { emptyNodes := lo.Filter(nodes, func(n CandidateNode, _ int) bool { - _, canTerminate := canBeTerminated(n, nil) - return len(n.pods) == 0 && canTerminate + return n.DeletionTimestamp.IsZero() && len(n.pods) == 0 }) if len(emptyNodes) == 0 { diff --git a/pkg/controllers/deprovisioning/events/events.go b/pkg/controllers/deprovisioning/events/events.go index 4150695ba7..23a357e38d 100644 --- a/pkg/controllers/deprovisioning/events/events.go +++ b/pkg/controllers/deprovisioning/events/events.go @@ -23,6 +23,16 @@ import ( "github.com/aws/karpenter-core/pkg/events" ) +func BlockedDeprovisioning(node *v1.Node, reason string) events.Event { + return events.Event{ + InvolvedObject: node, + Type: v1.EventTypeNormal, + Reason: "BlockedDeprovisioning", + Message: fmt.Sprintf("Cannot deprovision node due to %s", reason), + DedupeValues: []string{node.Name, reason}, + } +} + func TerminatingNode(node *v1.Node, reason string) events.Event { return events.Event{ InvolvedObject: node, diff --git a/pkg/controllers/deprovisioning/expiration.go b/pkg/controllers/deprovisioning/expiration.go index c3f9319b62..7753c89d5b 100644 --- a/pkg/controllers/deprovisioning/expiration.go +++ b/pkg/controllers/deprovisioning/expiration.go @@ -30,8 +30,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + deprovisioningevents "github.com/aws/karpenter-core/pkg/controllers/deprovisioning/events" "github.com/aws/karpenter-core/pkg/controllers/provisioning" "github.com/aws/karpenter-core/pkg/controllers/state" + "github.com/aws/karpenter-core/pkg/events" "github.com/aws/karpenter-core/pkg/metrics" ) @@ -42,14 +44,16 @@ type Expiration struct { kubeClient client.Client cluster *state.Cluster provisioner *provisioning.Provisioner + recorder events.Recorder } -func NewExpiration(clk clock.Clock, kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner) *Expiration { +func NewExpiration(clk clock.Clock, kubeClient client.Client, cluster *state.Cluster, provisioner *provisioning.Provisioner, recorder events.Recorder) *Expiration { return &Expiration{ clock: clk, kubeClient: kubeClient, cluster: cluster, provisioner: provisioner, + recorder: recorder, } } @@ -73,9 +77,21 @@ func (e *Expiration) ComputeCommand(ctx context.Context, candidates ...Candidate if err != nil { return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err) } - candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool { - _, canTerminate := canBeTerminated(n, pdbs) - return canTerminate + // filter out nodes that can't be terminated + candidates = lo.Filter(candidates, func(cn CandidateNode, _ int) bool { + if !cn.DeletionTimestamp.IsZero() { + return false + } + if pdb, ok := pdbs.CanEvictPods(cn.pods); !ok { + e.recorder.Publish(deprovisioningevents.BlockedDeprovisioning(cn.Node, fmt.Sprintf("pdb %s prevents pod evictions", pdb))) + return false + } + if p, ok := hasDoNotEvictPod(cn); ok { + e.recorder.Publish(deprovisioningevents.BlockedDeprovisioning(cn.Node, + fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name))) + return false + } + return true }) for _, candidate := range candidates { diff --git a/pkg/controllers/deprovisioning/helpers.go b/pkg/controllers/deprovisioning/helpers.go index 0d86943794..fc39c1f0ae 100644 --- a/pkg/controllers/deprovisioning/helpers.go +++ b/pkg/controllers/deprovisioning/helpers.go @@ -325,22 +325,11 @@ func mapNodes(nodes []*v1.Node, candidateNodes []CandidateNode) []CandidateNode return ret } -func canBeTerminated(node CandidateNode, pdbs *PDBLimits) (string, bool) { - if !node.DeletionTimestamp.IsZero() { - return "in the process of deletion", false - } - if pdbs != nil { - if pdb, ok := pdbs.CanEvictPods(node.pods); !ok { - return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false - } - } - if p, ok := lo.Find(node.pods, func(p *v1.Pod) bool { +func hasDoNotEvictPod(cn CandidateNode) (*v1.Pod, bool) { + return lo.Find(cn.pods, func(p *v1.Pod) bool { if pod.IsTerminating(p) || pod.IsTerminal(p) || pod.IsOwnedByNode(p) { return false } return pod.HasDoNotEvict(p) - }); ok { - return fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name), false - } - return "", true + }) }