Prevent eviction hanging due to do-not-evict (#220)
jonathan-innis committed Feb 22, 2023
1 parent c4269ec commit 2fe0c76
Showing 8 changed files with 31 additions and 265 deletions.
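
This commit moves the do-not-evict handling out of the node drain path and into the deprovisioners, which now pre-filter their candidate nodes before doing any further work on them. Below is a minimal, self-contained sketch of that pre-filtering pattern, not the actual Karpenter code: the candidate type and the canBeTerminated stub are hypothetical stand-ins for CandidateNode and the helper in helpers.go, and only github.com/samber/lo is a real dependency.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

// candidate is a hypothetical stand-in for Karpenter's CandidateNode.
type candidate struct {
	name       string
	doNotEvict bool // stand-in for "this node runs a pod carrying the do-not-evict annotation"
}

// canBeTerminated mirrors the shape of the helper in helpers.go: a reason plus
// a boolean saying whether the node is safe to terminate.
func canBeTerminated(c candidate) (string, bool) {
	if c.doNotEvict {
		return fmt.Sprintf("node %s has a do-not-evict pod", c.name), false
	}
	return "", true
}

func main() {
	candidates := []candidate{{name: "node-a"}, {name: "node-b", doNotEvict: true}, {name: "node-c"}}

	// Drop nodes that can never be drained before committing to deprovision
	// them, instead of discovering the blocked eviction later and hanging.
	candidates = lo.Filter(candidates, func(c candidate, _ int) bool {
		_, canTerminate := canBeTerminated(c)
		return canTerminate
	})

	for _, c := range candidates {
		fmt.Println("would deprovision", c.name) // prints node-a and node-c
	}
}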
12 changes: 6 additions & 6 deletions pkg/controllers/deprovisioning/drift.go
@@ -19,6 +19,7 @@ import (
 	"errors"
 	"fmt"
 
+	"github.com/samber/lo"
 	v1 "k8s.io/api/core/v1"
 	"knative.dev/pkg/logging"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -61,13 +62,12 @@ func (d *Drift) ComputeCommand(ctx context.Context, candidates ...CandidateNode)
 	if err != nil {
 		return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err)
 	}
-	for _, candidate := range candidates {
-		// is this a node that we can terminate? This check is meant to be fast so we can save the expense of simulated
-		// scheduling unless its really needed
-		if _, canTerminate := canBeTerminated(candidate, pdbs); !canTerminate {
-			continue
-		}
+	candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, pdbs)
+		return canTerminate
+	})
 
+	for _, candidate := range candidates {
 		// Check if we need to create any nodes.
 		newNodes, allPodsScheduled, err := simulateScheduling(ctx, d.kubeClient, d.cluster, d.provisioner, candidate)
 		if err != nil {
9 changes: 6 additions & 3 deletions pkg/controllers/deprovisioning/emptiness.go
@@ -18,9 +18,8 @@ import (
 	"context"
 	"time"
 
-	"k8s.io/utils/clock"
-
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/utils/clock"
 	"knative.dev/pkg/logging"
 	"knative.dev/pkg/ptr"
 
@@ -66,7 +65,11 @@ func (e *Emptiness) ShouldDeprovision(ctx context.Context, n *state.Node, provis
 
 // ComputeCommand generates a deprovisioning command given deprovisionable nodes
 func (e *Emptiness) ComputeCommand(_ context.Context, nodes ...CandidateNode) (Command, error) {
-	emptyNodes := lo.Filter(nodes, func(n CandidateNode, _ int) bool { return len(n.pods) == 0 })
+	emptyNodes := lo.Filter(nodes, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, nil)
+		return len(n.pods) == 0 && canTerminate
+	})
+
 	if len(emptyNodes) == 0 {
 		return Command{action: actionDoNothing}, nil
 	}
14 changes: 7 additions & 7 deletions pkg/controllers/deprovisioning/expiration.go
@@ -21,6 +21,7 @@ import (
 	"sort"
 	"time"
 
+	"github.com/samber/lo"
 	"k8s.io/utils/clock"
 
 	v1 "k8s.io/api/core/v1"
@@ -72,13 +73,12 @@ func (e *Expiration) ComputeCommand(ctx context.Context, candidates ...Candidate
 	if err != nil {
 		return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err)
 	}
-	for _, candidate := range candidates {
-		// is this a node that we can terminate? This check is meant to be fast so we can save the expense of simulated
-		// scheduling unless its really needed
-		if _, canBeTerminated := canBeTerminated(candidate, pdbs); !canBeTerminated {
-			continue
-		}
+	candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, pdbs)
+		return canTerminate
+	})
 
+	for _, candidate := range candidates {
 		// Check if we need to create any nodes.
 		newNodes, allPodsScheduled, err := simulateScheduling(ctx, e.kubeClient, e.cluster, e.provisioner, candidate)
 		if err != nil {
@@ -93,7 +93,7 @@ func (e *Expiration) ComputeCommand(ctx context.Context, candidates ...Candidate
 			logging.FromContext(ctx).With("node", candidate.Name).Debugf("continuing to expire node after scheduling simulation failed to schedule all pods")
 		}
 
-		logging.FromContext(ctx).With("expirationTTL", time.Duration(ptr.Int64Value(candidates[0].provisioner.Spec.TTLSecondsUntilExpired))*time.Second).
+		logging.FromContext(ctx).With("ttl", time.Duration(ptr.Int64Value(candidates[0].provisioner.Spec.TTLSecondsUntilExpired))*time.Second).
 			With("delay", time.Since(getExpirationTime(candidates[0].Node, candidates[0].provisioner))).Infof("triggering termination for expired node after TTL")
 
 		// were we able to schedule all the pods on the inflight nodes?
29 changes: 10 additions & 19 deletions pkg/controllers/deprovisioning/helpers.go
@@ -329,27 +329,18 @@ func canBeTerminated(node CandidateNode, pdbs *PDBLimits) (string, bool) {
 	if !node.DeletionTimestamp.IsZero() {
 		return "in the process of deletion", false
 	}
-	if pdb, ok := pdbs.CanEvictPods(node.pods); !ok {
-		return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false
-	}
-
-	if reason, ok := PodsPreventEviction(node.pods); ok {
-		return reason, false
-	}
-	return "", true
-}
-
-// PodsPreventEviction returns true if there are pods that would prevent eviction
-func PodsPreventEviction(pods []*v1.Pod) (string, bool) {
-	for _, p := range pods {
-		// don't care about pods that are finishing, finished or owned by the node
-		if pod.IsTerminating(p) || pod.IsTerminal(p) || pod.IsOwnedByNode(p) {
-			continue
-		}
-
-		if pod.HasDoNotEvict(p) {
-			return fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name), true
-		}
-	}
-	return "", false
+	if pdbs != nil {
+		if pdb, ok := pdbs.CanEvictPods(node.pods); !ok {
+			return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false
+		}
+	}
+	if p, ok := lo.Find(node.pods, func(p *v1.Pod) bool {
+		// don't care about pods that are finishing, finished or owned by the node
+		if pod.IsTerminating(p) || pod.IsTerminal(p) || pod.IsOwnedByNode(p) {
+			return false
+		}
+		return pod.HasDoNotEvict(p)
+	}); ok {
+		return fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name), false
+	}
+	return "", true
 }
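
For orientation, here is a self-contained sketch of the consolidated check above, using hypothetical stand-in types (fakePod, pdbLimits) instead of Karpenter's pod utilities and PDBLimits. It illustrates the two behaviours the new helper adds: a nil PDB tracker is tolerated (the emptiness deprovisioner passes nil), and the do-not-evict scan is a single lo.Find over the node's pods.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

// fakePod is a hypothetical stand-in for *v1.Pod.
type fakePod struct {
	namespace, name string
	doNotEvict      bool
}

// pdbLimits is a hypothetical stand-in for the PDBLimits tracker.
type pdbLimits struct{ blockingPDB string }

// canEvictPods mimics the shape of PDBLimits.CanEvictPods used in the diff above.
func (p *pdbLimits) canEvictPods(_ []fakePod) (string, bool) {
	if p.blockingPDB != "" {
		return p.blockingPDB, false
	}
	return "", true
}

func canBeTerminated(pods []fakePod, pdbs *pdbLimits) (string, bool) {
	// pdbs may be nil: the emptiness path has no PDB tracker to hand in.
	if pdbs != nil {
		if pdb, ok := pdbs.canEvictPods(pods); !ok {
			return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false
		}
	}
	// One pod with do-not-evict is enough to rule the node out.
	if p, ok := lo.Find(pods, func(p fakePod) bool { return p.doNotEvict }); ok {
		return fmt.Sprintf("pod %s/%s has do not evict annotation", p.namespace, p.name), false
	}
	return "", true
}

func main() {
	pods := []fakePod{{namespace: "default", name: "web-0", doNotEvict: true}}
	fmt.Println(canBeTerminated(pods, nil)) // blocked even though no PDB tracker was supplied
}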
3 changes: 2 additions & 1 deletion pkg/controllers/deprovisioning/suite_test.go
@@ -932,7 +932,7 @@ var _ = Describe("Delete Node", func() {
 		ExpectNotFound(ctx, env.Client, node1)
 	})
 	It("can delete nodes, considers do-not-evict", func() {
-		// create our RS so we can link a pod to it
+		// create our RS, so we can link a pod to it
 		rs := test.ReplicaSet()
 		ExpectApplied(ctx, env.Client, rs)
 		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
@@ -1000,6 +1000,7 @@ var _ = Describe("Delete Node", func() {
 		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node2))
 
 		fakeClock.Step(10 * time.Minute)
+
 		var wg sync.WaitGroup
 		ExpectTriggerVerifyAction(&wg)
 		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
9 changes: 0 additions & 9 deletions pkg/controllers/inflightchecks/termination.go
@@ -42,7 +42,6 @@ func (t *Termination) Check(ctx context.Context, node *v1.Node, provisioner *v1a
 	if node.DeletionTimestamp.IsZero() {
 		return nil, nil
 	}
-
 	pods, err := nodeutils.GetNodePods(ctx, t.kubeClient, node)
 	if err != nil {
 		return nil, err
@@ -54,13 +53,5 @@ func (t *Termination) Check(ctx context.Context, node *v1.Node, provisioner *v1a
 			message: fmt.Sprintf("Can't drain node, PDB %s is blocking evictions", pdb),
 		})
 	}
-
-	if reason, ok := deprovisioning.PodsPreventEviction(pods); ok {
-		issues = append(issues, Issue{
-			node:    node,
-			message: fmt.Sprintf("Can't drain node, %s", reason),
-		})
-	}
-
 	return issues, nil
 }
3 changes: 0 additions & 3 deletions pkg/controllers/machine/terminator/terminator.go
@@ -76,9 +76,6 @@ func (t *Terminator) Drain(ctx context.Context, node *v1.Node) error {
 	var podsToEvict []*v1.Pod
 	// Skip node due to pods that are not able to be evicted
 	for _, p := range pods {
-		if podutil.HasDoNotEvict(p) {
-			return NewNodeDrainError(fmt.Errorf("pod %s/%s has do-not-evict annotation", p.Namespace, p.Name))
-		}
 		// Ignore if unschedulable is tolerated, since they will reschedule
 		if podutil.ToleratesUnschedulableTaint(p) {
 			continue
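
With the lines above removed, Drain no longer fails fast on a do-not-evict pod; that decision now happens earlier, in the deprovisioning helpers. A rough sketch of what an equivalent caller-side guard could look like is below; it is hypothetical, hasDoNotEvict stands in for podutil.HasDoNotEvict, and the annotation key and value it checks are assumptions rather than something taken from this diff.

package guard

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// doNotEvictAnnotation is an assumed key; the real lookup is encapsulated in
// podutil.HasDoNotEvict and may differ.
const doNotEvictAnnotation = "karpenter.sh/do-not-evict"

// hasDoNotEvict is a hypothetical stand-in for podutil.HasDoNotEvict.
func hasDoNotEvict(p *v1.Pod) bool {
	return p.Annotations[doNotEvictAnnotation] == "true"
}

// checkDrainable mirrors the guard removed from Drain: report the first pod
// that would block eviction so the caller can skip the node before draining.
func checkDrainable(pods []*v1.Pod) error {
	for _, p := range pods {
		if hasDoNotEvict(p) {
			return fmt.Errorf("pod %s/%s has do-not-evict annotation", p.Namespace, p.Name)
		}
	}
	return nil
}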