skip eviction when replica count is below evictor minReplicas threshold setting #1257

Merged
2 changes: 2 additions & 0 deletions README.md
@@ -142,6 +142,7 @@ The Default Evictor Plugin is used by default for filtering pods before processi
|`labelSelector`|`metav1.LabelSelector`||(see [label filtering](#label-filtering))|
|`priorityThreshold`|`priorityThreshold`||(see [priority filtering](#priority-filtering))|
|`nodeFit`|`bool`|`false`|(see [node fit filtering](#node-fit-filtering))|
|`minReplicas`|`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |

### Example policy

@@ -166,6 +167,7 @@ profiles:
evictFailedBarePods: true
evictLocalStoragePods: true
nodeFit: true
minReplicas: 2
plugins:
# DefaultEvictor is enabled for both `filter` and `preEvictionFilter`
# filter:
34 changes: 23 additions & 11 deletions pkg/descheduler/pod/pods.go
@@ -142,21 +142,25 @@ func BuildGetPodsAssignedToNodeFunc(podInformer cache.SharedIndexInformer) (GetP
if err != nil {
return nil, err
}
pods := make([]*v1.Pod, 0, len(objs))
for _, obj := range objs {
pod, ok := obj.(*v1.Pod)
if !ok {
continue
}
if filter(pod) {
pods = append(pods, pod)
}
}
return pods, nil
return ConvertToPods(objs, filter), nil
}
return getPodsAssignedToNode, nil
}

func ConvertToPods(objs []interface{}, filter FilterFunc) []*v1.Pod {
pods := make([]*v1.Pod, 0, len(objs))
for _, obj := range objs {
pod, ok := obj.(*v1.Pod)
if !ok {
continue
}
if filter == nil || filter(pod) {
pods = append(pods, pod)
}
}
return pods
}

// ListPodsOnNodes returns all pods on given nodes.
func ListPodsOnNodes(nodes []*v1.Node, getPodsAssignedToNode GetPodsAssignedToNodeFunc, filter FilterFunc) ([]*v1.Pod, error) {
pods := make([]*v1.Pod, 0)
@@ -215,6 +219,14 @@ func OwnerRef(pod *v1.Pod) []metav1.OwnerReference {
return pod.ObjectMeta.GetOwnerReferences()
}

func OwnerRefUIDs(pod *v1.Pod) []string {
var ownerRefUIDs []string
for _, ownerRef := range OwnerRef(pod) {
ownerRefUIDs = append(ownerRefUIDs, string(ownerRef.UID))
}
return ownerRefUIDs
}

func IsBestEffortPod(pod *v1.Pod) bool {
return utils.GetPodQOS(pod) == v1.PodQOSBestEffort
}
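To make the new helpers concrete, here is a small standalone sketch (not part of this PR) showing what `OwnerRefUIDs` and `ConvertToPods` return for a hypothetical pod owned by a single ReplicaSet; the pod name, owner name, and UID are invented for illustration.

```go
// Sketch only (not from this PR): what the new pod helpers return for a
// hypothetical pod owned by a single ReplicaSet. Names and the UID are made up.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)

func main() {
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "example-pod",
			OwnerReferences: []metav1.OwnerReference{
				{Kind: "ReplicaSet", Name: "example-rs", UID: "5f4dcc3b-0000-0000-0000-000000000000"},
			},
		},
	}

	// OwnerRefUIDs flattens the owner references to their UID strings; these are
	// the keys used by the "metadata.ownerReferences" indexer in defaultevictor.go.
	fmt.Println(podutil.OwnerRefUIDs(pod)) // [5f4dcc3b-0000-0000-0000-000000000000]

	// ConvertToPods keeps every object that is a *v1.Pod and passes the filter;
	// a nil filter keeps everything.
	pods := podutil.ConvertToPods([]interface{}{pod}, nil)
	fmt.Println(len(pods)) // 1
}
```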
55 changes: 53 additions & 2 deletions pkg/framework/plugins/defaultevictor/defaultevictor.go
@@ -16,13 +16,15 @@ package defaultevictor
import (
// "context"
"context"
"errors"
"fmt"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/errors"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
@@ -143,6 +145,36 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
})
}

if defaultEvictorArgs.MinReplicas > 1 {
indexName := "metadata.ownerReferences"
indexer, err := getPodIndexerByOwnerRefs(indexName, handle)
if err != nil {
return nil, err
}
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if len(pod.OwnerReferences) == 0 {
return nil
}

if len(pod.OwnerReferences) > 1 {
klog.V(5).InfoS("pod has multiple owner references which is not supported for minReplicas check", "size", len(pod.OwnerReferences), "pod", klog.KObj(pod))
return nil
}

ownerRef := pod.OwnerReferences[0]
objs, err := indexer.ByIndex(indexName, string(ownerRef.UID))
if err != nil {
return fmt.Errorf("unable to list pods for minReplicas filter in the policy parameter")
}

if uint(len(objs)) < defaultEvictorArgs.MinReplicas {
return fmt.Errorf("owner has %d replicas which is less than minReplicas of %d", len(objs), defaultEvictorArgs.MinReplicas)
}

return nil
})
}

return ev, nil
}

@@ -199,9 +231,28 @@ func (d *DefaultEvictor) Filter(pod *v1.Pod) bool {
}

if len(checkErrs) > 0 {
klog.V(4).InfoS("Pod fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error())
klog.V(4).InfoS("Pod fails the following checks", "pod", klog.KObj(pod), "checks", utilerrors.NewAggregate(checkErrs).Error())
return false
}

return true
}

func getPodIndexerByOwnerRefs(indexName string, handle frameworktypes.Handle) (cache.Indexer, error) {
podInformer := handle.SharedInformerFactory().Core().V1().Pods().Informer()
if err := podInformer.AddIndexers(cache.Indexers{
indexName: func(obj interface{}) ([]string, error) {
pod, ok := obj.(*v1.Pod)
if !ok {
return []string{}, errors.New("unexpected object")
}

return podutil.OwnerRefUIDs(pod), nil
},
}); err != nil {
return nil, err
}

indexer := podInformer.GetIndexer()
return indexer, nil
}
72 changes: 59 additions & 13 deletions pkg/framework/plugins/defaultevictor/defaultevictor_test.go
@@ -20,6 +20,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
"sigs.k8s.io/descheduler/pkg/api"
@@ -363,6 +364,8 @@ func TestDefaultEvictorFilter(t *testing.T) {
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"

ownerRefUUID := uuid.NewUUID()

type testCase struct {
description string
pods []*v1.Pod
@@ -372,6 +375,7 @@
evictSystemCriticalPods bool
priorityThreshold *int32
nodeFit bool
minReplicas uint
result bool
}

@@ -527,7 +531,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: true,
}, {
description: "Pod not evicted becasuse it is part of a daemonSet",
description: "Pod not evicted because it is part of a daemonSet",
pods: []*v1.Pod{
test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -538,7 +542,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: false,
}, {
description: "Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
@@ -549,7 +553,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: true,
}, {
description: "Pod not evicted becasuse it is a mirror poddsa",
description: "Pod not evicted because it is a mirror poddsa",
pods: []*v1.Pod{
test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -560,7 +564,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: false,
}, {
description: "Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -572,7 +576,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: true,
}, {
description: "Pod not evicted becasuse it has system critical priority",
description: "Pod not evicted because it has system critical priority",
pods: []*v1.Pod{
test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -584,7 +588,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: false,
}, {
description: "Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -599,7 +603,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
result: true,
}, {
description: "Pod not evicted becasuse it has a priority higher than the configured priority threshold",
description: "Pod not evicted because it has a priority higher than the configured priority threshold",
pods: []*v1.Pod{
test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -611,7 +615,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
priorityThreshold: &lowPriority,
result: false,
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -624,7 +628,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
priorityThreshold: &lowPriority,
result: true,
}, {
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true",
description: "Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true",
pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -636,7 +640,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: true,
result: true,
}, {
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -649,7 +653,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: true,
result: true,
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
description: "Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -661,7 +665,7 @@ func TestDefaultEvictorFilter(t *testing.T) {
priorityThreshold: &lowPriority,
result: true,
}, {
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
description: "Pod is evicted because it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
pods: []*v1.Pod{
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
@@ -704,6 +708,47 @@ func TestDefaultEvictorFilter(t *testing.T) {
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, {
description: "minReplicas of 2, owner with 2 replicas, evicts",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.ObjectMeta.OwnerReferences[0].UID = ownerRefUUID
}),
test.BuildTestPod("p2", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.ObjectMeta.OwnerReferences[0].UID = ownerRefUUID
}),
},
minReplicas: 2,
result: true,
}, {
description: "minReplicas of 3, owner with 2 replicas, no eviction",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.ObjectMeta.OwnerReferences[0].UID = ownerRefUUID
}),
test.BuildTestPod("p2", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.ObjectMeta.OwnerReferences[0].UID = ownerRefUUID
}),
},
minReplicas: 3,
result: false,
}, {
description: "minReplicas of 2, multiple owners, no eviction",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = append(test.GetNormalPodOwnerRefList(), test.GetNormalPodOwnerRefList()...)
pod.ObjectMeta.OwnerReferences[0].UID = ownerRefUUID
}),
test.BuildTestPod("p2", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
}),
},
minReplicas: 2,
result: true,
},
}

@@ -741,7 +786,8 @@ func TestDefaultEvictorFilter(t *testing.T) {
PriorityThreshold: &api.PriorityThreshold{
Value: test.priorityThreshold,
},
NodeFit: test.nodeFit,
NodeFit: test.nodeFit,
MinReplicas: test.minReplicas,
}

evictorPlugin, err := New(
1 change: 1 addition & 0 deletions pkg/framework/plugins/defaultevictor/types.go
@@ -33,4 +33,5 @@ type DefaultEvictorArgs struct {
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
PriorityThreshold *api.PriorityThreshold `json:"priorityThreshold"`
NodeFit bool `json:"nodeFit"`
MinReplicas uint `json:"minReplicas"`
Review comments on this line:

Contributor: So 0 and 1 mean no effect. Will it ever make sense to interpret minReplicas=0 differently? An owner with 0 replicas means no pods; an owner with a single replica means the current behavior, i.e. skip the check. Would it make more sense to define minAdjointReplicas instead, to avoid the minReplicas=0 case? Just thinking aloud.

Contributor (author): Please feel free to take over this PR. At the moment, I have other higher-priority items and have low confidence in any movement on this PR (it has been around since October of last year).

Contributor: I will put it on my todo list, where it will probably stay for some time before I find a free cycle for some more comprehensive thought. Thank you for pushing this forward.

Contributor: 0 replicas means the pod does not exist, which is never the case. Fine to keep it as the default = disabled.

Contributor (author): I added a warning message in the validator to print that it's a no-op when set to 1.

}
6 changes: 6 additions & 0 deletions pkg/framework/plugins/defaultevictor/validation.go
@@ -16,6 +16,8 @@ package defaultevictor
import (
"fmt"

"k8s.io/klog/v2"

"k8s.io/apimachinery/pkg/runtime"
)

@@ -26,5 +28,9 @@ func ValidateDefaultEvictorArgs(obj runtime.Object) error {
return fmt.Errorf("priority threshold misconfigured, only one of priorityThreshold fields can be set, got %v", args)
}

if args.MinReplicas == 1 {
klog.V(4).Info("DefaultEvictor minReplicas must be greater than 1 to check for min pods during eviction. This check will be ignored during eviction.")
}

return nil
}
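As a usage note, MinReplicas values of 0 or 1 pass validation unchanged; the warning above is informational only and, being logged at V(4), is visible only when the descheduler runs with a klog verbosity of 4 or higher. The snippet below is a minimal sketch of calling the validator directly; it assumes `*DefaultEvictorArgs` satisfies `runtime.Object` via the descheduler's generated deepcopy code, and is not part of this PR.

```go
// Sketch only: a MinReplicas of 1 is accepted by the validator; it merely logs
// that the replica check will be a no-op instead of returning an error.
package main

import (
	"fmt"

	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
)

func main() {
	args := &defaultevictor.DefaultEvictorArgs{MinReplicas: 1}
	fmt.Println(defaultevictor.ValidateDefaultEvictorArgs(args)) // <nil>
}
```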
11 changes: 11 additions & 0 deletions test/e2e/e2e_duplicatepods_test.go
@@ -107,6 +107,7 @@ func TestRemoveDuplicates(t *testing.T) {
replicasNum int
beforeFunc func(deployment *appsv1.Deployment)
expectedEvictedPodCount uint
minReplicas uint
}{
{
description: "Evict Pod even Pods schedule to specific node",
@@ -135,6 +136,15 @@
},
expectedEvictedPodCount: 2,
},
{
description: "Ignores eviction with minReplicas of 4",
replicasNum: 3,
beforeFunc: func(deployment *appsv1.Deployment) {
deployment.Spec.Replicas = pointer.Int32(3)
},
expectedEvictedPodCount: 0,
minReplicas: 4,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
@@ -179,6 +189,7 @@
IgnorePvcPods: false,
EvictFailedBarePods: false,
NodeFit: false,
MinReplicas: tc.minReplicas,
}

evictorFilter, err := defaultevictor.New(