Skip to content

Commit

Permalink
add ignorePodsWithoutPDB option (#1529)
Browse files Browse the repository at this point in the history
* add ignoreNonPDBPods option

* take2

* add test

* poddisruptionbudgets are now used by defaultevictor plugin

* add poddisruptionbudgets to rbac

* review comments

* don't use GetPodPodDisruptionBudgets

* review comment, don't hide error
  • Loading branch information
john7doe authored Oct 15, 2024
1 parent 7696f00 commit ef0c2c1
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 18 deletions.
23 changes: 12 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,19 @@ These are top level keys in the Descheduler Policy that you can use to configure

The Default Evictor Plugin is used by default for filtering pods before processing them in an strategy plugin, or for applying a PreEvictionFilter of pods before eviction. You can also create your own Evictor Plugin or use the Default one provided by Descheduler. Other uses for the Evictor plugin can be to sort, filter, validate or group pods by different criteria, and that's why this is handled by a plugin and not configured in the top level config.

| Name |type| Default Value | Description |
|------|----|---------------|-------------|
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
| Name |type| Default Value | Description |
|---------------------------|----|---------------|-----------------------------------------------------------------------------------------------------------------------------|
| `nodeSelector` |`string`| `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` |`bool`| `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` |`bool`| `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
|`labelSelector`|`metav1.LabelSelector`||(see [label filtering](#label-filtering))|
|`priorityThreshold`|`priorityThreshold`||(see [priority filtering](#priority-filtering))|
|`nodeFit`|`bool`|`false`|(see [node fit filtering](#node-fit-filtering))|
|`minReplicas`|`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
|`minPodAge`|`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
| `ignorePvcPods` |`bool`| `false` | set whether PVC pods should be evicted or ignored |
| `evictFailedBarePods` |`bool`| `false` | allow eviction of pods without owner references and in failed phase |
| `labelSelector` |`metav1.LabelSelector`|| (see [label filtering](#label-filtering)) |
| `priorityThreshold` |`priorityThreshold`|| (see [priority filtering](#priority-filtering)) |
| `nodeFit` |`bool`|`false`| (see [node fit filtering](#node-fit-filtering)) |
| `minReplicas` |`uint`|`0`| ignore eviction of pods where owner (e.g. `ReplicaSet`) replicas is below this threshold |
| `minPodAge` |`metav1.Duration`|`0`| ignore eviction of pods with a creation time within this threshold |
| `ignorePodsWithoutPDB` |`bool`|`false`| set whether pods without PodDisruptionBudget should be evicted or ignored |

### Example policy

Expand Down
3 changes: 3 additions & 0 deletions kubernetes/base/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ rules:
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["get", "watch", "list"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "update"]
Expand Down
8 changes: 6 additions & 2 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"strconv"
"time"

policyv1 "k8s.io/api/policy/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
"k8s.io/apimachinery/pkg/runtime/schema"

Expand Down Expand Up @@ -132,8 +133,11 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.Desche
v1.SchemeGroupVersion.WithResource("nodes"),
// Future work could be to let each plugin declare what type of resources it needs; that way dry runs would stay
// consistent with the real runs without having to keep the list here in sync.
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses")) // Used by the defaultevictor plugin
v1.SchemeGroupVersion.WithResource("namespaces"), // Used by the defaultevictor plugin
schedulingv1.SchemeGroupVersion.WithResource("priorityclasses"), // Used by the defaultevictor plugin
policyv1.SchemeGroupVersion.WithResource("poddisruptionbudgets"), // Used by the defaultevictor plugin

) // Used by the defaultevictor plugin

getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/descheduler/policyconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func setDefaultEvictor(profile api.DeschedulerProfile, client clientset.Interfac
EvictSystemCriticalPods: false,
IgnorePvcPods: false,
EvictFailedBarePods: false,
IgnorePodsWithoutPDB: false,
},
}

Expand Down
14 changes: 14 additions & 0 deletions pkg/framework/plugins/defaultevictor/defaultevictor.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,20 @@ func New(args runtime.Object, handle frameworktypes.Handle) (frameworktypes.Plug
return nil
})
}

if defaultEvictorArgs.IgnorePodsWithoutPDB {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
hasPdb, err := utils.IsPodCoveredByPDB(pod, handle.SharedInformerFactory().Policy().V1().PodDisruptionBudgets().Lister())
if err != nil {
return fmt.Errorf("unable to check if pod is covered by PodDisruptionBudget: %w", err)
}
if !hasPdb {
return fmt.Errorf("no PodDisruptionBudget found for pod")
}
return nil
})
}

return ev, nil
}

Expand Down
41 changes: 38 additions & 3 deletions pkg/framework/plugins/defaultevictor/defaultevictor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/uuid"
Expand All @@ -39,6 +40,7 @@ type testCase struct {
description string
pods []*v1.Pod
nodes []*v1.Node
pdbs []*policyv1.PodDisruptionBudget
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
Expand All @@ -47,6 +49,7 @@ type testCase struct {
minReplicas uint
minPodAge *metav1.Duration
result bool
ignorePodsWithoutPDB bool
}

func TestDefaultEvictorPreEvictionFilter(t *testing.T) {
Expand Down Expand Up @@ -739,6 +742,33 @@ func TestDefaultEvictorFilter(t *testing.T) {
}),
},
result: true,
}, {
description: "ignorePodsWithoutPDB, pod with no PDBs, no eviction",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Labels = map[string]string{
"app": "foo",
}
}),
},
ignorePodsWithoutPDB: true,
result: false,
}, {
description: "ignorePodsWithoutPDB, pod with PDBs, evicts",
pods: []*v1.Pod{
test.BuildTestPod("p1", 1, 1, n1.Name, func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Labels = map[string]string{
"app": "foo",
}
}),
},
pdbs: []*policyv1.PodDisruptionBudget{
test.BuildTestPDB("pdb1", "foo"),
},
ignorePodsWithoutPDB: true,
result: true,
},
}

Expand Down Expand Up @@ -811,11 +841,15 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
for _, pod := range test.pods {
objs = append(objs, pod)
}
for _, pdb := range test.pdbs {
objs = append(objs, pdb)
}

fakeClient := fake.NewSimpleClientset(objs...)

sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
_ = sharedInformerFactory.Policy().V1().PodDisruptionBudgets().Lister()

getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
Expand All @@ -833,9 +867,10 @@ func initializePlugin(ctx context.Context, test testCase) (frameworktypes.Plugin
PriorityThreshold: &api.PriorityThreshold{
Value: test.priorityThreshold,
},
NodeFit: test.nodeFit,
MinReplicas: test.minReplicas,
MinPodAge: test.minPodAge,
NodeFit: test.nodeFit,
MinReplicas: test.minReplicas,
MinPodAge: test.minPodAge,
IgnorePodsWithoutPDB: test.ignorePodsWithoutPDB,
}

evictorPlugin, err := New(
Expand Down
7 changes: 5 additions & 2 deletions pkg/framework/plugins/defaultevictor/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
LabelSelector: nil,
PriorityThreshold: nil,
NodeFit: false,
IgnorePodsWithoutPDB: false,
},
},
{
Expand All @@ -57,7 +58,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
PriorityThreshold: &api.PriorityThreshold{
Value: utilptr.To[int32](800),
},
NodeFit: true,
NodeFit: true,
IgnorePodsWithoutPDB: true,
},
want: &DefaultEvictorArgs{
NodeSelector: "NodeSelector",
Expand All @@ -70,7 +72,8 @@ func TestSetDefaults_DefaultEvictorArgs(t *testing.T) {
PriorityThreshold: &api.PriorityThreshold{
Value: utilptr.To[int32](800),
},
NodeFit: true,
NodeFit: true,
IgnorePodsWithoutPDB: true,
},
},
}
Expand Down
1 change: 1 addition & 0 deletions pkg/framework/plugins/defaultevictor/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ type DefaultEvictorArgs struct {
NodeFit bool `json:"nodeFit,omitempty"`
MinReplicas uint `json:"minReplicas,omitempty"`
MinPodAge *metav1.Duration `json:"minPodAge,omitempty"`
IgnorePodsWithoutPDB bool `json:"ignorePodsWithoutPDB,omitempty"`
}
37 changes: 37 additions & 0 deletions pkg/utils/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ package utils
import (
"fmt"

policy "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/labels"

policyv1 "k8s.io/client-go/listers/policy/v1"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -115,6 +120,38 @@ func IsPodWithPVC(pod *v1.Pod) bool {
return false
}

// IsPodCoveredByPDB returns true if the pod is covered by at least one PodDisruptionBudget.
func IsPodCoveredByPDB(pod *v1.Pod, lister policyv1.PodDisruptionBudgetLister) (bool, error) {
// We can't use the GetPodPodDisruptionBudgets expansion method here because it treats no pdb as an error,
// but we want to return false.

list, err := lister.PodDisruptionBudgets(pod.Namespace).List(labels.Everything())
if err != nil {
return false, err
}

if len(list) == 0 {
return false, nil
}

podLabels := labels.Set(pod.Labels)
var pdbList []*policy.PodDisruptionBudget
for _, pdb := range list {
selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector)
if err != nil {
// This object has an invalid selector, it will never match the pod
continue
}

if !selector.Matches(podLabels) {
continue
}
pdbList = append(pdbList, pdb)
}

return len(pdbList) > 0, nil
}

// GetPodSource returns the source of the pod based on the annotation.
func GetPodSource(pod *v1.Pod) (string, error) {
if pod.Annotations != nil {
Expand Down
22 changes: 22 additions & 0 deletions test/test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import (
"testing"
"time"

policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/util/intstr"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -67,6 +70,25 @@ func BuildTestPod(name string, cpu, memory int64, nodeName string, apply func(*v
return pod
}

func BuildTestPDB(name, appLabel string) *policyv1.PodDisruptionBudget {
maxUnavailable := intstr.FromInt32(1)
pdb := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Name: name,
},
Spec: policyv1.PodDisruptionBudgetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": appLabel,
},
},
MaxUnavailable: &maxUnavailable,
},
}
return pdb
}

// GetMirrorPodAnnotation returns the annotation needed for mirror pod.
func GetMirrorPodAnnotation() map[string]string {
return map[string]string{
Expand Down

0 comments on commit ef0c2c1

Please sign in to comment.