Skip to content

Commit

Permalink
Update README and add priority/namespace filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
damemi committed Oct 7, 2020
1 parent c343f39 commit 5523bc5
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 18 deletions.
42 changes: 26 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ The policy also includes common configuration for all the strategies:
- `evictLocalStoragePods` - allowing to evict pods with local storage
- `maxNoOfPodsToEvictPerNode` - maximum number of pods evicted from each node (summed through all strategies)

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
Expand Down Expand Up @@ -129,7 +129,7 @@ has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considere
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|

**Example:**
```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -171,7 +171,7 @@ These thresholds, `thresholds` and `targetThresholds`, could be tuned as per you

**Example:**

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -221,7 +221,7 @@ node.

**Example:**

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -258,7 +258,7 @@ podA gets evicted from nodeA.

**Example:**

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -286,7 +286,7 @@ and will be evicted.

**Example:**

````
````yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand All @@ -297,15 +297,25 @@ strategies:
### RemovePodsViolatingTopologySpreadConstraint

This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. This strategy requires k8s version 1.18 at a minimum. To disable this strategy, the
policy should look like:
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.

```
**Parameters:**

|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: false
enabled: true
```


Expand All @@ -328,7 +338,7 @@ which determines whether init container restarts should be factored into that ca

**Example:**

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -359,7 +369,7 @@ to `Running` and `Pending`.

**Example:**

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand All @@ -386,7 +396,7 @@ The following strategies accept a `namespaces` parameter which allows to specify

For example:

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand All @@ -404,7 +414,7 @@ strategies:
In the examples `PodLifeTime` gets executed only over `namespace1` and `namespace2`.
The similar holds for `exclude` field:

```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down Expand Up @@ -432,7 +442,7 @@ is set to the value of `system-cluster-critical` priority class.
E.g.

Setting `thresholdPriority`
```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand All @@ -445,7 +455,7 @@ strategies:
```

Setting `thresholdPriorityClassName`
```
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
Expand Down
39 changes: 37 additions & 2 deletions pkg/descheduler/strategies/topologyspreadconstraint.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ package strategies

import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"math"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sort"
)

Expand All @@ -40,18 +43,47 @@ type topology struct {
pods []*v1.Pod
}

// validateTopologySpreadParams checks the strategy parameters for the
// RemovePodsViolatingTopologySpreadConstraint strategy before they are used.
// It returns a non-nil error when the parameters are missing or mutually
// inconsistent; the caller logs the error and skips the strategy run.
func validateTopologySpreadParams(params *api.StrategyParameters) error {
	// The caller dereferences params (e.g. params.Namespaces) unconditionally,
	// so a nil params must be rejected here rather than panicking later.
	if params == nil {
		return fmt.Errorf("strategy parameters must not be nil")
	}
	// Namespace filtering is either an allow-list (Include) or a deny-list
	// (Exclude) — never both at once.
	if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
		return fmt.Errorf("only one of Include/Exclude namespaces can be set")
	}
	// Priority filtering likewise accepts at most one of the two forms:
	// a numeric threshold or a priority class name resolved to one.
	if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
		return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
	}

	return nil
}

func RemovePodsViolatingTopologySpreadConstraint(
ctx context.Context,
client clientset.Interface,
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
) {
if err := validateTopologySpreadParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid PodLifeTime parameters")
return
}

thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}

var includedNamespaces, excludedNamespaces sets.String
if strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}

nodeMap := make(map[string]*v1.Node, len(nodes))
for _, node := range nodes {
nodeMap[node.Name] = node
}
evictable := podEvictor.Evictable()
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))

// 1. for each namespace for which there is Topology Constraint
// 2. for each TopologySpreadConstraint in that namespace
Expand All @@ -74,6 +106,9 @@ func RemovePodsViolatingTopologySpreadConstraint(
podsForEviction := make(map[*v1.Pod]struct{})
// 1. for each namespace...
for _, namespace := range namespaces.Items {
if (!includedNamespaces.Has(namespace.Name) || excludedNamespaces.Has(namespace.Name)) && (includedNamespaces.Len()+excludedNamespaces.Len() > 0) {
continue
}
namespacePods, err := client.CoreV1().Pods(namespace.Name).List(ctx, metav1.ListOptions{})
if err != nil {
klog.ErrorS(err, "Couldn't list pods in namespace", "namespace", namespace)
Expand Down Expand Up @@ -196,7 +231,7 @@ func RemovePodsViolatingTopologySpreadConstraint(
// (This is the basic principle of keeping all sizes within ~skew of the average)
movePods := int(math.Min(
math.Min(math.Ceil(skew/2), float64(len(sortedTopologies[j].pods)-idealAvg)),
float64(idealAvg - len(sortedTopologies[i].pods))))
float64(idealAvg-len(sortedTopologies[i].pods))))
if movePods <= 0 {
i++
continue
Expand Down

0 comments on commit 5523bc5

Please sign in to comment.