Move maximum-pods-per-nodes-evicted logic under a single invocation #266

Merged · 2 commits · Apr 23, 2020

pkg/descheduler/descheduler.go (14 additions, 8 deletions)
@@ -26,10 +26,10 @@ import (
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
"sigs.k8s.io/descheduler/pkg/utils"
)

func Run(rs *options.DeschedulerServer) error {
@@ -77,13 +77,19 @@ func RunDeschedulerStrategies(rs *options.DeschedulerServer, deschedulerPolicy *
return
}

nodePodCount := utils.InitializeNodePodCount(nodes)

strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], evictionPolicyGroupVersion, nodes, nodePodCount)
podEvictor := evictions.NewPodEvictor(
rs.Client,
evictionPolicyGroupVersion,
rs.DryRun,
rs.MaxNoOfPodsToEvictPerNode,
nodes,
)

strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], nodes, podEvictor)
strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], nodes, podEvictor)
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], nodes, podEvictor)
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], nodes, podEvictor)
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], nodes, podEvictor)

// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
if rs.DeschedulingInterval.Seconds() == 0 {
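
Taken together, the new wiring in RunDeschedulerStrategies builds one evictions.PodEvictor per descheduling cycle and hands it to every strategy, so dry-run handling, the per-node MaxNoOfPodsToEvictPerNode cap, and eviction counting live in a single place. Below is a minimal sketch of that calling pattern, not the merged code; the helper name runStrategiesOnce and the final log line are assumptions added for illustration, and the relevant packages (options, api, v1, evictions, strategies, klog) are assumed to be imported.

func runStrategiesOnce(rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy,
    evictionPolicyGroupVersion string, nodes []*v1.Node) {
    // One evictor per cycle: it seeds a zeroed per-node counter for every node
    // and carries the client, policy group version, dry-run flag, and cap.
    podEvictor := evictions.NewPodEvictor(
        rs.Client,
        evictionPolicyGroupVersion,
        rs.DryRun,
        rs.MaxNoOfPodsToEvictPerNode,
        nodes,
    )

    // Every strategy shares the same evictor instead of threading
    // policyGroupVersion, dryRun, maxPodsToEvict, and a count map around.
    strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], nodes, podEvictor)
    strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], nodes, podEvictor)

    // Hypothetical: a caller can report the cycle-wide result afterwards.
    klog.V(1).Infof("Pods evicted this cycle: %d", podEvictor.TotalEvicted())
}
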
pkg/descheduler/evictions/evictions.go (72 additions, 5 deletions)
@@ -32,6 +32,72 @@ import (
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
)

// nodePodEvictedCount keeps count of the pods evicted on each node
type nodePodEvictedCount map[*v1.Node]int

type PodEvictor struct {
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvict int
nodepodCount nodePodEvictedCount
}

func NewPodEvictor(
client clientset.Interface,
policyGroupVersion string,
dryRun bool,
maxPodsToEvict int,
nodes []*v1.Node,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
for _, node := range nodes {
// Initialize the evicted-pod count for each node to 0.
nodePodCount[node] = 0
}

return &PodEvictor{
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvict: maxPodsToEvict,
nodepodCount: nodePodCount,
}
}

// NodeEvicted returns the number of pods evicted from the given node
func (pe *PodEvictor) NodeEvicted(node *v1.Node) int {
return pe.nodepodCount[node]
}

// TotalEvicted returns the total number of pods evicted across all nodes
func (pe *PodEvictor) TotalEvicted() int {
var total int
for _, count := range pe.nodepodCount {
total += count
}
return total
}

// EvictPod returns non-nil error only when evicting a pod on a node is not
// possible (due to maxPodsToEvict constraint). Success is true when the pod
// is evicted on the server side.
func (pe *PodEvictor) EvictPod(pod *v1.Pod, node *v1.Node) (success bool, err error) {
if pe.maxPodsToEvict > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvict {
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvict, node.Name)
}

success, err = EvictPod(pe.client, pod, pe.policyGroupVersion, pe.dryRun)
if success {
pe.nodepodCount[node]++
klog.V(1).Infof("Evicted pod: %#v (%#v)", pod.Name, err)
return success, nil
}
// err is used only for logging purposes
klog.Errorf("Error when evicting pod: %#v (%#v)", pod.Name, err)
return false, nil
}

func EvictPod(client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) (bool, error) {
if dryRun {
return true, nil
@@ -58,11 +124,12 @@ func EvictPod(client clientset.Interface, pod *v1.Pod, policyGroupVersion string
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", "pod evicted by sigs.k8s.io/descheduler")
return true, nil
} else if apierrors.IsTooManyRequests(err) {
}
if apierrors.IsTooManyRequests(err) {
return false, fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err)
} else if apierrors.IsNotFound(err) {
return true, fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err)
} else {
return false, err
}
if apierrors.IsNotFound(err) {
return false, fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err)
}
return false, err
}
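
The (success, err) contract documented on PodEvictor.EvictPod above is what lets every strategy share the same loop shape. Here is a hedged sketch of how a caller is expected to read the two return values; podEvictor, node, and candidatePods are placeholders for values a strategy already has.

for _, pod := range candidatePods {
    success, err := podEvictor.EvictPod(pod, node)
    if err != nil {
        // A non-nil error means the per-node maxPodsToEvict cap was hit;
        // stop evicting from this node for the rest of the cycle.
        break
    }
    if !success {
        // The API server refused the eviction (for example a PodDisruptionBudget);
        // the error was already logged inside EvictPod, so just move on.
        continue
    }
    // A successful eviction has already been counted against the node
    // inside the evictor; nothing else to track here.
}

Note that the lower-level EvictPod helper also changes in this hunk: a NotFound error now returns false rather than true, so a pod that vanished before the eviction call is no longer reported as a successful eviction.
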
pkg/descheduler/strategies/duplicates.go (9 additions, 15 deletions)
@@ -27,7 +27,6 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)

//type creator string
@@ -36,16 +35,20 @@ type DuplicatePodsMap map[string][]*v1.Pod
// RemoveDuplicatePods removes duplicate pods from a node. This strategy evicts all duplicate pods on a node.
// A pod is a duplicate of another if both come from the same creator and kind and live in the same
// namespace. As of now, this strategy won't evict daemonsets, mirror pods, critical pods, or pods that use local storage.
func RemoveDuplicatePods(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodepodCount utils.NodePodEvictedCount) {
func RemoveDuplicatePods(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if !strategy.Enabled {
return
}
deleteDuplicatePods(ds.Client, policyGroupVersion, nodes, ds.DryRun, nodepodCount, ds.MaxNoOfPodsToEvictPerNode, ds.EvictLocalStoragePods)
deleteDuplicatePods(ds.Client, nodes, ds.EvictLocalStoragePods, podEvictor)
}

// deleteDuplicatePods evicts duplicate pods from the given nodes; eviction counts are tracked by the podEvictor.
func deleteDuplicatePods(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodepodCount utils.NodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
podsEvicted := 0
func deleteDuplicatePods(
client clientset.Interface,
nodes []*v1.Node,
evictLocalStoragePods bool,
podEvictor *evictions.PodEvictor,
) {
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
dpm := ListDuplicatePodsOnANode(client, node, evictLocalStoragePods)
@@ -54,22 +57,13 @@ for creatorKey, pods := range dpm {
klog.V(1).Infof("%#v", creator)
// i = 0 does not evict the first pod
for i := 1; i < len(pods); i++ {
if maxPodsToEvict > 0 && nodepodCount[node]+1 > maxPodsToEvict {
if _, err := podEvictor.EvictPod(pods[i], node); err != nil {
break
}
success, err := evictions.EvictPod(client, pods[i], policyGroupVersion, dryRun)
if !success {
klog.Infof("Error when evicting pod: %#v (%#v)", pods[i].Name, err)
} else {
nodepodCount[node]++
klog.V(1).Infof("Evicted pod: %#v (%#v)", pods[i].Name, err)
}
}
}
}
podsEvicted += nodepodCount[node]
}
return podsEvicted
}

// ListDuplicatePodsOnANode lists duplicate pods on a given node.
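
Because deleteDuplicatePods no longer returns a count, a caller that wants numbers now asks the shared evictor. A small sketch of the before/after call sites, condensed from this diff; the logging loop at the end is an assumption added for illustration, not part of the PR.

// Before: the strategy tracked and returned its own eviction count.
//   podsEvicted := deleteDuplicatePods(client, policyGroupVersion, nodes, dryRun,
//       nodepodCount, maxPodsToEvict, evictLocalStoragePods)

// After: the strategy only evicts; counts are kept by the PodEvictor.
deleteDuplicatePods(client, nodes, evictLocalStoragePods, podEvictor)
for _, node := range nodes {
    klog.V(1).Infof("node %q: %d pods evicted so far this cycle",
        node.Name, podEvictor.NodeEvicted(node))
}
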
pkg/descheduler/strategies/duplicates_test.go (11 additions, 4 deletions)
@@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -127,17 +128,23 @@ func TestFindDuplicatePods(t *testing.T) {
}

for _, testCase := range testCases {

npe := utils.NodePodEvictedCount{}
npe[node] = 0
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
return true, node, nil
})
podsEvicted := deleteDuplicatePods(fakeClient, "v1", []*v1.Node{node}, false, npe, testCase.maxPodsToEvict, false)
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
testCase.maxPodsToEvict,
[]*v1.Node{node},
)

deleteDuplicatePods(fakeClient, []*v1.Node{node}, false, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
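
With the test building a real PodEvictor, the per-node cap is now exercised through the evictor rather than a hand-maintained count map. A hypothetical extra table entry illustrates the interaction; the field names follow the existing cases, and p1, p2, p3 are assumed duplicate pods created with the same helpers the test already uses.

{
    // With three duplicates of the same owner, the loop tries to evict two of
    // them (i = 1 and i = 2), but a cap of one makes the second EvictPod call
    // return an error, the strategy breaks out, and only one pod is evicted.
    description:             "Three duplicate pods, per-node cap allows a single eviction",
    pods:                    []v1.Pod{*p1, *p2, *p3}, // p1, p2, p3: hypothetical duplicates on node
    maxPodsToEvict:          1,
    expectedEvictedPodCount: 1,
},
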
pkg/descheduler/strategies/lownodeutilization.go (22 additions, 42 deletions)
@@ -40,7 +40,7 @@ type NodeUsageMap struct {

type NodePodsMap map[*v1.Node][]*v1.Pod

func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount utils.NodePodEvictedCount) {
func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if !strategy.Enabled {
return
}
@@ -87,18 +87,14 @@ func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerS
targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))

totalPodsEvicted := evictPodsFromTargetNodes(
ds.Client,
evictionPolicyGroupVersion,
evictPodsFromTargetNodes(
targetNodes,
lowNodes,
targetThresholds,
ds.DryRun,
ds.MaxNoOfPodsToEvictPerNode,
ds.EvictLocalStoragePods,
nodepodCount)
klog.V(1).Infof("Total number of pods evicted: %v", totalPodsEvicted)
podEvictor)

klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}

func validateThresholds(thresholds api.ResourceThresholds) bool {
@@ -163,17 +159,11 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(
client clientset.Interface,
evictionPolicyGroupVersion string,
targetNodes,
lowNodes []NodeUsageMap,
targetNodes, lowNodes []NodeUsageMap,
targetThresholds api.ResourceThresholds,
dryRun bool,
maxPodsToEvict int,
evictLocalStoragePods bool,
nodepodCount utils.NodePodEvictedCount,
) int {
podsEvicted := 0
podEvictor *evictions.PodEvictor,
) {

SortNodesByUsage(targetNodes)

@@ -212,7 +202,6 @@ func evictPodsFromTargetNodes(
nodeCapacity = node.node.Status.Allocatable
}
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
currentPodsEvicted := nodepodCount[node.node]

nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, evictLocalStoragePods)
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bestEffortPods:%v, burstablePods:%v, guaranteedPods:%v", len(node.allPods), len(nonRemovablePods), len(bestEffortPods), len(burstablePods), len(guaranteedPods))
@@ -225,46 +214,36 @@

// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
sortPodsBasedOnPriority(evictablePods)
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
evictPods(evictablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on priority.
klog.V(1).Infof("Evicting pods based on QoS")
klog.V(1).Infof("There are %v non-evictable pods on the node", len(nonRemovablePods))
// evict best effort pods
evictPods(bestEffortPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
evictPods(bestEffortPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
// evict burstable pods
evictPods(burstablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
evictPods(burstablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
// evict guaranteed pods
evictPods(guaranteedPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
evictPods(guaranteedPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
}
nodepodCount[node.node] = currentPodsEvicted
podsEvicted = podsEvicted + nodepodCount[node.node]
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", nodepodCount[node.node], node.node.Name, node.usage)
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", podEvictor.NodeEvicted(node.node), node.node.Name, node.usage)
}
return podsEvicted
}

func evictPods(inputPods []*v1.Pod,
client clientset.Interface,
evictionPolicyGroupVersion string,
targetThresholds api.ResourceThresholds,
nodeCapacity v1.ResourceList,
nodeUsage api.ResourceThresholds,
totalPods *float64,
totalCPU *float64,
totalMem *float64,
podsEvicted *int,
dryRun bool,
maxPodsToEvict int,
taintsOfLowNodes map[string][]v1.Taint) {
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
node *v1.Node) {
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if maxPodsToEvict > 0 && *podsEvicted+1 > maxPodsToEvict {
break
}

if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
continue
@@ -273,13 +252,14 @@ func evictPods(inputPods []*v1.Pod,
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)

success, err := evictions.EvictPod(client, pod, evictionPolicyGroupVersion, dryRun)
if !success {
klog.Warningf("Error when evicting pod: %#v (%#v)", pod.Name, err)
} else {
klog.V(3).Infof("Evicted pod: %#v (%#v)", pod.Name, err)
success, err := podEvictor.EvictPod(pod, node)
if err != nil {
break
}

if success {
klog.V(3).Infof("Evicted pod: %#v", pod.Name)
// update remaining pods
*podsEvicted++
nodeUsage[v1.ResourcePods] -= onePodPercentage
*totalPods--

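
The accounting inside the reworked evictPods is easier to see in isolation. Below is a condensed, hedged sketch of the per-pod step assembled from the hunks above; the cUsage/mUsage computations are left out, since the CPU and memory totals (*totalCPU, *totalMem) are handled in the part of the function this diff elides, and all variable names come from the signature shown in the diff.

onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
    if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
        // A pod that does not tolerate the low nodes' taints could not be
        // rescheduled onto them anyway, so it is skipped rather than evicted.
        continue
    }
    success, err := podEvictor.EvictPod(pod, node)
    if err != nil {
        break // the shared per-node eviction cap was reached for this node
    }
    if success {
        // Shrink the node's tracked usage and the remaining totals; the
        // surrounding function uses these to decide when the node is no
        // longer above the target thresholds.
        nodeUsage[v1.ResourcePods] -= onePodPercentage
        *totalPods--
    }
}
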