diff --git a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go index 85d1738f8c..1284ecbce4 100644 --- a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go +++ b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity.go @@ -78,24 +78,36 @@ func (d *RemovePodsViolatingInterPodAntiAffinity) Name() string { } func (d *RemovePodsViolatingInterPodAntiAffinity) Deschedule(ctx context.Context, nodes []*v1.Node) *frameworktypes.Status { + podsList, err := d.handle.ClientSet().CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + return &frameworktypes.Status{ + Err: fmt.Errorf("error listing all pods: %v", err), + } + } + + podsInANamespace := groupByNamespace(podsList) + + runningPodFilter := func(pod *v1.Pod) bool { + return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed + } + podsOnANode := groupByNodeName(podsList, podutil.WrapFilterFuncs(runningPodFilter, d.podFilter)) + + nodeMap := createNodeMap(nodes) + loop: for _, node := range nodes { klog.V(1).InfoS("Processing node", "node", klog.KObj(node)) - pods, err := podutil.ListPodsOnANode(node.Name, d.handle.GetPodsAssignedToNodeFunc(), d.podFilter) - if err != nil { - return &frameworktypes.Status{ - Err: fmt.Errorf("error listing pods: %v", err), - } - } - // sort the evictable Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers. + pods := podsOnANode[node.Name] + // sort the evict-able Pods based on priority, if there are multiple pods with same priority, they are sorted based on QoS tiers. podutil.SortPodsBasedOnPriorityLowToHigh(pods) totalPods := len(pods) for i := 0; i < totalPods; i++ { - if checkPodsWithAntiAffinityExist(pods[i], pods) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) { + if checkPodsWithAntiAffinityExist(pods[i], podsInANamespace, nodeMap) && d.handle.Evictor().Filter(pods[i]) && d.handle.Evictor().PreEvictionFilter(pods[i]) { if d.handle.Evictor().Evict(ctx, pods[i], evictions.EvictOptions{}) { // Since the current pod is evicted all other pods which have anti-affinity with this // pod need not be evicted. - // Update pods. + // Update allPods. + podsInANamespace = removePodFromNamespaceMap(pods[i], podsInANamespace) pods = append(pods[:i], pods[i+1:]...) i-- totalPods-- @@ -109,8 +121,54 @@ loop: return nil } +func removePodFromNamespaceMap(podToRemove *v1.Pod, podMap map[string][]*v1.Pod) map[string][]*v1.Pod { + podList, ok := podMap[podToRemove.Namespace] + if !ok { + return podMap + } + for i := 0; i < len(podList); i++ { + podToCheck := podList[i] + if podToRemove.Name == podToCheck.Name { + podMap[podToRemove.Namespace] = append(podList[:i], podList[i+1:]...) + return podMap + } + } + return podMap +} + +func groupByNamespace(pods *v1.PodList) map[string][]*v1.Pod { + m := make(map[string][]*v1.Pod) + for i := 0; i < len(pods.Items); i++ { + pod := &(pods.Items[i]) + m[pod.Namespace] = append(m[pod.Namespace], pod) + } + return m +} + +func groupByNodeName(pods *v1.PodList, filter podutil.FilterFunc) map[string][]*v1.Pod { + m := make(map[string][]*v1.Pod) + if filter == nil { + filter = func(p *v1.Pod) bool { return true } + } + for i := 0; i < len(pods.Items); i++ { + pod := &(pods.Items[i]) + if filter(pod) { + m[pod.Spec.NodeName] = append(m[pod.Spec.NodeName], pod) + } + } + return m +} + +func createNodeMap(nodes []*v1.Node) map[string]*v1.Node { + m := make(map[string]*v1.Node, len(nodes)) + for _, node := range nodes { + m[node.GetName()] = node + } + return m +} + // checkPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate. -func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool { +func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool { affinity := pod.Spec.Affinity if affinity != nil && affinity.PodAntiAffinity != nil { for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) { @@ -120,9 +178,22 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool { klog.ErrorS(err, "Unable to convert LabelSelector into Selector") return false } - for _, existingPod := range pods { - if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) { - return true + for namespace := range namespaces { + for _, existingPod := range pods[namespace] { + if existingPod.Name != pod.Name && utils.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) { + node, ok := nodeMap[pod.Spec.NodeName] + if !ok { + continue + } + nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName] + if !ok { + continue + } + if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) { + klog.V(1).InfoS("Found Pods violating PodAntiAffinity", "pod to evicted", klog.KObj(pod)) + return true + } + } } } } @@ -130,6 +201,29 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool { return false } +func hasSameLabelValue(node1, node2 *v1.Node, key string) bool { + if node1.Name == node2.Name { + return true + } + node1Labels := node1.Labels + if node1Labels == nil { + return false + } + node2Labels := node2.Labels + if node2Labels == nil { + return false + } + value1, ok := node1Labels[key] + if !ok { + return false + } + value2, ok := node2Labels[key] + if !ok { + return false + } + return value1 == value2 +} + // getPodAntiAffinityTerms gets the antiaffinity terms for the given pod. func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) { if podAntiAffinity != nil { diff --git a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go index c46e761f20..9da0dc6b18 100644 --- a/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go +++ b/pkg/framework/plugins/removepodsviolatinginterpodantiaffinity/pod_antiaffinity_test.go @@ -38,19 +38,27 @@ import ( ) func TestPodAntiAffinity(t *testing.T) { - node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) + node1 := test.BuildTestNode("n1", 2000, 3000, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + "region": "main-region", + } + }) node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) { node.ObjectMeta.Labels = map[string]string{ "datacenter": "east", } }) - node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) { node.Spec = v1.NodeSpec{ Unschedulable: true, } }) node4 := test.BuildTestNode("n4", 2, 2, 1, nil) + node5 := test.BuildTestNode("n5", 200, 3000, 10, func(node *v1.Node) { + node.ObjectMeta.Labels = map[string]string{ + "region": "main-region", + } + }) p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil) p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil) @@ -62,6 +70,7 @@ func TestPodAntiAffinity(t *testing.T) { p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil) p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil) p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil) + p11 := test.BuildTestPod("p11", 100, 0, node5.Name, nil) p9.DeletionTimestamp = &metav1.Time{} p10.DeletionTimestamp = &metav1.Time{} @@ -73,6 +82,7 @@ func TestPodAntiAffinity(t *testing.T) { p5.Labels = map[string]string{"foo": "bar"} p6.Labels = map[string]string{"foo": "bar"} p7.Labels = map[string]string{"foo1": "bar1"} + p11.Labels = map[string]string{"foo": "bar"} nonEvictablePod.Labels = map[string]string{"foo": "bar"} test.SetNormalOwnerRef(p1) test.SetNormalOwnerRef(p2) @@ -83,6 +93,7 @@ func TestPodAntiAffinity(t *testing.T) { test.SetNormalOwnerRef(p7) test.SetNormalOwnerRef(p9) test.SetNormalOwnerRef(p10) + test.SetNormalOwnerRef(p11) // set pod anti affinity setPodAntiAffinity(p1, "foo", "bar") @@ -186,6 +197,13 @@ func TestPodAntiAffinity(t *testing.T) { expectedEvictedPodCount: 0, nodeFit: true, }, + { + description: "Evict pod violating anti-affinity among different node (all pods have anti-affinity)", + pods: []*v1.Pod{p1, p11}, + nodes: []*v1.Node{node1, node5}, + expectedEvictedPodCount: 1, + nodeFit: false, + }, } for _, test := range tests {