diff --git a/cluster-autoscaler/core/scaledown/planner/planner.go b/cluster-autoscaler/core/scaledown/planner/planner.go index 3ece1db00490..c3346d3d0c96 100644 --- a/cluster-autoscaler/core/scaledown/planner/planner.go +++ b/cluster-autoscaler/core/scaledown/planner/planner.go @@ -73,10 +73,10 @@ type Planner struct { minUpdateInterval time.Duration eligibilityChecker eligibilityChecker nodeUtilizationMap map[string]utilization.Info - actuationStatus scaledown.ActuationStatus resourceLimitsFinder *resource.LimitsFinder cc controllerReplicasCalculator scaleDownSetProcessor nodes.ScaleDownSetProcessor + scaleDownContext *nodes.ScaleDownContext } // New creates a new Planner object. @@ -97,7 +97,11 @@ func New(context *context.AutoscalingContext, processors *processors.Autoscaling resourceLimitsFinder: resourceLimitsFinder, cc: newControllerReplicasCalculator(context.ListerRegistry), scaleDownSetProcessor: processors.ScaleDownSetProcessor, - minUpdateInterval: minUpdateInterval, + // No need to limit the number of nodes, since it will happen later, in the actuation stage. + // It will make a more appropriate decision by using additional information about deletions + // in progress. + scaleDownContext: nodes.NewDefaultScaleDownSetContext(math.MaxInt), + minUpdateInterval: minUpdateInterval, } } @@ -110,7 +114,7 @@ func (p *Planner) UpdateClusterState(podDestinations, scaleDownCandidates []*api p.minUpdateInterval = updateInterval } p.latestUpdate = currentTime - p.actuationStatus = as + p.scaleDownContext.ActuationStatus = as // Avoid persisting changes done by the simulation. 
p.context.ClusterSnapshot.Fork() defer p.context.ClusterSnapshot.Revert() @@ -147,22 +151,17 @@ func (p *Planner) NodesToDelete(_ time.Time) (empty, needDrain []*apiv1.Node) { klog.Errorf("Nothing will scale down, failed to create resource limiter: %v", err) return nil, nil } - limitsLeft := p.resourceLimitsFinder.LimitsLeft(p.context, nodes, resourceLimiter, p.latestUpdate) - emptyRemovable, needDrainRemovable, unremovable := p.unneededNodes.RemovableAt(p.context, p.latestUpdate, limitsLeft, resourceLimiter.GetResources(), p.actuationStatus) - for _, u := range unremovable { - p.unremovableNodes.Add(u) - } - needDrainRemovable = sortByRisk(needDrainRemovable) - nodesToRemove := p.scaleDownSetProcessor.GetNodesToRemove( - p.context, - // We need to pass empty nodes first, as there might be some non-empty scale - // downs already in progress. If we pass the empty nodes first, they will be first - // to get deleted, thus we decrease chances of hitting the limit on non-empty scale down. - append(emptyRemovable, needDrainRemovable...), - // No need to limit the number of nodes, since it will happen later, in the actuation stage. - // It will make a more appropriate decision by using additional information about deletions - // in progress. - math.MaxInt) + p.scaleDownContext.ResourcesLeft = p.resourceLimitsFinder.LimitsLeft(p.context, nodes, resourceLimiter, p.latestUpdate).DeepCopy() + p.scaleDownContext.ResourcesWithLimits = resourceLimiter.GetResources() + emptyRemovableNodes, needDrainRemovableNodes, unremovableNodes := p.unneededNodes.RemovableAt(p.context, *p.scaleDownContext, p.latestUpdate) + p.addUnremovableNodes(unremovableNodes) + + needDrainRemovableNodes = sortByRisk(needDrainRemovableNodes) + candidatesToBeRemoved := append(emptyRemovableNodes, needDrainRemovableNodes...) 
+ + nodesToRemove, unremovableNodes := p.scaleDownSetProcessor.FilterUnremovableNodes(p.context, *p.scaleDownContext, candidatesToBeRemoved) + p.addUnremovableNodes(unremovableNodes) + for _, nodeToRemove := range nodesToRemove { if len(nodeToRemove.PodsToReschedule) > 0 { needDrain = append(needDrain, nodeToRemove.Node) @@ -174,6 +173,12 @@ func (p *Planner) NodesToDelete(_ time.Time) (empty, needDrain []*apiv1.Node) { return empty, needDrain } +func (p *Planner) addUnremovableNodes(unremovableNodes []simulator.UnremovableNode) { + for _, u := range unremovableNodes { + p.unremovableNodes.Add(&u) + } +} + func allNodes(s clustersnapshot.ClusterSnapshot) ([]*apiv1.Node, error) { nodeInfos, err := s.NodeInfos().List() if err != nil { @@ -212,7 +217,7 @@ func (p *Planner) NodeUtilizationMap() map[string]utilization.Info { // For pods that are controlled by controller known by CA, it will check whether // they have been recreated and will inject only not yet recreated pods. func (p *Planner) injectRecentlyEvictedPods() error { - recentlyEvictedRecreatablePods := pod_util.FilterRecreatablePods(p.actuationStatus.RecentEvictions()) + recentlyEvictedRecreatablePods := pod_util.FilterRecreatablePods(p.scaleDownContext.ActuationStatus.RecentEvictions()) return p.injectPods(filterOutRecreatedPods(recentlyEvictedRecreatablePods, p.cc)) } diff --git a/cluster-autoscaler/core/scaledown/planner/planner_test.go b/cluster-autoscaler/core/scaledown/planner/planner_test.go index 19f160b1b0af..476c8b3756a0 100644 --- a/cluster-autoscaler/core/scaledown/planner/planner_test.go +++ b/cluster-autoscaler/core/scaledown/planner/planner_test.go @@ -18,6 +18,7 @@ package planner import ( "fmt" + "math" "testing" "time" @@ -35,6 +36,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable" . 
"k8s.io/autoscaler/cluster-autoscaler/core/test" + processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" @@ -498,7 +500,7 @@ func TestUpdateClusterState(t *testing.T) { assert.NoError(t, err) clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, tc.nodes, tc.pods) deleteOptions := options.NodeDeleteOptions{} - p := New(&context, NewTestProcessors(&context), deleteOptions, nil) + p := New(&context, processorstest.NewTestProcessors(&context), deleteOptions, nil) p.eligibilityChecker = &fakeEligibilityChecker{eligible: asMap(tc.eligible)} if tc.isSimulationTimeout { context.AutoscalingOptions.ScaleDownSimulationTimeout = 1 * time.Second @@ -694,7 +696,7 @@ func TestUpdateClusterStatUnneededNodesLimit(t *testing.T) { assert.NoError(t, err) clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, nil) deleteOptions := options.NodeDeleteOptions{} - p := New(&context, NewTestProcessors(&context), deleteOptions, nil) + p := New(&context, processorstest.NewTestProcessors(&context), deleteOptions, nil) p.eligibilityChecker = &fakeEligibilityChecker{eligible: asMap(nodeNames(nodes))} p.minUpdateInterval = tc.updateInterval p.unneededNodes.Update(previouslyUnneeded, time.Now()) @@ -706,16 +708,18 @@ func TestUpdateClusterStatUnneededNodesLimit(t *testing.T) { func TestNodesToDelete(t *testing.T) { testCases := []struct { - name string - nodes map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved - wantEmpty []*apiv1.Node - wantDrain []*apiv1.Node + name string + nodes map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved + wantEmpty []*apiv1.Node + wantDrain []*apiv1.Node + maxNodeCountToBeRemoved int }{ { - name: "empty", - nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{}, - wantEmpty: []*apiv1.Node{}, - wantDrain: 
[]*apiv1.Node{}, + name: "empty", + nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{}, + wantEmpty: []*apiv1.Node{}, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: math.MaxInt, }, { name: "single empty", @@ -727,7 +731,26 @@ func TestNodesToDelete(t *testing.T) { wantEmpty: []*apiv1.Node{ buildRemovableNode("test-node", 0).Node, }, - wantDrain: []*apiv1.Node{}, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: math.MaxInt, + }, + { + name: "multiple empty with limit", + nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{ + sizedNodeGroup("test-ng", 3, false): { + buildRemovableNode("node-1", 0), + buildRemovableNode("node-2", 0), + buildRemovableNode("node-3", 0), + buildRemovableNode("node-4", 1), + }, + }, + wantEmpty: []*apiv1.Node{ + buildRemovableNode("node-1", 0).Node, + buildRemovableNode("node-2", 0).Node, + buildRemovableNode("node-3", 0).Node, + }, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: 3, }, { name: "single drain", @@ -740,6 +763,7 @@ func TestNodesToDelete(t *testing.T) { wantDrain: []*apiv1.Node{ buildRemovableNode("test-node", 1).Node, }, + maxNodeCountToBeRemoved: math.MaxInt, }, { name: "single empty atomic", @@ -748,8 +772,9 @@ func TestNodesToDelete(t *testing.T) { buildRemovableNode("node-1", 0), }, }, - wantEmpty: []*apiv1.Node{}, - wantDrain: []*apiv1.Node{}, + wantEmpty: []*apiv1.Node{}, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: math.MaxInt, }, { name: "all empty atomic", @@ -765,7 +790,8 @@ func TestNodesToDelete(t *testing.T) { buildRemovableNode("node-2", 0).Node, buildRemovableNode("node-3", 0).Node, }, - wantDrain: []*apiv1.Node{}, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: math.MaxInt, }, { name: "some drain atomic", @@ -783,6 +809,7 @@ func TestNodesToDelete(t *testing.T) { wantDrain: []*apiv1.Node{ buildRemovableNode("node-3", 1).Node, }, + maxNodeCountToBeRemoved: math.MaxInt, }, { name: "different groups", @@ -836,6 +863,52 @@ func 
TestNodesToDelete(t *testing.T) { buildRemovableNode("node-14", 0).Node, buildRemovableNode("node-15", 0).Node, }, + maxNodeCountToBeRemoved: math.MaxInt, + }, + { + name: "different groups with max count equal to all empty", + nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{ + sizedNodeGroup("standard-empty-ng", 3, false): { + buildRemovableNode("node-1", 0), + buildRemovableNode("node-2", 0), + buildRemovableNode("node-3", 0), + }, + sizedNodeGroup("standard-drain-ng", 3, false): { + buildRemovableNode("node-4", 1), + buildRemovableNode("node-5", 2), + buildRemovableNode("node-6", 3), + }, + sizedNodeGroup("standard-mixed-ng", 3, false): { + buildRemovableNode("node-7", 0), + buildRemovableNode("node-8", 1), + buildRemovableNode("node-9", 2), + }, + sizedNodeGroup("atomic-empty-ng", 3, true): { + buildRemovableNode("node-10", 0), + buildRemovableNode("node-11", 0), + buildRemovableNode("node-12", 0), + }, + sizedNodeGroup("atomic-mixed-ng", 3, true): { + buildRemovableNode("node-13", 0), + buildRemovableNode("node-14", 1), + buildRemovableNode("node-15", 2), + }, + sizedNodeGroup("atomic-partial-ng", 3, true): { + buildRemovableNode("node-16", 0), + buildRemovableNode("node-17", 1), + }, + }, + wantEmpty: []*apiv1.Node{ + buildRemovableNode("node-1", 0).Node, + buildRemovableNode("node-2", 0).Node, + buildRemovableNode("node-3", 0).Node, + buildRemovableNode("node-7", 0).Node, + buildRemovableNode("node-10", 0).Node, + buildRemovableNode("node-11", 0).Node, + buildRemovableNode("node-12", 0).Node, + }, + wantDrain: []*apiv1.Node{}, + maxNodeCountToBeRemoved: 9, }, } for _, tc := range testCases { @@ -862,9 +935,10 @@ func TestNodesToDelete(t *testing.T) { assert.NoError(t, err) clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, allNodes, nil) deleteOptions := options.NodeDeleteOptions{} - p := New(&context, NewTestProcessors(&context), deleteOptions, nil) + p := New(&context, processorstest.NewTestProcessors(&context), 
deleteOptions, nil) p.latestUpdate = time.Now() - p.actuationStatus = deletiontracker.NewNodeDeletionTracker(0 * time.Second) + p.scaleDownContext.ActuationStatus = deletiontracker.NewNodeDeletionTracker(0 * time.Second) + p.scaleDownContext.MaxNodeCountToBeRemoved = tc.maxNodeCountToBeRemoved p.unneededNodes.Update(allRemovables, time.Now().Add(-1*time.Hour)) p.eligibilityChecker = &fakeEligibilityChecker{eligible: asMap(nodeNames(allNodes))} empty, drain := p.NodesToDelete(time.Now()) diff --git a/cluster-autoscaler/core/scaledown/unneeded/nodes.go b/cluster-autoscaler/core/scaledown/unneeded/nodes.go index 47f4797ce2af..9c29787608f9 100644 --- a/cluster-autoscaler/core/scaledown/unneeded/nodes.go +++ b/cluster-autoscaler/core/scaledown/unneeded/nodes.go @@ -26,6 +26,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource" "k8s.io/autoscaler/cluster-autoscaler/metrics" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodes" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/utils" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -117,23 +118,22 @@ func (n *Nodes) Drop(node string) { // RemovableAt returns all nodes that can be removed at a given time, divided // into empty and non-empty node lists, as well as a list of nodes that were // unneeded, but are not removable, annotated by reason. 
-func (n *Nodes) RemovableAt(context *context.AutoscalingContext, ts time.Time, resourcesLeft resource.Limits, resourcesWithLimits []string, as scaledown.ActuationStatus) (empty, needDrain []simulator.NodeToBeRemoved, unremovable []*simulator.UnremovableNode) { +func (n *Nodes) RemovableAt(context *context.AutoscalingContext, scaleDownContext nodes.ScaleDownContext, ts time.Time) (empty, needDrain []simulator.NodeToBeRemoved, unremovable []simulator.UnremovableNode) { nodeGroupSize := utils.GetNodeGroupSizeMap(context.CloudProvider) - resourcesLeftCopy := resourcesLeft.DeepCopy() emptyNodes, drainNodes := n.splitEmptyAndNonEmptyNodes() for nodeName, v := range emptyNodes { klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String()) - if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason { - unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r}) + if r := n.unremovableReason(context, scaleDownContext, v, ts, nodeGroupSize); r != simulator.NoReason { + unremovable = append(unremovable, simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r}) continue } empty = append(empty, v.ntbr) } for nodeName, v := range drainNodes { klog.V(2).Infof("%s was unneeded for %s", nodeName, ts.Sub(v.since).String()) - if r := n.unremovableReason(context, v, ts, nodeGroupSize, resourcesLeftCopy, resourcesWithLimits, as); r != simulator.NoReason { - unremovable = append(unremovable, &simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r}) + if r := n.unremovableReason(context, scaleDownContext, v, ts, nodeGroupSize); r != simulator.NoReason { + unremovable = append(unremovable, simulator.UnremovableNode{Node: v.ntbr.Node, Reason: r}) continue } needDrain = append(needDrain, v.ntbr) @@ -141,7 +141,7 @@ func (n *Nodes) RemovableAt(context *context.AutoscalingContext, ts time.Time, r return } -func (n *Nodes) unremovableReason(context *context.AutoscalingContext, 
v *node, ts time.Time, nodeGroupSize map[string]int, resourcesLeft resource.Limits, resourcesWithLimits []string, as scaledown.ActuationStatus) simulator.UnremovableReason { +func (n *Nodes) unremovableReason(context *context.AutoscalingContext, scaleDownContext nodes.ScaleDownContext, v *node, ts time.Time, nodeGroupSize map[string]int) simulator.UnremovableReason { node := v.ntbr.Node // Check if node is marked with no scale down annotation. if eligibility.HasNoScaleDownAnnotation(node) { @@ -182,17 +182,17 @@ func (n *Nodes) unremovableReason(context *context.AutoscalingContext, v *node, } } - if reason := verifyMinSize(node.Name, nodeGroup, nodeGroupSize, as); reason != simulator.NoReason { + if reason := verifyMinSize(node.Name, nodeGroup, nodeGroupSize, scaleDownContext.ActuationStatus); reason != simulator.NoReason { return reason } - resourceDelta, err := n.limitsFinder.DeltaForNode(context, node, nodeGroup, resourcesWithLimits) + resourceDelta, err := n.limitsFinder.DeltaForNode(context, node, nodeGroup, scaleDownContext.ResourcesWithLimits) if err != nil { klog.Errorf("Error getting node resources: %v", err) return simulator.UnexpectedError } - checkResult := resourcesLeft.TryDecrementBy(resourceDelta) + checkResult := scaleDownContext.ResourcesLeft.TryDecrementBy(resourceDelta) if checkResult.Exceeded() { klog.V(4).Infof("Skipping %s - minimal limit exceeded for %v", node.Name, checkResult.ExceededResources) for _, resource := range checkResult.ExceededResources { diff --git a/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go b/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go index 704c8df361f2..18bb35f19521 100644 --- a/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go +++ b/cluster-autoscaler/core/scaledown/unneeded/nodes_test.go @@ -18,6 +18,7 @@ package unneeded import ( "fmt" + "math" "testing" "time" @@ -28,6 +29,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource" 
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status" . "k8s.io/autoscaler/cluster-autoscaler/core/test" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodes" "k8s.io/autoscaler/cluster-autoscaler/simulator" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -186,10 +188,10 @@ func TestRemovableAt(t *testing.T) { }) } - nodes := append(empty, drain...) + removableNodes := append(empty, drain...) provider := testprovider.NewTestCloudProvider(nil, nil) provider.InsertNodeGroup(ng) - for _, node := range nodes { + for _, node := range removableNodes { provider.AddNode("ng", node.Node) } @@ -202,8 +204,13 @@ func TestRemovableAt(t *testing.T) { assert.NoError(t, err) n := NewNodes(&fakeScaleDownTimeGetter{}, &resource.LimitsFinder{}) - n.Update(nodes, time.Now()) - gotEmptyToRemove, gotDrainToRemove, _ := n.RemovableAt(&ctx, time.Now(), resource.Limits{}, []string{}, as) + n.Update(removableNodes, time.Now()) + gotEmptyToRemove, gotDrainToRemove, _ := n.RemovableAt(&ctx, nodes.ScaleDownContext{ + ActuationStatus: as, + ResourcesLeft: resource.Limits{}, + ResourcesWithLimits: []string{}, + MaxNodeCountToBeRemoved: math.MaxInt, + }, time.Now()) if len(gotDrainToRemove) != tc.numDrainToRemove || len(gotEmptyToRemove) != tc.numEmptyToRemove { t.Errorf("%s: getNodesToRemove() return %d, %d, want %d, %d", tc.name, len(gotEmptyToRemove), len(gotDrainToRemove), tc.numEmptyToRemove, tc.numDrainToRemove) } diff --git a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go index d64d61c2df8d..bf875c7b5d24 100644 --- a/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go +++ b/cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go @@ -45,6 +45,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" 
"k8s.io/autoscaler/cluster-autoscaler/processors/status" + processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" @@ -1052,7 +1053,7 @@ func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestR assert.NoError(t, err) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) processors.ScaleStateNotifier.Register(clusterState) if config.EnableAutoprovisioning { processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t} @@ -1160,7 +1161,7 @@ func TestScaleUpUnhealthy(t *testing.T) { clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) p3 := BuildTestPod("p-new", 550, 0) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) suOrchestrator := New() suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{}) scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false) @@ -1206,7 +1207,7 @@ func TestBinpackingLimiter(t *testing.T) { extraPod := BuildTestPod("p-new", 500, 0) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) // We should stop binpacking after finding expansion option from first node group. 
processors.BinpackingLimiter = &MockBinpackingLimiter{} @@ -1260,7 +1261,7 @@ func TestScaleUpNoHelp(t *testing.T) { clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) p3 := BuildTestPod("p-new", 500, 0) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) suOrchestrator := New() suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{}) scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false) @@ -1482,7 +1483,7 @@ func TestScaleUpBalanceGroups(t *testing.T) { pods = append(pods, BuildTestPod(fmt.Sprintf("test-pod-%v", i), 80, 0)) } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) suOrchestrator := New() suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{}) scaleUpStatus, typedErr := suOrchestrator.ScaleUp(pods, nodes, []*appsv1.DaemonSet{}, nodeInfos, false) @@ -1538,7 +1539,7 @@ func TestScaleUpAutoprovisionedNodeGroup(t *testing.T) { clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t} processors.NodeGroupManager = &MockAutoprovisioningNodeGroupManager{T: t, ExtraGroups: 0} @@ -1593,7 +1594,7 @@ func TestScaleUpBalanceAutoprovisionedNodeGroups(t *testing.T) { clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), 
nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t} processors.NodeGroupManager = &MockAutoprovisioningNodeGroupManager{T: t, ExtraGroups: 2} @@ -1651,7 +1652,7 @@ func TestScaleUpToMeetNodeGroupMinSize(t *testing.T) { nodes := []*apiv1.Node{n1, n2} nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, time.Now()) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) clusterState.UpdateNodes(nodes, nodeInfos, time.Now()) @@ -1743,7 +1744,7 @@ func TestScaleupAsyncNodeGroupsEnabled(t *testing.T) { clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t} processors.NodeGroupManager = &MockAutoprovisioningNodeGroupManager{T: t, ExtraGroups: 0} processors.AsyncNodeGroupStateChecker = 
&asyncnodegroups.MockAsyncNodeGroupStateChecker{IsUpcomingNodeGroup: tc.isUpcomingMockMap} diff --git a/cluster-autoscaler/core/scaleup/resource/manager_test.go b/cluster-autoscaler/core/scaleup/resource/manager_test.go index dfe7b9415770..8c36a30919d4 100644 --- a/cluster-autoscaler/core/scaleup/resource/manager_test.go +++ b/cluster-autoscaler/core/scaleup/resource/manager_test.go @@ -30,6 +30,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/core/test" "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" + processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/taints" utils_test "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -65,7 +66,7 @@ func TestDeltaForNode(t *testing.T) { for _, testCase := range testCases { cp := testprovider.NewTestCloudProvider(nil, nil) ctx := newContext(t, cp) - processors := test.NewTestProcessors(&ctx) + processors := processorstest.NewTestProcessors(&ctx) ng := testCase.nodeGroupConfig group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) @@ -106,7 +107,7 @@ func TestResourcesLeft(t *testing.T) { for _, testCase := range testCases { cp := newCloudProvider(t, 1000, 1000) ctx := newContext(t, cp) - processors := test.NewTestProcessors(&ctx) + processors := processorstest.NewTestProcessors(&ctx) ng := testCase.nodeGroupConfig _, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) @@ -157,7 +158,7 @@ func TestApplyLimits(t *testing.T) { for _, testCase := range testCases { cp := testprovider.NewTestCloudProvider(nil, nil) ctx := newContext(t, cp) - processors := test.NewTestProcessors(&ctx) + processors := processorstest.NewTestProcessors(&ctx) ng := testCase.nodeGroupConfig group, nodes := newNodeGroup(t, cp, ng.Name, ng.Min, ng.Max, ng.Size, ng.CPU, ng.Mem) @@ -215,7 +216,7 @@ func 
TestResourceManagerWithGpuResource(t *testing.T) { provider.SetResourceLimiter(resourceLimiter) context := newContext(t, provider) - processors := test.NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) n1 := newNode(t, "n1", 8, 16) utils_test.AddGpusToNode(n1, 4) diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index 9cfc0a1107cb..e1786310b740 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -49,6 +49,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" "k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates" + processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" @@ -163,7 +164,7 @@ func (m *onNodeGroupDeleteMock) Delete(id string) error { func setUpScaleDownActuator(ctx *context.AutoscalingContext, autoscalingOptions config.AutoscalingOptions) { deleteOptions := options.NewNodeDeleteOptions(autoscalingOptions) - ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, rules.Default(deleteOptions), NewTestProcessors(ctx).NodeGroupConfigProcessor) + ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, rules.Default(deleteOptions), processorstest.NewTestProcessors(ctx).NodeGroupConfigProcessor) } type nodeGroup struct { @@ -283,7 +284,7 @@ func setupAutoscaler(config *autoscalerSetupConfig) (*StaticAutoscaler, error) { ngConfigProcesssor := nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.autoscalingOptions.NodeGroupDefaults) - 
processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, config.clusterStateConfig, context.LogRecorder, NewBackoff(), ngConfigProcesssor, processors.AsyncNodeGroupStateChecker) @@ -380,7 +381,7 @@ func TestStaticAutoscalerRunOnce(t *testing.T) { clusterStateConfig := clusterstate.ClusterStateRegistryConfig{ OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) sdPlanner, sdActuator := newScaleDownPlannerAndActuator(&context, processors, clusterState, nil) suOrchestrator := orchestrator.New() @@ -648,7 +649,7 @@ func TestStaticAutoscalerRunOnceWithScaleDownDelayPerNG(t *testing.T) { clusterStateConfig := clusterstate.ClusterStateRegistryConfig{ OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) sddProcessor := scaledowncandidates.NewScaleDownCandidatesDelayProcessor() processors.ScaleStateNotifier.Register(sddProcessor) scaleDownCandidatesComparers := []scaledowncandidates.CandidatesComparer{} @@ -794,7 +795,7 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) { setUpScaleDownActuator(&context, options) - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) processors.NodeGroupManager = nodeGroupManager processors.NodeGroupListProcessor = nodeGroupListProcessor @@ -947,7 +948,7 @@ func TestStaticAutoscalerRunOnceWithALongUnregisteredNode(t *testing.T) { clusterStateConfig := clusterstate.ClusterStateRegistryConfig{ OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := 
processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) // broken node detected as unregistered @@ -1106,7 +1107,7 @@ func TestStaticAutoscalerRunOncePodsWithPriorities(t *testing.T) { OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) sdPlanner, sdActuator := newScaleDownPlannerAndActuator(&context, processors, clusterState, nil) suOrchestrator := orchestrator.New() @@ -1238,7 +1239,7 @@ func TestStaticAutoscalerRunOnceWithFilteringOnBinPackingEstimator(t *testing.T) OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) sdPlanner, sdActuator := newScaleDownPlannerAndActuator(&context, processors, clusterState, nil) @@ -1337,7 +1338,7 @@ func TestStaticAutoscalerRunOnceWithFilteringOnUpcomingNodesEnabledNoScaleUp(t * OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(options.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) sdPlanner, sdActuator := 
newScaleDownPlannerAndActuator(&context, processors, clusterState, nil) @@ -1440,7 +1441,7 @@ func TestStaticAutoscalerRunOnceWithUnselectedNodeGroups(t *testing.T) { clusterStateConfig := clusterstate.ClusterStateRegistryConfig{ OkTotalUnreadyCount: 1, } - processors := NewTestProcessors(&context) + processors := processorstest.NewTestProcessors(&context) clusterState := clusterstate.NewClusterStateRegistry(provider, clusterStateConfig, context.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(autoscalingOptions.NodeGroupDefaults), processors.AsyncNodeGroupStateChecker) @@ -2085,14 +2086,14 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) { ctx, err := NewScaleTestAutoscalingContext(autoscalingOptions, &fake.Clientset{}, listerRegistry, provider, processorCallbacks, nil) assert.NoError(t, err) - processors := NewTestProcessors(&ctx) + processors := processorstest.NewTestProcessors(&ctx) // Create CSR with unhealthy cluster protection effectively disabled, to guarantee we reach the tested logic. csrConfig := clusterstate.ClusterStateRegistryConfig{OkTotalUnreadyCount: nodeGroupCount * unreadyNodesCount} csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker) // Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test. 
- actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, NewTestProcessors(&ctx).NodeGroupConfigProcessor) + actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, processorstest.NewTestProcessors(&ctx).NodeGroupConfigProcessor) ctx.ScaleDownActuator = actuator // Fake planner that keeps track of the scale-down candidates passed to UpdateClusterState. diff --git a/cluster-autoscaler/core/test/common.go b/cluster-autoscaler/core/test/common.go index 5eb7cd20709b..49a44476a5c5 100644 --- a/cluster-autoscaler/core/test/common.go +++ b/cluster-autoscaler/core/test/common.go @@ -27,30 +27,17 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/context" - "k8s.io/autoscaler/cluster-autoscaler/core/podlistprocessor" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot" "k8s.io/autoscaler/cluster-autoscaler/expander" "k8s.io/autoscaler/cluster-autoscaler/expander/random" "k8s.io/autoscaler/cluster-autoscaler/metrics" - "k8s.io/autoscaler/cluster-autoscaler/observers/nodegroupchange" - "k8s.io/autoscaler/cluster-autoscaler/processors" - "k8s.io/autoscaler/cluster-autoscaler/processors/actionablecluster" - "k8s.io/autoscaler/cluster-autoscaler/processors/binpacking" processor_callbacks "k8s.io/autoscaler/cluster-autoscaler/processors/callbacks" - "k8s.io/autoscaler/cluster-autoscaler/processors/customresources" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" - 
"k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" - "k8s.io/autoscaler/cluster-autoscaler/processors/nodes" - "k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates" "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker" - "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" "k8s.io/autoscaler/cluster-autoscaler/utils/backoff" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -175,32 +162,6 @@ func ExtractPodNames(pods []*apiv1.Pod) []string { return podNames } -// NewTestProcessors returns a set of simple processors for use in tests. -func NewTestProcessors(context *context.AutoscalingContext) *processors.AutoscalingProcessors { - return &processors.AutoscalingProcessors{ - PodListProcessor: podlistprocessor.NewDefaultPodListProcessor(context.PredicateChecker, scheduling.ScheduleAnywhere), - NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{}, - BinpackingLimiter: binpacking.NewTimeLimiter(context.MaxNodeGroupBinpackingDuration), - NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{}), - ScaleDownSetProcessor: nodes.NewCompositeScaleDownSetProcessor([]nodes.ScaleDownSetProcessor{ - nodes.NewMaxNodesProcessor(), - nodes.NewAtomicResizeFilteringProcessor(), - }), - // TODO(bskiba): change scale up test so that this can be a NoOpProcessor - ScaleUpStatusProcessor: &status.EventingScaleUpStatusProcessor{}, - ScaleDownStatusProcessor: &status.NoOpScaleDownStatusProcessor{}, - AutoscalingStatusProcessor: &status.NoOpAutoscalingStatusProcessor{}, - NodeGroupManager: nodegroups.NewDefaultNodeGroupManager(), - TemplateNodeInfoProvider: nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false), - NodeGroupConfigProcessor: 
nodegroupconfig.NewDefaultNodeGroupConfigProcessor(context.NodeGroupDefaults), - CustomResourcesProcessor: customresources.NewDefaultCustomResourcesProcessor(), - ActionableClusterProcessor: actionablecluster.NewDefaultActionableClusterProcessor(), - ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(), - ScaleStateNotifier: nodegroupchange.NewNodeGroupChangeObserversList(), - AsyncNodeGroupStateChecker: asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(), - } -} - // NewScaleTestAutoscalingContext creates a new test autoscaling context for scaling tests. func NewScaleTestAutoscalingContext( options config.AutoscalingOptions, fakeClient kube_client.Interface, diff --git a/cluster-autoscaler/processors/nodes/scale_down_context.go b/cluster-autoscaler/processors/nodes/scale_down_context.go new file mode 100644 index 000000000000..fda8ffc1f022 --- /dev/null +++ b/cluster-autoscaler/processors/nodes/scale_down_context.go @@ -0,0 +1,37 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package nodes + +import ( + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource" +) + +// ScaleDownContext holds the context for scaling down process +type ScaleDownContext struct { + ActuationStatus scaledown.ActuationStatus + MaxNodeCountToBeRemoved int + ResourcesLeft resource.Limits + ResourcesWithLimits []string +} + +// NewDefaultScaleDownSetContext returns a ScaleDownContext with the passed maxNodeCountToBeRemoved +func NewDefaultScaleDownSetContext(maxNodeCountToBeRemoved int) *ScaleDownContext { + return &ScaleDownContext{ + MaxNodeCountToBeRemoved: maxNodeCountToBeRemoved, + } +} diff --git a/cluster-autoscaler/processors/nodes/scale_down_set_processor.go b/cluster-autoscaler/processors/nodes/scale_down_set_processor.go index 9c86aadafa8a..c6e858b0cb17 100644 --- a/cluster-autoscaler/processors/nodes/scale_down_set_processor.go +++ b/cluster-autoscaler/processors/nodes/scale_down_set_processor.go @@ -37,12 +37,27 @@ func NewCompositeScaleDownSetProcessor(orderedProcessorList []ScaleDownSetProces } } -// GetNodesToRemove selects nodes to remove.
-func (p *CompositeScaleDownSetProcessor) GetNodesToRemove(ctx *context.AutoscalingContext, candidates []simulator.NodeToBeRemoved, maxCount int) []simulator.NodeToBeRemoved { - for _, p := range p.orderedProcessorList { - candidates = p.GetNodesToRemove(ctx, candidates, maxCount) +// FilterUnremovableNodes filters unremovable nodes out of the passed removable candidates by calling each processor in orderedProcessorList in order +func (p *CompositeScaleDownSetProcessor) FilterUnremovableNodes(ctx *context.AutoscalingContext, scaleDownCtx ScaleDownContext, candidates []simulator.NodeToBeRemoved) ([]simulator.NodeToBeRemoved, []simulator.UnremovableNode) { + + nodesToBeRemoved := []simulator.NodeToBeRemoved{} + unremovableNodes := []simulator.UnremovableNode{} + + for idx, p := range p.orderedProcessorList { + candidatesOriginalSize := len(candidates) + + processorRemovableNodes, processorUnremovableNodes := p.FilterUnremovableNodes(ctx, scaleDownCtx, candidates) + candidates = processorRemovableNodes + + if len(processorRemovableNodes)+len(processorUnremovableNodes) != candidatesOriginalSize { + klog.Errorf("Scale down set composite processor failed with processor at index %d : total number of nodes should be %d found %d to be removed and %d unremovable", + idx, candidatesOriginalSize, len(processorRemovableNodes), len(processorUnremovableNodes)) + } + + unremovableNodes = append(unremovableNodes, processorUnremovableNodes...)
+ nodesToBeRemoved = candidates } - return candidates + return nodesToBeRemoved, unremovableNodes } // CleanUp is called at CA termination @@ -56,13 +71,21 @@ func (p *CompositeScaleDownSetProcessor) CleanUp() { type MaxNodesProcessor struct { } -// GetNodesToRemove selects up to maxCount nodes for deletion, by selecting a first maxCount candidates -func (p *MaxNodesProcessor) GetNodesToRemove(ctx *context.AutoscalingContext, candidates []simulator.NodeToBeRemoved, maxCount int) []simulator.NodeToBeRemoved { - end := len(candidates) - if len(candidates) > maxCount { - end = maxCount +// FilterUnremovableNodes selects up to scaleDownCtx.MaxNodeCountToBeRemoved nodes for deletion, and marks the remaining candidates as unremovable +func (p *MaxNodesProcessor) FilterUnremovableNodes(ctx *context.AutoscalingContext, scaleDownCtx ScaleDownContext, candidates []simulator.NodeToBeRemoved) ([]simulator.NodeToBeRemoved, []simulator.UnremovableNode) { + + nodesToBeRemoved := []simulator.NodeToBeRemoved{} + unremovableNodes := []simulator.UnremovableNode{} + + for idx, node := range candidates { + if idx < scaleDownCtx.MaxNodeCountToBeRemoved { + nodesToBeRemoved = append(nodesToBeRemoved, node) + } else { + unremovableNodes = append(unremovableNodes, simulator.UnremovableNode{Node: node.Node, Reason: simulator.NodeGroupMaxDeletionCountReached}) + } } - return candidates[:end] + + return nodesToBeRemoved, unremovableNodes } // CleanUp is called at CA termination @@ -82,21 +105,26 @@ type AtomicResizeFilteringProcessor struct { } -// GetNodesToRemove selects up to maxCount nodes for deletion, by selecting a first maxCount candidates -func (p *AtomicResizeFilteringProcessor) GetNodesToRemove(ctx *context.AutoscalingContext, candidates []simulator.NodeToBeRemoved, maxCount int) []simulator.NodeToBeRemoved { +// FilterUnremovableNodes filters candidate nodes that can't be removed when ZeroOrMaxNodeScaling is enabled and the number of nodes to remove is not equal
to target size +func (p *AtomicResizeFilteringProcessor) FilterUnremovableNodes(ctx *context.AutoscalingContext, scaleDownCtx ScaleDownContext, candidates []simulator.NodeToBeRemoved) ([]simulator.NodeToBeRemoved, []simulator.UnremovableNode) { + + nodesToBeRemoved := []simulator.NodeToBeRemoved{} + unremovableNodes := []simulator.UnremovableNode{} + atomicQuota := klogx.NodesLoggingQuota() standardQuota := klogx.NodesLoggingQuota() nodesByGroup := map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{} - result := []simulator.NodeToBeRemoved{} for _, node := range candidates { nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node.Node) if err != nil { klog.Errorf("Node %v will not scale down, failed to get node info: %s", node.Node.Name, err) + unremovableNodes = append(unremovableNodes, simulator.UnremovableNode{Node: node.Node, Reason: simulator.UnexpectedError}) continue } autoscalingOptions, err := nodeGroup.GetOptions(ctx.NodeGroupDefaults) if err != nil && err != cloudprovider.ErrNotImplemented { klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err) + unremovableNodes = append(unremovableNodes, simulator.UnremovableNode{Node: node.Node, Reason: simulator.UnexpectedError}) continue } if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling { @@ -104,7 +132,7 @@ func (p *AtomicResizeFilteringProcessor) GetNodesToRemove(ctx *context.Autoscali nodesByGroup[nodeGroup] = append(nodesByGroup[nodeGroup], node) } else { klogx.V(2).UpTo(standardQuota).Infof("Considering node %s for standard scale down", node.Node.Name) - result = append(result, node) + nodesToBeRemoved = append(nodesToBeRemoved, node) } } klogx.V(2).Over(atomicQuota).Infof("Considering %d other nodes for atomic scale down", -atomicQuota.Left()) @@ -113,16 +141,22 @@ func (p *AtomicResizeFilteringProcessor) GetNodesToRemove(ctx *context.Autoscali ngSize, err := nodeGroup.TargetSize() if err != nil { klog.Errorf("Nodes from group %s will not 
scale down, failed to get target size: %s", nodeGroup.Id(), err) + for _, node := range nodes { + unremovableNodes = append(unremovableNodes, simulator.UnremovableNode{Node: node.Node, Reason: simulator.UnexpectedError}) + } continue } if ngSize == len(nodes) { klog.V(2).Infof("Scheduling atomic scale down for all %v nodes from node group %s", len(nodes), nodeGroup.Id()) - result = append(result, nodes...) + nodesToBeRemoved = append(nodesToBeRemoved, nodes...) } else { klog.V(2).Infof("Skipping scale down for %v nodes from node group %s, all %v nodes have to be scaled down atomically", len(nodes), nodeGroup.Id(), ngSize) + for _, node := range nodes { + unremovableNodes = append(unremovableNodes, simulator.UnremovableNode{Node: node.Node, Reason: simulator.AtomicScaleDownFailed}) + } } } - return result + return nodesToBeRemoved, unremovableNodes } // CleanUp is called at CA termination diff --git a/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go b/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go new file mode 100644 index 000000000000..c1c86acb01ea --- /dev/null +++ b/cluster-autoscaler/processors/nodes/scale_down_set_processor_test.go @@ -0,0 +1,183 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package nodes + +import ( + "testing" + + "github.com/stretchr/testify/assert" + testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" + "k8s.io/autoscaler/cluster-autoscaler/config" + . "k8s.io/autoscaler/cluster-autoscaler/core/test" + "k8s.io/autoscaler/cluster-autoscaler/simulator" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + "k8s.io/client-go/kubernetes/fake" +) + +func TestMaxNodesFilterUnremovableNodes(t *testing.T) { + + testCases := []struct { + name string + candidates []simulator.NodeToBeRemoved + scaleDownContext ScaleDownContext + expectedToBeRemoved []simulator.NodeToBeRemoved + expectedUnremovable []simulator.UnremovableNode + }{ + { + name: "Max nodes with simple limit", + candidates: []simulator.NodeToBeRemoved{ + buildRemovableNode("node-1"), + buildRemovableNode("node-2"), + buildRemovableNode("node-3"), + buildRemovableNode("node-4"), + buildRemovableNode("node-5"), + }, + scaleDownContext: *NewDefaultScaleDownSetContext(3), + expectedToBeRemoved: []simulator.NodeToBeRemoved{ + buildRemovableNode("node-1"), + buildRemovableNode("node-2"), + buildRemovableNode("node-3"), + }, + expectedUnremovable: []simulator.UnremovableNode{ + buildUnremovableNode("node-4", simulator.NodeGroupMaxDeletionCountReached), + buildUnremovableNode("node-5", simulator.NodeGroupMaxDeletionCountReached), + }, + }, + } + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + processor := NewMaxNodesProcessor() + toBeRemoved, unRemovable := processor.FilterUnremovableNodes(nil, tc.scaleDownContext, tc.candidates) + + assert.ElementsMatch(t, tc.expectedToBeRemoved, toBeRemoved) + assert.ElementsMatch(t, tc.expectedUnremovable, unRemovable) + }) + } +} + +func TestAtomicResizeFilterUnremovableNodes(t *testing.T) { + testCases := []struct { + name string + nodeGroups []struct { + nodeGroupName string + nodeGroupSize int + } + nodes []struct { + candidate simulator.NodeToBeRemoved + nodeGroup string 
+ } + scaleDownContext ScaleDownContext + expectedToBeRemoved []simulator.NodeToBeRemoved + expectedUnremovable []simulator.UnremovableNode + }{ + { + name: "Atomic removal", + nodeGroups: []struct { + nodeGroupName string + nodeGroupSize int + }{ + { + nodeGroupName: "ng1", + nodeGroupSize: 3, + }, + { + nodeGroupName: "ng2", + nodeGroupSize: 4, + }, + }, + nodes: []struct { + candidate simulator.NodeToBeRemoved + nodeGroup string + }{ + { + candidate: buildRemovableNode("node-1"), + nodeGroup: "ng1", + }, + { + candidate: buildRemovableNode("node-2"), + nodeGroup: "ng1", + }, + { + candidate: buildRemovableNode("node-3"), + nodeGroup: "ng1", + }, + { + candidate: buildRemovableNode("node-4"), + nodeGroup: "ng2", + }, + { + candidate: buildRemovableNode("node-5"), + nodeGroup: "ng2", + }, + }, + scaleDownContext: *NewDefaultScaleDownSetContext(10), + expectedToBeRemoved: []simulator.NodeToBeRemoved{ + buildRemovableNode("node-1"), + buildRemovableNode("node-2"), + buildRemovableNode("node-3"), + }, + expectedUnremovable: []simulator.UnremovableNode{ + buildUnremovableNode("node-4", simulator.AtomicScaleDownFailed), + buildUnremovableNode("node-5", simulator.AtomicScaleDownFailed), + }, + }, + } + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + processor := NewAtomicResizeFilteringProcessor() + provider := testprovider.NewTestCloudProvider(nil, nil) + for _, ng := range tc.nodeGroups { + provider.AddNodeGroupWithCustomOptions(ng.nodeGroupName, 0, 100, ng.nodeGroupSize, &config.NodeGroupAutoscalingOptions{ + ZeroOrMaxNodeScaling: true, + }) + } + candidates := []simulator.NodeToBeRemoved{} + for _, node := range tc.nodes { + provider.AddNode(node.nodeGroup, node.candidate.Node) + candidates = append(candidates, node.candidate) + } + context, _ := NewScaleTestAutoscalingContext(config.AutoscalingOptions{ + NodeGroupDefaults: config.NodeGroupAutoscalingOptions{}, + }, &fake.Clientset{}, nil, provider, nil, nil) + + 
toBeRemoved, unRemovable := processor.FilterUnremovableNodes(&context, tc.scaleDownContext, candidates) + + assert.ElementsMatch(t, tc.expectedToBeRemoved, toBeRemoved) + assert.ElementsMatch(t, tc.expectedUnremovable, unRemovable) + }) + } +} + +func buildRemovableNode(name string) simulator.NodeToBeRemoved { + + return simulator.NodeToBeRemoved{ + Node: BuildTestNode(name, 1000, 10), + } +} + +func buildUnremovableNode(name string, reason simulator.UnremovableReason) simulator.UnremovableNode { + return simulator.UnremovableNode{ + Node: BuildTestNode(name, 1000, 10), + Reason: reason, + } +} diff --git a/cluster-autoscaler/processors/nodes/types.go b/cluster-autoscaler/processors/nodes/types.go index 3bc671084fc9..6d89b4856ab9 100644 --- a/cluster-autoscaler/processors/nodes/types.go +++ b/cluster-autoscaler/processors/nodes/types.go @@ -18,7 +18,6 @@ package nodes import ( apiv1 "k8s.io/api/core/v1" - "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" @@ -37,8 +36,8 @@ type ScaleDownNodeProcessor interface { // ScaleDownSetProcessor contains a method to select nodes for deletion type ScaleDownSetProcessor interface { - // GetNodesToRemove selects up to maxCount nodes for deletion - GetNodesToRemove(ctx *context.AutoscalingContext, candidates []simulator.NodeToBeRemoved, maxCount int) []simulator.NodeToBeRemoved + // FilterUnremovableNodes takes removable candidates and filters the unremovable nodes from them + FilterUnremovableNodes(ctx *context.AutoscalingContext, scaleDownCtx ScaleDownContext, candidates []simulator.NodeToBeRemoved) ([]simulator.NodeToBeRemoved, []simulator.UnremovableNode) // CleanUp is called at CA termination CleanUp() } diff --git a/cluster-autoscaler/processors/processors.go b/cluster-autoscaler/processors/processors.go index 69ae6273cc4d..dbeb8fc0fa8d 100644 --- a/cluster-autoscaler/processors/processors.go +++ 
b/cluster-autoscaler/processors/processors.go @@ -89,6 +89,8 @@ func DefaultProcessors(options config.AutoscalingOptions) *AutoscalingProcessors }), ScaleUpStatusProcessor: status.NewDefaultScaleUpStatusProcessor(), ScaleDownNodeProcessor: nodes.NewPreFilteringScaleDownNodeProcessor(), + // TODO NewMaxNodesProcessor should be called after NewAtomicResizeFilteringProcessor + // to have maximum removable nodes returned by ScaleDownSetProcessor ScaleDownSetProcessor: nodes.NewCompositeScaleDownSetProcessor( []nodes.ScaleDownSetProcessor{ nodes.NewMaxNodesProcessor(), diff --git a/cluster-autoscaler/processors/test/common.go b/cluster-autoscaler/processors/test/common.go new file mode 100644 index 000000000000..be7b2d31edb9 --- /dev/null +++ b/cluster-autoscaler/processors/test/common.go @@ -0,0 +1,180 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package test + +import ( + testcloudprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/context" + "k8s.io/autoscaler/cluster-autoscaler/core/podlistprocessor" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker" + "k8s.io/autoscaler/cluster-autoscaler/observers/nodegroupchange" + "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/actionablecluster" + "k8s.io/autoscaler/cluster-autoscaler/processors/binpacking" + "k8s.io/autoscaler/cluster-autoscaler/processors/customresources" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodes" + "k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates" + "k8s.io/autoscaler/cluster-autoscaler/processors/status" + "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + + apiv1 "k8s.io/api/core/v1" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +// NodeConfig is a node config used in tests +type NodeConfig struct { + Name string + Cpu int64 + Memory int64 + Gpu int64 + Ready bool + Group string +} + +// PodConfig is a pod config used in tests +type PodConfig struct { + Name string + Cpu int64 + Memory int64 + Gpu int64 + Node string + ToleratesGpu bool +} + +// GroupSizeChange represents a change in group size +type GroupSizeChange struct { + GroupName string + SizeChange int +} + +// ScaleTestConfig represents a config of a scale test +type ScaleTestConfig struct { + Nodes []NodeConfig + Pods []PodConfig + ExtraPods 
[]PodConfig + Options config.AutoscalingOptions + NodeDeletionTracker *deletiontracker.NodeDeletionTracker + ExpansionOptionToChoose GroupSizeChange // this will be selected by assertingStrategy.BestOption + + ExpectedScaleDowns []string + ExpectedScaleDownCount int +} + +// NodeGroupConfig is a node group config used in tests +type NodeGroupConfig struct { + Name string + MinSize int + MaxSize int +} + +// NodeTemplateConfig is a structure to provide node info in tests +type NodeTemplateConfig struct { + MachineType string + NodeInfo *schedulerframework.NodeInfo + NodeGroupName string +} + +// ScaleUpTestConfig represents a config of a scale test +type ScaleUpTestConfig struct { + Groups []NodeGroupConfig + Nodes []NodeConfig + Pods []PodConfig + ExtraPods []PodConfig + OnScaleUp testcloudprovider.OnScaleUpFunc + OnCreateGroup testcloudprovider.OnNodeGroupCreateFunc + ExpansionOptionToChoose *GroupSizeChange + Options *config.AutoscalingOptions + NodeTemplateConfigs map[string]*NodeTemplateConfig + EnableAutoprovisioning bool + AllOrNothing bool +} + +// ScaleUpTestResult represents a node groups scale up result +type ScaleUpTestResult struct { + ScaleUpError errors.AutoscalerError + ScaleUpStatus ScaleUpStatusInfo + GroupSizeChanges []GroupSizeChange + ExpansionOptions []GroupSizeChange + Events []string + GroupTargetSizes map[string]int +} + +// ScaleTestResults contains results of a scale test +type ScaleTestResults struct { + ExpansionOptions []GroupSizeChange + GroupTargetSizes map[string]int + FinalOption GroupSizeChange + NoScaleUpReason string + FinalScaleDowns []string + Events []string + ScaleUpStatus ScaleUpStatusInfo +} + +// ScaleUpStatusInfo is a simplified form of a ScaleUpStatus, to avoid mocking actual NodeGroup and Pod objects in test config. 
+type ScaleUpStatusInfo struct { + Result status.ScaleUpResult + PodsTriggeredScaleUp []string + PodsRemainUnschedulable []string + PodsAwaitEvaluation []string +} + +// WasSuccessful returns true iff scale up was successful +func (s *ScaleUpStatusInfo) WasSuccessful() bool { + return s.Result == status.ScaleUpSuccessful +} + +// ExtractPodNames extract pod names from a list of pods +func ExtractPodNames(pods []*apiv1.Pod) []string { + podNames := []string{} + for _, pod := range pods { + podNames = append(podNames, pod.Name) + } + return podNames +} + +// NewTestProcessors returns a set of simple processors for use in tests. +func NewTestProcessors(context *context.AutoscalingContext) *processors.AutoscalingProcessors { + return &processors.AutoscalingProcessors{ + PodListProcessor: podlistprocessor.NewDefaultPodListProcessor(context.PredicateChecker, scheduling.ScheduleAnywhere), + NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{}, + BinpackingLimiter: binpacking.NewTimeLimiter(context.MaxNodeGroupBinpackingDuration), + NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}, config.NodeGroupDifferenceRatios{}), + ScaleDownSetProcessor: nodes.NewCompositeScaleDownSetProcessor([]nodes.ScaleDownSetProcessor{ + nodes.NewMaxNodesProcessor(), + nodes.NewAtomicResizeFilteringProcessor(), + }), + // TODO(bskiba): change scale up test so that this can be a NoOpProcessor + ScaleUpStatusProcessor: &status.EventingScaleUpStatusProcessor{}, + ScaleDownStatusProcessor: &status.NoOpScaleDownStatusProcessor{}, + AutoscalingStatusProcessor: &status.NoOpAutoscalingStatusProcessor{}, + NodeGroupManager: nodegroups.NewDefaultNodeGroupManager(), + TemplateNodeInfoProvider: nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false), + NodeGroupConfigProcessor: nodegroupconfig.NewDefaultNodeGroupConfigProcessor(context.NodeGroupDefaults), + CustomResourcesProcessor: customresources.NewDefaultCustomResourcesProcessor(), + 
ActionableClusterProcessor: actionablecluster.NewDefaultActionableClusterProcessor(), + ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(), + ScaleStateNotifier: nodegroupchange.NewNodeGroupChangeObserversList(), + AsyncNodeGroupStateChecker: asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(), + } +} diff --git a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go index eaa3a56fb4f6..aafb10131342 100644 --- a/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go +++ b/cluster-autoscaler/provisioningrequest/orchestrator/orchestrator_test.go @@ -26,7 +26,7 @@ import ( appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" + v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" "k8s.io/autoscaler/cluster-autoscaler/clusterstate" "k8s.io/autoscaler/cluster-autoscaler/config" @@ -35,6 +35,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig" "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" "k8s.io/autoscaler/cluster-autoscaler/processors/status" + processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/besteffortatomic" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/checkcapacity" "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods" @@ -297,7 +298,7 @@ func setupTest(t *testing.T, nodes []*apiv1.Node, prs []*provreqwrapper.Provisio clustersnapshot.InitializeClusterSnapshotOrDie(t, autoscalingContext.ClusterSnapshot, nodes, nil) client := provreqclient.NewFakeProvisioningRequestClient(context.Background(), t, prs...) 
- processors := NewTestProcessors(&autoscalingContext) + processors := processorstest.NewTestProcessors(&autoscalingContext) if autoprovisioning { processors.NodeGroupListProcessor = &MockAutoprovisioningNodeGroupListProcessor{T: t} processors.NodeGroupManager = &MockAutoprovisioningNodeGroupManager{T: t, ExtraGroups: 2} diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index 52ceacff8a37..dbd539610044 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -73,6 +73,10 @@ const ( NotUnreadyLongEnough // NodeGroupMinSizeReached - node can't be removed because its node group is at its minimal size already. NodeGroupMinSizeReached + // NodeGroupMaxDeletionCountReached - node can't be removed because max node count to be removed reached + NodeGroupMaxDeletionCountReached + // AtomicScaleDownFailed - node can't be removed as node group has ZeroOrMaxNodeScaling enabled and number of nodes to remove are not equal to target size + AtomicScaleDownFailed // MinimalResourceLimitExceeded - node can't be removed because it would violate cluster-wide minimal resource limits. MinimalResourceLimitExceeded // CurrentlyBeingDeleted - node can't be removed because it's already in the process of being deleted.