Merge pull request #5672 from vadasambar/feat/5399/ignore-daemonsets-…

…utilization-per-nodegroup feat: set `IgnoreDaemonSetsUtilization` per nodegroup for AWS
kubernetes · Jul 12, 2023 · c6893e9 · c6893e9
2 parents d7fb388 + 8a73d8e
commit c6893e9
Show file tree

Hide file tree

Showing 16 changed files with 608 additions and 307 deletions.
diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md
@@ -246,6 +246,8 @@ as string). Currently supported autoscaling options (and example values) are:
   (overrides `--scale-down-unneeded-time` value for that specific ASG)
 * `k8s.io/cluster-autoscaler/node-template/autoscaling-options/scaledownunreadytime`: `20m0s`
   (overrides `--scale-down-unready-time` value for that specific ASG)
+* `k8s.io/cluster-autoscaler/node-template/autoscaling-options/ignoredaemonsetsutilization`: `true`
+  (overrides `--ignore-daemonsets-utilization` value for that specific ASG) 
 
 **NOTE:** It is your responsibility to ensure such labels and/or taints are
 applied via the node's kubelet configuration at startup. Cluster Autoscaler will not set the node taints for you.

diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go
@@ -245,6 +245,15 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling
 		}
 	}
 
+	if stringOpt, found := options[config.DefaultIgnoreDaemonSetsUtilizationKey]; found {
+		if opt, err := strconv.ParseBool(stringOpt); err != nil {
+			klog.Warningf("failed to convert asg %s %s tag to bool: %v",
+				asg.Name, config.DefaultIgnoreDaemonSetsUtilizationKey, err)
+		} else {
+			defaults.IgnoreDaemonSetsUtilization = opt
+		}
+	}
+
 	return &defaults
 }
 

diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
@@ -130,6 +130,7 @@ func TestGetAsgOptions(t *testing.T) {
 		ScaleDownGpuUtilizationThreshold: 0.2,
 		ScaleDownUnneededTime:            time.Second,
 		ScaleDownUnreadyTime:             time.Minute,
+		IgnoreDaemonSetsUtilization:      false,
 	}
 
 	tests := []struct {
@@ -145,39 +146,60 @@ func TestGetAsgOptions(t *testing.T) {
 		{
 			description: "keep defaults on invalid tags values",
 			tags: map[string]string{
-				"scaledownutilizationthreshold": "not-a-float",
-				"scaledownunneededtime":         "not-a-duration",
-				"ScaleDownUnreadyTime":          "",
+				config.DefaultScaleDownUtilizationThresholdKey: "not-a-float",
+				config.DefaultScaleDownUnneededTimeKey:         "not-a-duration",
+				"ScaleDownUnreadyTime":                         "",
+				config.DefaultIgnoreDaemonSetsUtilizationKey:   "not-a-bool",
 			},
 			expected: &defaultOptions,
 		},
 		{
 			description: "use provided tags and fill missing with defaults",
 			tags: map[string]string{
-				"scaledownutilizationthreshold": "0.42",
-				"scaledownunneededtime":         "1h",
+				config.DefaultScaleDownUtilizationThresholdKey: "0.42",
+				config.DefaultScaleDownUnneededTimeKey:         "1h",
+				config.DefaultIgnoreDaemonSetsUtilizationKey:   "true",
 			},
 			expected: &config.NodeGroupAutoscalingOptions{
 				ScaleDownUtilizationThreshold:    0.42,
 				ScaleDownGpuUtilizationThreshold: defaultOptions.ScaleDownGpuUtilizationThreshold,
 				ScaleDownUnneededTime:            time.Hour,
 				ScaleDownUnreadyTime:             defaultOptions.ScaleDownUnreadyTime,
+				IgnoreDaemonSetsUtilization:      true,
+			},
+		},
+		{
+			description: "use provided tags (happy path)",
+			tags: map[string]string{
+				config.DefaultScaleDownUtilizationThresholdKey:    "0.42",
+				config.DefaultScaleDownUnneededTimeKey:            "1h",
+				config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
+				config.DefaultScaleDownUnreadyTimeKey:             "25m",
+				config.DefaultIgnoreDaemonSetsUtilizationKey:      "true",
+			},
+			expected: &config.NodeGroupAutoscalingOptions{
+				ScaleDownUtilizationThreshold:    0.42,
+				ScaleDownGpuUtilizationThreshold: 0.7,
+				ScaleDownUnneededTime:            time.Hour,
+				ScaleDownUnreadyTime:             25 * time.Minute,
+				IgnoreDaemonSetsUtilization:      true,
 			},
 		},
 		{
 			description: "ignore unknown tags",
 			tags: map[string]string{
-				"scaledownutilizationthreshold":    "0.6",
-				"scaledowngpuutilizationthreshold": "0.7",
-				"scaledownunneededtime":            "1m",
-				"scaledownunreadytime":             "1h",
-				"notyetspecified":                  "42",
+				config.DefaultScaleDownUtilizationThresholdKey:    "0.6",
+				config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
+				config.DefaultScaleDownUnneededTimeKey:            "1m",
+				config.DefaultScaleDownUnreadyTimeKey:             "1h",
+				"notyetspecified":                                 "42",
 			},
 			expected: &config.NodeGroupAutoscalingOptions{
 				ScaleDownUtilizationThreshold:    0.6,
 				ScaleDownGpuUtilizationThreshold: 0.7,
 				ScaleDownUnneededTime:            time.Minute,
 				ScaleDownUnreadyTime:             time.Hour,
+				IgnoreDaemonSetsUtilization:      false,
 			},
 		},
 	}

diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
@@ -48,6 +48,8 @@ type NodeGroupAutoscalingOptions struct {
 	MaxNodeProvisionTime time.Duration
 	// ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
 	ZeroOrMaxNodeScaling bool
+	// IgnoreDaemonSetsUtilization sets if daemonsets utilization should be considered during node scale-down
+	IgnoreDaemonSetsUtilization bool
 }
 
 // GCEOptions contain autoscaling options specific to GCE cloud provider.
@@ -117,8 +119,6 @@ type AutoscalingOptions struct {
 	GRPCExpanderCert string
 	// GRPCExpanderURL is the url of the gRPC server when using the gRPC expander
 	GRPCExpanderURL string
-	// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
-	IgnoreDaemonSetsUtilization bool
 	// IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
 	IgnoreMirrorPodsUtilization bool
 	// MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before

diff --git a/cluster-autoscaler/config/const.go b/cluster-autoscaler/config/const.go
@@ -16,6 +16,8 @@ limitations under the License.
 
 package config
 
+import "time"
+
 const (
 	// DefaultMaxClusterCores is the default maximum number of cores in the cluster.
 	DefaultMaxClusterCores = 5000 * 64
@@ -32,4 +34,14 @@ const (
 	DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
 	// DefaultMaxNodeProvisionTimeKey identifies MaxNodeProvisionTime autoscaling option
 	DefaultMaxNodeProvisionTimeKey = "maxnodeprovisiontime"
+	// DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
+	DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"
+	// DefaultScaleDownUnneededTime identifies ScaleDownUnneededTime autoscaling option
+	DefaultScaleDownUnneededTime = 10 * time.Minute
+	// DefaultScaleDownUnreadyTime identifies ScaleDownUnreadyTime autoscaling option
+	DefaultScaleDownUnreadyTime = 20 * time.Minute
+	// DefaultScaleDownUtilizationThreshold identifies ScaleDownUtilizationThreshold autoscaling option
+	DefaultScaleDownUtilizationThreshold = 0.5
+	// DefaultScaleDownGpuUtilizationThreshold identifies ScaleDownGpuUtilizationThreshold autoscaling option
+	DefaultScaleDownGpuUtilizationThreshold = 0.5
 )
diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go
@@ -52,10 +52,18 @@ type Actuator struct {
 	// This is a larger change to the code structure which impacts some existing actuator unit tests
 	// as well as Cluster Autoscaler implementations that may override ScaleDownSetProcessor
 	budgetProcessor *budgets.ScaleDownBudgetProcessor
+	configGetter    actuatorNodeGroupConfigGetter
+}
+
+// actuatorNodeGroupConfigGetter is an interface to limit the functions that can be used
+// from NodeGroupConfigProcessor interface
+type actuatorNodeGroupConfigGetter interface {
+	// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
+	GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
 }
 
 // NewActuator returns a new instance of Actuator.
-func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions) *Actuator {
+func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions, configGetter actuatorNodeGroupConfigGetter) *Actuator {
 	ndb := NewNodeDeletionBatcher(ctx, csr, ndt, ctx.NodeDeletionBatcherInterval)
 	return &Actuator{
 		ctx:                   ctx,
@@ -64,6 +72,7 @@ func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterState
 		nodeDeletionScheduler: NewGroupDeletionScheduler(ctx, ndt, ndb, NewDefaultEvictor(deleteOptions, ndt)),
 		budgetProcessor:       budgets.NewScaleDownBudgetProcessor(ctx),
 		deleteOptions:         deleteOptions,
+		configGetter:          configGetter,
 	}
 }
 
@@ -263,8 +272,14 @@ func (a *Actuator) scaleDownNodeToReport(node *apiv1.Node, drain bool) (*status.
 	if err != nil {
 		return nil, err
 	}
+
+	ignoreDaemonSetsUtilization, err := a.configGetter.GetIgnoreDaemonSetsUtilization(a.ctx, nodeGroup)
+	if err != nil {
+		return nil, err
+	}
+
 	gpuConfig := a.ctx.CloudProvider.GetNodeGpuConfig(node)
-	utilInfo, err := utilization.Calculate(nodeInfo, a.ctx.IgnoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
+	utilInfo, err := utilization.Calculate(nodeInfo, ignoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
 	if err != nil {
 		return nil, err
 	}