Skip to content

Commit

Permalink
Merge pull request #4654 from calvix/do-not-set-desired-when-updating…
Browse files Browse the repository at this point in the history
…-asg-with-external-scaler

🐛 Update ASG - do not set desired value for machinepool which have externally managed replicas
  • Loading branch information
k8s-ci-robot authored Nov 28, 2023
2 parents b4506f8 + 4d33ed1 commit c9b9276
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 48 deletions.
5 changes: 3 additions & 2 deletions exp/controllers/awsmachinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/predicates"
)
Expand Down Expand Up @@ -276,7 +277,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
return nil
}

if scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
// Set MachinePool replicas to the ASG DesiredCapacity
if *machinePoolScope.MachinePool.Spec.Replicas != *asg.DesiredCapacity {
machinePoolScope.Info("Setting MachinePool replicas to ASG DesiredCapacity",
Expand Down Expand Up @@ -504,7 +505,7 @@ func (r *AWSMachinePoolReconciler) findASG(machinePoolScope *scope.MachinePoolSc
func diffASG(machinePoolScope *scope.MachinePoolScope, existingASG *expinfrav1.AutoScalingGroup) string {
detectedMachinePoolSpec := machinePoolScope.MachinePool.Spec.DeepCopy()

if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
detectedMachinePoolSpec.Replicas = existingASG.DesiredCapacity
}
if diff := cmp.Diff(machinePoolScope.MachinePool.Spec, *detectedMachinePoolSpec); diff != "" {
Expand Down
4 changes: 2 additions & 2 deletions exp/controllers/awsmachinepool_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
ec2Svc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()

ms.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "somehow-externally-managed",
}
ms.MachinePool.Spec.Replicas = pointer.Int32(0)

Expand Down Expand Up @@ -908,7 +908,7 @@ func TestDiffASG(t *testing.T) {
MachinePool: &expclusterv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed)
},
},
Spec: expclusterv1.MachinePoolSpec{
Expand Down
20 changes: 0 additions & 20 deletions pkg/cloud/scope/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,6 @@ import (
"sigs.k8s.io/cluster-api/util/patch"
)

const (
// ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling.
// The practical effect of this is that the capi "replica" count is derived from the number of observed infra machines,
// instead of being a source of truth for eventual consistency.
//
// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged.
ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by"

// ExternalAutoscalerReplicasManagedByAnnotationValue is used with the "cluster.x-k8s.io/replicas-managed-by" annotation
// to indicate an external autoscaler enforces replica count.
//
// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is meged.
ExternalAutoscalerReplicasManagedByAnnotationValue = "external-autoscaler"
)

// MachinePoolScope defines a scope defined around a machine and its cluster.
type MachinePoolScope struct {
logger.Logger
Expand Down Expand Up @@ -404,8 +389,3 @@ func (m *MachinePoolScope) LaunchTemplateName() string {
func (m *MachinePoolScope) GetRuntimeObject() runtime.Object {
return m.AWSMachinePool
}

func ReplicasExternallyManaged(mp *expclusterv1.MachinePool) bool {
val, ok := mp.Annotations[ReplicasManagedByAnnotation]
return ok && val == ExternalAutoscalerReplicasManagedByAnnotationValue
}
29 changes: 15 additions & 14 deletions pkg/cloud/services/autoscaling/autoscalinggroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
"sigs.k8s.io/cluster-api/util/annotations"
)

// SDKToAutoScalingGroup converts an AWS EC2 SDK AutoScalingGroup to the CAPA AutoScalingGroup type.
Expand All @@ -46,7 +47,7 @@ func (s *Service) SDKToAutoScalingGroup(v *autoscaling.Group) (*expinfrav1.AutoS
MaxSize: int32(aws.Int64Value(v.MaxSize)),
MinSize: int32(aws.Int64Value(v.MinSize)),
CapacityRebalance: aws.BoolValue(v.CapacityRebalance),
//TODO: determine what additional values go here and what else should be in the struct
// TODO: determine what additional values go here and what else should be in the struct
}

if v.VPCZoneIdentifier != nil {
Expand Down Expand Up @@ -177,7 +178,7 @@ func (s *Service) CreateASG(machinePoolScope *scope.MachinePoolScope) (*expinfra
// Ignore the problem for externally managed clusters because MachinePool replicas will be updated to the right value automatically.
if mpReplicas >= machinePoolScope.AWSMachinePool.Spec.MinSize && mpReplicas <= machinePoolScope.AWSMachinePool.Spec.MaxSize {
input.DesiredCapacity = &mpReplicas
} else if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
} else if !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
return nil, fmt.Errorf("incorrect number of replicas %d in MachinePool %v", mpReplicas, machinePoolScope.MachinePool.Name)
}

Expand Down Expand Up @@ -284,35 +285,35 @@ func (s *Service) DeleteASG(name string) error {
}

// UpdateASG will update the ASG of a service.
func (s *Service) UpdateASG(scope *scope.MachinePoolScope) error {
subnetIDs, err := s.SubnetIDs(scope)
func (s *Service) UpdateASG(machinePoolScope *scope.MachinePoolScope) error {
subnetIDs, err := s.SubnetIDs(machinePoolScope)
if err != nil {
return fmt.Errorf("getting subnets for ASG: %w", err)
}

input := &autoscaling.UpdateAutoScalingGroupInput{
AutoScalingGroupName: aws.String(scope.Name()), //TODO: define dynamically - borrow logic from ec2
MaxSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MaxSize)),
MinSize: aws.Int64(int64(scope.AWSMachinePool.Spec.MinSize)),
AutoScalingGroupName: aws.String(machinePoolScope.Name()), // TODO: define dynamically - borrow logic from ec2
MaxSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MaxSize)),
MinSize: aws.Int64(int64(machinePoolScope.AWSMachinePool.Spec.MinSize)),
VPCZoneIdentifier: aws.String(strings.Join(subnetIDs, ",")),
CapacityRebalance: aws.Bool(scope.AWSMachinePool.Spec.CapacityRebalance),
CapacityRebalance: aws.Bool(machinePoolScope.AWSMachinePool.Spec.CapacityRebalance),
}

if scope.MachinePool.Spec.Replicas != nil {
input.DesiredCapacity = aws.Int64(int64(*scope.MachinePool.Spec.Replicas))
if machinePoolScope.MachinePool.Spec.Replicas != nil && !annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) {
input.DesiredCapacity = aws.Int64(int64(*machinePoolScope.MachinePool.Spec.Replicas))
}

if scope.AWSMachinePool.Spec.MixedInstancesPolicy != nil {
input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(scope.Name(), scope.AWSMachinePool.Spec.MixedInstancesPolicy)
if machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy != nil {
input.MixedInstancesPolicy = createSDKMixedInstancesPolicy(machinePoolScope.Name(), machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy)
} else {
input.LaunchTemplate = &autoscaling.LaunchTemplateSpecification{
LaunchTemplateId: aws.String(scope.AWSMachinePool.Status.LaunchTemplateID),
LaunchTemplateId: aws.String(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID),
Version: aws.String(expinfrav1.LaunchTemplateLatestVersion),
}
}

if _, err := s.ASGClient.UpdateAutoScalingGroupWithContext(context.TODO(), input); err != nil {
return errors.Wrapf(err, "failed to update ASG %q", scope.Name())
return errors.Wrapf(err, "failed to update ASG %q", machinePoolScope.Name())
}

return nil
Expand Down
48 changes: 40 additions & 8 deletions pkg/cloud/services/autoscaling/autoscalinggroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

Expand Down Expand Up @@ -570,7 +571,7 @@ func TestServiceCreateASG(t *testing.T) {
mps.AWSMachinePool.Spec.MaxSize = 5
mps.MachinePool.Spec.Replicas = aws.Int32(1)
mps.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "", // empty value counts as true (= externally managed)
}
},
wantErr: false,
Expand All @@ -592,7 +593,7 @@ func TestServiceCreateASG(t *testing.T) {
mps.AWSMachinePool.Spec.MaxSize = 5
mps.MachinePool.Spec.Replicas = aws.Int32(6)
mps.MachinePool.Annotations = map[string]string{
scope.ReplicasManagedByAnnotation: scope.ExternalAutoscalerReplicasManagedByAnnotationValue,
clusterv1.ReplicasManagedByAnnotation: "truthy",
}
},
wantErr: false,
Expand Down Expand Up @@ -699,17 +700,26 @@ func TestServiceUpdateASG(t *testing.T) {
machinePoolName string
setupMachinePoolScope func(*scope.MachinePoolScope)
wantErr bool
expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder)
expect func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT)
}{
{
name: "should return without error if update ASG is successful",
machinePoolName: "update-asg-success",
wantErr: false,
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.AWSMachinePool.Spec.Subnets = nil
mps.MachinePool.Spec.Replicas = pointer.Int32(3)
mps.AWSMachinePool.Spec.MinSize = 2
mps.AWSMachinePool.Spec.MaxSize = 5
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(&autoscaling.UpdateAutoScalingGroupOutput{}, nil)
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) {
// CAPA should set min/max, and because there's no "externally managed" annotation, also the
// "desired" number of instances
g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(2)))
g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(5)))
g.Expect(input.DesiredCapacity).To(BeComparableTo(pointer.Int64(3)))
return &autoscaling.UpdateAutoScalingGroupOutput{}, nil
})
},
},
{
Expand All @@ -719,10 +729,31 @@ func TestServiceUpdateASG(t *testing.T) {
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.AWSMachinePool.Spec.MixedInstancesPolicy = nil
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) {
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).Return(nil, awserrors.NewFailedDependency("dependency failure"))
},
},
{
name: "externally managed replicas annotation",
machinePoolName: "update-asg-externally-managed-replicas-annotation",
wantErr: false,
setupMachinePoolScope: func(mps *scope.MachinePoolScope) {
mps.MachinePool.SetAnnotations(map[string]string{clusterv1.ReplicasManagedByAnnotation: "anything-that-is-not-false"})

mps.MachinePool.Spec.Replicas = pointer.Int32(40)
mps.AWSMachinePool.Spec.MinSize = 20
mps.AWSMachinePool.Spec.MaxSize = 50
},
expect: func(e *mocks.MockEC2APIMockRecorder, m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder, g *WithT) {
m.UpdateAutoScalingGroupWithContext(context.TODO(), gomock.AssignableToTypeOf(&autoscaling.UpdateAutoScalingGroupInput{})).DoAndReturn(func(ctx context.Context, input *autoscaling.UpdateAutoScalingGroupInput, options ...request.Option) (*autoscaling.UpdateAutoScalingGroupOutput, error) {
// CAPA should set min/max, but not the externally managed "desired" number of instances
g.Expect(input.MinSize).To(BeComparableTo(pointer.Int64(20)))
g.Expect(input.MaxSize).To(BeComparableTo(pointer.Int64(50)))
g.Expect(input.DesiredCapacity).To(BeNil())
return &autoscaling.UpdateAutoScalingGroupOutput{}, nil
})
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -733,13 +764,14 @@ func TestServiceUpdateASG(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
ec2Mock := mocks.NewMockEC2API(mockCtrl)
asgMock := mock_autoscalingiface.NewMockAutoScalingAPI(mockCtrl)
tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT())
tt.expect(ec2Mock.EXPECT(), asgMock.EXPECT(), g)
s := NewService(clusterScope)
s.ASGClient = asgMock

mps, err := getMachinePoolScope(fakeClient, clusterScope)
g.Expect(err).ToNot(HaveOccurred())
mps.AWSMachinePool.Name = tt.machinePoolName
tt.setupMachinePoolScope(mps)

err = s.UpdateASG(mps)
checkErr(tt.wantErr, err, g)
Expand Down
4 changes: 2 additions & 2 deletions pkg/cloud/services/eks/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ import (
expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/converters"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/wait"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/annotations"
)

func (s *NodegroupService) describeNodegroup() (*eks.Nodegroup, error) {
Expand Down Expand Up @@ -533,7 +533,7 @@ func (s *NodegroupService) reconcileNodegroup(ctx context.Context) error {
break
}

if scope.ReplicasExternallyManaged(s.scope.MachinePool) {
if annotations.ReplicasManagedByExternalAutoscaler(s.scope.MachinePool) {
// Set MachinePool replicas to the node group DesiredCapacity
ngDesiredCapacity := int32(aws.Int64Value(ng.ScalingConfig.DesiredSize))
if *s.scope.MachinePool.Spec.Replicas != ngDesiredCapacity {
Expand Down

0 comments on commit c9b9276

Please sign in to comment.