Skip to content

Commit

Permalink
fix: Handle rollback to earlier version of Karpenter (#4830)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored Oct 13, 2023
1 parent 04c5306 commit f14db29
Show file tree
Hide file tree
Showing 9 changed files with 400 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/aws/aws-sdk-go v1.45.24
github.com/aws/karpenter-core v0.31.1-0.20231012011159-9fa7077ca0e4
github.com/aws/karpenter-core v0.31.1-0.20231013203304-4239902b18b9
github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c
github.com/imdario/mergo v0.3.16
github.com/mitchellh/hashstructure/v2 v2.0.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.45.24 h1:TZx/CizkmCQn8Rtsb11iLYutEQVGK5PK9wAhwouELBo=
github.com/aws/aws-sdk-go v1.45.24/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/karpenter-core v0.31.1-0.20231012011159-9fa7077ca0e4 h1:BO5WIUYUosCcb61AYQaIVm9SLe5mXI/+B4pCuFS1Cvc=
github.com/aws/karpenter-core v0.31.1-0.20231012011159-9fa7077ca0e4/go.mod h1:rb3kp/3cj38tACF6udfpmIvKoQMwirSVoHNlrd66LyE=
github.com/aws/karpenter-core v0.31.1-0.20231013203304-4239902b18b9 h1:j0iZuhoAKHrt0oqfSiKDqvHMnV/t45wi0loG1lEqdUw=
github.com/aws/karpenter-core v0.31.1-0.20231013203304-4239902b18b9/go.mod h1:rb3kp/3cj38tACF6udfpmIvKoQMwirSVoHNlrd66LyE=
github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c h1:oXWwIttmjYLbBKhLazG21aQvpJ3NOOr8IXhCJ/p6e/M=
github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c/go.mod h1:l/TIBsaCx/IrOr0Xvlj/cHLOf05QzuQKEZ1hx2XWmfU=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
Expand Down
14 changes: 7 additions & 7 deletions pkg/controllers/nodeclaim/garbagecollection/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
})

It("should delete an instance if there is no machine owner", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand All @@ -96,7 +96,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue())
})
It("should delete an instance along with the node if there is no machine owner (to quicken scheduling)", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand Down Expand Up @@ -195,7 +195,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 10m ago
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
Expand Down Expand Up @@ -244,7 +244,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
return aws.StringValue(t.Key) == v1alpha5.MachineManagedByAnnotationKey
})

// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand All @@ -253,7 +253,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
Expect(err).NotTo(HaveOccurred())
})
It("should not delete the instance or node if it already has a machine that matches it", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand Down Expand Up @@ -337,7 +337,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
wg.Wait()
})
It("should not delete an instance if it is linked", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand All @@ -357,7 +357,7 @@ var _ = Describe("Machine/GarbageCollection", func() {
Expect(err).NotTo(HaveOccurred())
})
It("should not delete an instance if it is recently linked but the machine doesn't exist", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand Down
24 changes: 19 additions & 5 deletions pkg/controllers/nodeclaim/garbagecollection/nodeclaim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider"
coretest "github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
"github.com/aws/karpenter/pkg/apis/v1beta1"

"github.com/aws/karpenter/pkg/apis/settings"
Expand Down Expand Up @@ -91,7 +93,7 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
})

It("should delete an instance if there is no NodeClaim owner", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand All @@ -101,7 +103,7 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
Expect(corecloudprovider.IsNodeClaimNotFoundError(err)).To(BeTrue())
})
It("should delete an instance along with the node if there is no NodeClaim owner (to quicken scheduling)", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand Down Expand Up @@ -200,7 +202,7 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 10m ago
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
Expand Down Expand Up @@ -254,7 +256,7 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
return aws.StringValue(t.Key) == corev1beta1.ManagedByAnnotationKey
})

// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand All @@ -263,7 +265,7 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
Expect(err).NotTo(HaveOccurred())
})
It("should not delete the instance or node if it already has a NodeClaim that matches it", func() {
// Launch time was 10m ago
// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

Expand Down Expand Up @@ -356,4 +358,16 @@ var _ = Describe("NodeClaim/GarbageCollection", func() {
}
wg.Wait()
})
It("should not delete an instance if EnableNodePools/EnableNodeClaims isn't enabled", func() {
nodepoolutil.EnableNodePools = false
nodeclaimutil.EnableNodeClaims = false

// Launch time was 1m ago
instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute))
awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance)

ExpectReconcileSucceeded(ctx, garbageCollectionController, client.ObjectKey{})
_, err := cloudProvider.Get(ctx, providerID)
Expect(err).ToNot(HaveOccurred())
})
})
4 changes: 4 additions & 0 deletions pkg/controllers/nodeclaim/garbagecollection/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import (
"github.com/aws/karpenter-core/pkg/operator/scheme"
coretest "github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
"github.com/aws/karpenter/pkg/apis"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1beta1"
Expand Down Expand Up @@ -83,6 +85,8 @@ var _ = AfterSuite(func() {
})

var _ = BeforeEach(func() {
nodepoolutil.EnableNodePools = true
nodeclaimutil.EnableNodeClaims = true
awsEnv.Reset()
})

Expand Down
7 changes: 6 additions & 1 deletion pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"knative.dev/pkg/logging"

corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
"github.com/aws/karpenter-core/pkg/utils/resources"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
Expand Down Expand Up @@ -135,11 +136,15 @@ func (p *Provider) Get(ctx context.Context, id string) (*Instance, error) {

func (p *Provider) List(ctx context.Context) ([]*Instance, error) {
var out = &ec2.DescribeInstancesOutput{}
tagKeys := []string{v1alpha5.ProvisionerNameLabelKey}
if nodepoolutil.EnableNodePools {
tagKeys = append(tagKeys, corev1beta1.NodePoolLabelKey)
}
err := p.ec2api.DescribeInstancesPagesWithContext(ctx, &ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Name: aws.String("tag-key"),
Values: aws.StringSlice([]string{v1alpha5.ProvisionerNameLabelKey, corev1beta1.NodePoolLabelKey}),
Values: aws.StringSlice(tagKeys),
},
{
Name: aws.String("tag-key"),
Expand Down
72 changes: 72 additions & 0 deletions pkg/providers/instance/nodeclass_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,26 @@ limitations under the License.
package instance_test

import (
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1"
corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider"
coretest "github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1beta1"
"github.com/aws/karpenter/pkg/fake"
"github.com/aws/karpenter/pkg/providers/instance"
"github.com/aws/karpenter/pkg/test"
)

Expand Down Expand Up @@ -79,4 +87,68 @@ var _ = Describe("NodeClass/InstanceProvider", func() {
Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue())
Expect(instance).To(BeNil())
})
It("should return all NodePool-owned instances from List", func() {
ids := sets.New[string]()
// Provision instances that have the karpenter.sh/nodepool key
for i := 0; i < 20; i++ {
instanceID := fake.InstanceID()
awsEnv.EC2API.Instances.Store(
instanceID,
&ec2.Instance{
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
Tags: []*ec2.Tag{
{
Key: aws.String(fmt.Sprintf("kubernetes.io/cluster/%s", settings.FromContext(ctx).ClusterName)),
Value: aws.String("owned"),
},
{
Key: aws.String(corev1beta1.NodePoolLabelKey),
Value: aws.String("default"),
},
{
Key: aws.String(corev1beta1.ManagedByAnnotationKey),
Value: aws.String(settings.FromContext(ctx).ClusterName),
},
},
PrivateDnsName: aws.String(fake.PrivateDNSName()),
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
},
)
ids.Insert(instanceID)
}
// Provision instances that do not have this tag key
for i := 0; i < 20; i++ {
instanceID := fake.InstanceID()
awsEnv.EC2API.Instances.Store(
instanceID,
&ec2.Instance{
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
PrivateDnsName: aws.String(fake.PrivateDNSName()),
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
},
)
}
instances, err := awsEnv.InstanceProvider.List(ctx)
Expect(err).To(BeNil())
Expect(instances).To(HaveLen(20))

retrievedIDs := sets.New[string](lo.Map(instances, func(i *instance.Instance, _ int) string { return i.ID })...)
Expect(ids.Equal(retrievedIDs)).To(BeTrue())
})
})
71 changes: 71 additions & 0 deletions pkg/providers/instance/nodetemplate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,28 @@ limitations under the License.
package instance_test

import (
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider"
coretest "github.com/aws/karpenter-core/pkg/test"
. "github.com/aws/karpenter-core/pkg/test/expectations"
nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim"
nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
"github.com/aws/karpenter/pkg/fake"
"github.com/aws/karpenter/pkg/providers/instance"
"github.com/aws/karpenter/pkg/test"
nodeclassutil "github.com/aws/karpenter/pkg/utils/nodeclass"
)
Expand Down Expand Up @@ -94,4 +101,68 @@ var _ = Describe("NodeTemplate/InstanceProvider", func() {
Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue())
Expect(instance).To(BeNil())
})
It("should return all Provisioner-owned instances from List", func() {
ids := sets.New[string]()
// Provision instances that have the karpenter.sh/provisioner-name key
for i := 0; i < 20; i++ {
instanceID := fake.InstanceID()
awsEnv.EC2API.Instances.Store(
instanceID,
&ec2.Instance{
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
Tags: []*ec2.Tag{
{
Key: aws.String(fmt.Sprintf("kubernetes.io/cluster/%s", settings.FromContext(ctx).ClusterName)),
Value: aws.String("owned"),
},
{
Key: aws.String(v1alpha5.ProvisionerNameLabelKey),
Value: aws.String("default"),
},
{
Key: aws.String(v1alpha5.MachineManagedByAnnotationKey),
Value: aws.String(settings.FromContext(ctx).ClusterName),
},
},
PrivateDnsName: aws.String(fake.PrivateDNSName()),
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
},
)
ids.Insert(instanceID)
}
// Provision instances that do not have this tag key
for i := 0; i < 20; i++ {
instanceID := fake.InstanceID()
awsEnv.EC2API.Instances.Store(
instanceID,
&ec2.Instance{
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
PrivateDnsName: aws.String(fake.PrivateDNSName()),
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
},
)
}
instances, err := awsEnv.InstanceProvider.List(ctx)
Expect(err).To(BeNil())
Expect(instances).To(HaveLen(20))

retrievedIDs := sets.New[string](lo.Map(instances, func(i *instance.Instance, _ int) string { return i.ID })...)
Expect(ids.Equal(retrievedIDs)).To(BeTrue())
})
})
Loading

0 comments on commit f14db29

Please sign in to comment.