From 5ba219096c22a0428df7477e4c4737007129db98 Mon Sep 17 00:00:00 2001 From: Ellis Tarn Date: Tue, 31 Jan 2023 14:39:57 -0800 Subject: [PATCH] test: Added cluster.Consolidated() checks to test (#190) --- pkg/controllers/deprovisioning/drift_test.go | 360 ++++++++++ .../deprovisioning/expiration_test.go | 379 ++++++++++ pkg/controllers/deprovisioning/suite_test.go | 674 +----------------- 3 files changed, 743 insertions(+), 670 deletions(-) create mode 100644 pkg/controllers/deprovisioning/drift_test.go create mode 100644 pkg/controllers/deprovisioning/expiration_test.go diff --git a/pkg/controllers/deprovisioning/drift_test.go b/pkg/controllers/deprovisioning/drift_test.go new file mode 100644 index 0000000000..47e33c8441 --- /dev/null +++ b/pkg/controllers/deprovisioning/drift_test.go @@ -0,0 +1,360 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deprovisioning_test + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "knative.dev/pkg/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/aws/karpenter-core/pkg/apis/settings" + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/cloudprovider" + "github.com/aws/karpenter-core/pkg/cloudprovider/fake" + "github.com/aws/karpenter-core/pkg/test" + . 
"github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Drift", func() { + It("should ignore drifted nodes if the feature flag is disabled", func() { + ctx = settings.ToContext(ctx, test.Settings(test.SettingsOptions{DriftEnabled: false})) + prov := test.Provisioner() + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }, + Annotations: map[string]string{ + v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourcePods: resource.MustParse("100"), + }}, + ) + + ExpectApplied(ctx, env.Client, node, prov) + ExpectMakeNodesReady(ctx, env.Client, node) + + // inform cluster state about the nodes + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + + Expect(cloudProvider.CreateCalls).To(HaveLen(0)) + ExpectExists(ctx, env.Client, node) + }) + It("should ignore nodes with the drift label, but not the drifted value", func() { + prov := test.Provisioner() + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }, + Annotations: map[string]string{ + v1alpha5.VoluntaryDisruptionAnnotationKey: "wrong-value", + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourcePods: resource.MustParse("100"), + }}, + ) + + ExpectApplied(ctx, env.Client, node, prov) + ExpectMakeNodesReady(ctx, env.Client, node) + + // inform cluster state about the nodes + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + + Expect(cloudProvider.CreateCalls).To(HaveLen(0)) + ExpectExists(ctx, env.Client, node) + }) + It("should ignore nodes without the drift label", func() { + prov := test.Provisioner() + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, + }) + + ExpectApplied(ctx, env.Client, node, prov) + ExpectMakeNodesReady(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) + + // inform cluster state about the nodes + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + fakeClock.Step(10 * time.Minute) + _, err := 
deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+
+		// we don't need a new node
+		Expect(cloudProvider.CreateCalls).To(HaveLen(0))
+		// and can't delete the node since it is not drifted
+		ExpectNodeExists(ctx, env.Client, node.Name)
+	})
+	It("can delete drifted nodes", func() {
+		prov := test.Provisioner()
+		node := test.Node(test.NodeOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: map[string]string{
+					v1alpha5.ProvisionerNameLabelKey: prov.Name,
+					v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+					v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType,
+					v1.LabelTopologyZone: mostExpensiveOffering.Zone,
+				},
+				Annotations: map[string]string{
+					v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{
+				v1.ResourceCPU: resource.MustParse("32"),
+				v1.ResourcePods: resource.MustParse("100"),
+			}},
+		)
+
+		ExpectApplied(ctx, env.Client, node, prov)
+		ExpectMakeNodesReady(ctx, env.Client, node)
+
+		// inform cluster state about the nodes
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node))
+		fakeClock.Step(10 * time.Minute)
+		go triggerVerifyAction()
+		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+
+		// we don't need a new node
+		Expect(cloudProvider.CreateCalls).To(HaveLen(0))
+		// and the drifted node should be deleted
+		ExpectNotFound(ctx, env.Client, node)
+	})
+	It("can replace drifted nodes", func() {
+		labels := map[string]string{
+			"app": "test",
+		}
+		// create our RS so we can link a pod to it
+		rs := test.ReplicaSet()
+		ExpectApplied(ctx, env.Client, rs)
+		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
+
+		pod := test.Pod(test.PodOptions{
+			ObjectMeta: metav1.ObjectMeta{Labels: labels,
+				OwnerReferences: []metav1.OwnerReference{
+					{
+						APIVersion: "apps/v1",
+						Kind: "ReplicaSet",
+						Name: rs.Name,
+						UID: rs.UID,
+						Controller: ptr.Bool(true),
+						BlockOwnerDeletion: ptr.Bool(true),
+					},
+				}}})
+
+		prov := test.Provisioner()
+		node := test.Node(test.NodeOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: map[string]string{
+					v1alpha5.ProvisionerNameLabelKey: prov.Name,
+					v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+					v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType,
+					v1.LabelTopologyZone: mostExpensiveOffering.Zone,
+				},
+				Annotations: map[string]string{
+					v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")},
+		})
+		ExpectApplied(ctx, env.Client, rs, pod, node, prov)
+		ExpectMakeNodesReady(ctx, env.Client, node)
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node))
+		ExpectManualBinding(ctx, env.Client, pod, node)
+		ExpectScheduled(ctx, env.Client, pod)
+		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed())
+
+		// deprovisioning won't delete the old node until the new node is ready
+		wg := ExpectMakeNewNodesReady(ctx, env.Client, 1, node)
+		fakeClock.Step(10 * time.Minute)
+		go triggerVerifyAction()
+		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+		wg.Wait()
+
+		Expect(cloudProvider.CreateCalls).To(HaveLen(1))
+
+		ExpectNotFound(ctx, env.Client, node)
+	
}) + It("can replace drifted nodes with multiple nodes", func() { + currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ + Name: "current-on-demand", + Offerings: []cloudprovider.Offering{ + { + CapacityType: v1alpha5.CapacityTypeOnDemand, + Zone: "test-zone-1a", + Price: 0.5, + Available: false, + }, + }, + }) + replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ + Name: "replacement-on-demand", + Offerings: []cloudprovider.Offering{ + { + CapacityType: v1alpha5.CapacityTypeOnDemand, + Zone: "test-zone-1a", + Price: 0.3, + Available: true, + }, + }, + Resources: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, + }) + cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ + currentInstance, + replacementInstance, + } + + labels := map[string]string{ + "app": "test", + } + // create our RS so we can link a pod to it + rs := test.ReplicaSet() + ExpectApplied(ctx, env.Client, rs) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) + + pods := test.Pods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: ptr.Bool(true), + BlockOwnerDeletion: ptr.Bool(true), + }, + }}, + // Make each pod request about a third of the allocatable on the node + ResourceRequirements: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, + }, + }) + + prov := test.Provisioner() + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: currentInstance.Name, + v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, + v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, + }, + Annotations: map[string]string{ + v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("8")}, + }) + ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) + ExpectMakeNodesReady(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + ExpectManualBinding(ctx, env.Client, pods[0], node) + ExpectManualBinding(ctx, env.Client, pods[1], node) + ExpectManualBinding(ctx, env.Client, pods[2], node) + ExpectScheduled(ctx, env.Client, pods[0]) + ExpectScheduled(ctx, env.Client, pods[1]) + ExpectScheduled(ctx, env.Client, pods[2]) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) + + // deprovisioning won't delete the old node until the new node is ready + wg := ExpectMakeNewNodesReady(ctx, env.Client, 3, node) + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + wg.Wait() + + Expect(cloudProvider.CreateCalls).To(HaveLen(3)) + + ExpectNotFound(ctx, env.Client, node) + }) + It("should delete one drifted node at a time", func() { + prov := test.Provisioner() + node1 := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: 
mostExpensiveOffering.Zone,
+				},
+				Annotations: map[string]string{
+					v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")},
+		})
+		node2 := test.Node(test.NodeOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: map[string]string{
+					v1alpha5.ProvisionerNameLabelKey: prov.Name,
+					v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+					v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType,
+					v1.LabelTopologyZone: mostExpensiveOffering.Zone,
+				},
+				Annotations: map[string]string{
+					v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")},
+		})
+		ExpectApplied(ctx, env.Client, node1, prov, node2)
+		ExpectMakeNodesReady(ctx, env.Client, node1, node2)
+
+		// inform cluster state about the nodes
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node1))
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node2))
+		fakeClock.Step(10 * time.Minute)
+		go triggerVerifyAction()
+		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+
+		// we don't need a new node
+		Expect(cloudProvider.CreateCalls).To(HaveLen(0))
+
+		// Expect one of the nodes to be deleted
+		nodes := &v1.NodeList{}
+		Expect(env.Client.List(ctx, nodes)).To(Succeed())
+		Expect(len(nodes.Items)).To(Equal(1))
+	})
+})
diff --git a/pkg/controllers/deprovisioning/expiration_test.go b/pkg/controllers/deprovisioning/expiration_test.go
new file mode 100644
index 0000000000..ec497dd5be
--- /dev/null
+++ b/pkg/controllers/deprovisioning/expiration_test.go
@@ -0,0 +1,379 @@
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deprovisioning_test
+
+import (
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"knative.dev/pkg/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
+	"github.com/aws/karpenter-core/pkg/cloudprovider"
+	"github.com/aws/karpenter-core/pkg/cloudprovider/fake"
+	"github.com/aws/karpenter-core/pkg/test"
+	. 
"github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Expiration", func() { + It("should ignore nodes without TTLSecondsUntilExpired", func() { + prov := test.Provisioner() + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, + }) + + ExpectApplied(ctx, env.Client, node, prov) + ExpectMakeNodesReady(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) + + // inform cluster state about the nodes + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + fakeClock.Step(10 * time.Minute) + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + + // we don't need a new node + Expect(cloudProvider.CreateCalls).To(HaveLen(0)) + // and can't delete the node since expiry is not enabled + ExpectNodeExists(ctx, env.Client, node.Name) + }) + It("can delete expired nodes", func() { + prov := test.Provisioner(test.ProvisionerOptions{ + TTLSecondsUntilExpired: ptr.Int64(60), + }) + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("32"), + v1.ResourcePods: resource.MustParse("100"), + }}, + ) + + ExpectApplied(ctx, env.Client, node, prov) + ExpectMakeNodesReady(ctx, env.Client, node) + + // inform cluster state about the nodes + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + + // we don't need a new node, but we should evict everything off one of node2 which only has a single pod + Expect(cloudProvider.CreateCalls).To(HaveLen(0)) + // and delete the old one + ExpectNotFound(ctx, env.Client, node) + }) + It("should expire one node at a time, starting with most expired", func() { + expireProv := test.Provisioner(test.ProvisionerOptions{ + TTLSecondsUntilExpired: ptr.Int64(100), + }) + nodeToExpire := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: expireProv.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, + v1.LabelTopologyZone: mostExpensiveOffering.Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, + }) + prov := test.Provisioner(test.ProvisionerOptions{ + TTLSecondsUntilExpired: ptr.Int64(500), + }) + nodeNotExpire := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, + 
v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType,
+					v1.LabelTopologyZone: mostExpensiveOffering.Zone,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")},
+		})
+
+		ExpectApplied(ctx, env.Client, nodeToExpire, expireProv, nodeNotExpire, prov)
+		ExpectMakeNodesReady(ctx, env.Client, nodeToExpire, nodeNotExpire)
+
+		// inform cluster state about the nodes
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(nodeToExpire))
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(nodeNotExpire))
+		fakeClock.Step(10 * time.Minute)
+		go triggerVerifyAction()
+		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+
+		// we don't need a new node
+		Expect(cloudProvider.CreateCalls).To(HaveLen(0))
+		// and the most expired node should be deleted
+		ExpectNotFound(ctx, env.Client, nodeToExpire)
+	})
+	It("can replace node for expiration", func() {
+		labels := map[string]string{
+			"app": "test",
+		}
+		// create our RS so we can link a pod to it
+		rs := test.ReplicaSet()
+		ExpectApplied(ctx, env.Client, rs)
+		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
+
+		pod := test.Pod(test.PodOptions{
+			ObjectMeta: metav1.ObjectMeta{Labels: labels,
+				OwnerReferences: []metav1.OwnerReference{
+					{
+						APIVersion: "apps/v1",
+						Kind: "ReplicaSet",
+						Name: rs.Name,
+						UID: rs.UID,
+						Controller: ptr.Bool(true),
+						BlockOwnerDeletion: ptr.Bool(true),
+					},
+				}}})
+
+		prov := test.Provisioner(test.ProvisionerOptions{
+			TTLSecondsUntilExpired: ptr.Int64(30),
+		})
+		node := test.Node(test.NodeOptions{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: map[string]string{
+					v1alpha5.ProvisionerNameLabelKey: prov.Name,
+					v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
+					v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType,
+					v1.LabelTopologyZone: mostExpensiveOffering.Zone,
+				}},
+			Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")},
+		})
+		ExpectApplied(ctx, env.Client, rs, pod, node, prov)
+		ExpectMakeNodesReady(ctx, env.Client, node)
+		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node))
+		ExpectManualBinding(ctx, env.Client, pod, node)
+		ExpectScheduled(ctx, env.Client, pod)
+		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed())
+
+		// deprovisioning won't delete the old node until the new node is ready
+		wg := ExpectMakeNewNodesReady(ctx, env.Client, 1, node)
+		fakeClock.Step(10 * time.Minute)
+		go triggerVerifyAction()
+		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
+		Expect(err).ToNot(HaveOccurred())
+		wg.Wait()
+
+		Expect(cloudProvider.CreateCalls).To(HaveLen(1))
+
+		ExpectNotFound(ctx, env.Client, node)
+	})
+	It("should uncordon nodes when expiration replacement partially fails", func() {
+		currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{
+			Name: "current-on-demand",
+			Offerings: []cloudprovider.Offering{
+				{
+					CapacityType: v1alpha5.CapacityTypeOnDemand,
+					Zone: "test-zone-1a",
+					Price: 0.5,
+					Available: false,
+				},
+			},
+		})
+		replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{
+			Name: "replacement-on-demand",
+			Offerings: []cloudprovider.Offering{
+				{
+					CapacityType: v1alpha5.CapacityTypeOnDemand,
+					Zone: "test-zone-1a",
+					Price: 0.3,
+					Available: true,
+				},
+			},
+			Resources: 
map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, + }) + cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ + currentInstance, + replacementInstance, + } + cloudProvider.AllowedCreateCalls = 2 + + labels := map[string]string{ + "app": "test", + } + // create our RS so we can link a pod to it + rs := test.ReplicaSet() + ExpectApplied(ctx, env.Client, rs) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) + + pods := test.Pods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: ptr.Bool(true), + BlockOwnerDeletion: ptr.Bool(true), + }, + }}, + // Make each pod request about a third of the allocatable on the node + ResourceRequirements: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, + }, + }) + + prov := test.Provisioner(test.ProvisionerOptions{ + TTLSecondsUntilExpired: ptr.Int64(30), + }) + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: currentInstance.Name, + v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, + v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("7")}, + }) + ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) + ExpectMakeNodesReady(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + ExpectManualBinding(ctx, env.Client, pods[0], node) + ExpectManualBinding(ctx, env.Client, pods[1], node) + ExpectManualBinding(ctx, env.Client, pods[2], node) + ExpectScheduled(ctx, env.Client, pods[0]) + ExpectScheduled(ctx, env.Client, pods[1]) + ExpectScheduled(ctx, env.Client, pods[2]) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) + + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + // Expiration should try to make 3 calls but fail for the third. 
+ Expect(cloudProvider.CreateCalls).To(HaveLen(3)) + + node = ExpectNodeExists(ctx, env.Client, node.Name) + Expect(node.Spec.Unschedulable).To(BeFalse()) + }) + It("can replace node for expiration with multiple nodes", func() { + currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ + Name: "current-on-demand", + Offerings: []cloudprovider.Offering{ + { + CapacityType: v1alpha5.CapacityTypeOnDemand, + Zone: "test-zone-1a", + Price: 0.5, + Available: false, + }, + }, + }) + replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ + Name: "replacement-on-demand", + Offerings: []cloudprovider.Offering{ + { + CapacityType: v1alpha5.CapacityTypeOnDemand, + Zone: "test-zone-1a", + Price: 0.3, + Available: true, + }, + }, + Resources: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, + }) + cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ + currentInstance, + replacementInstance, + } + + labels := map[string]string{ + "app": "test", + } + // create our RS so we can link a pod to it + rs := test.ReplicaSet() + ExpectApplied(ctx, env.Client, rs) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) + + pods := test.Pods(3, test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "apps/v1", + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: ptr.Bool(true), + BlockOwnerDeletion: ptr.Bool(true), + }, + }}, + // Make each pod request about a third of the allocatable on the node + ResourceRequirements: v1.ResourceRequirements{ + Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, + }, + }) + + prov := test.Provisioner(test.ProvisionerOptions{ + TTLSecondsUntilExpired: ptr.Int64(200), + }) + node := test.Node(test.NodeOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: prov.Name, + v1.LabelInstanceTypeStable: currentInstance.Name, + v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, + v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, + }}, + Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("8")}, + }) + ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) + ExpectMakeNodesReady(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) + ExpectManualBinding(ctx, env.Client, pods[0], node) + ExpectManualBinding(ctx, env.Client, pods[1], node) + ExpectManualBinding(ctx, env.Client, pods[2], node) + ExpectScheduled(ctx, env.Client, pods[0]) + ExpectScheduled(ctx, env.Client, pods[1]) + ExpectScheduled(ctx, env.Client, pods[2]) + Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) + + // deprovisioning won't delete the old node until the new node is ready + wg := ExpectMakeNewNodesReady(ctx, env.Client, 3, node) + fakeClock.Step(10 * time.Minute) + go triggerVerifyAction() + _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(err).ToNot(HaveOccurred()) + wg.Wait() + + Expect(cloudProvider.CreateCalls).To(HaveLen(3)) + + ExpectNotFound(ctx, env.Client, node) + }) +}) diff --git a/pkg/controllers/deprovisioning/suite_test.go b/pkg/controllers/deprovisioning/suite_test.go index 5bfdef477f..7d12e30c1d 100644 --- a/pkg/controllers/deprovisioning/suite_test.go +++ b/pkg/controllers/deprovisioning/suite_test.go @@ -146,676 +146,6 @@ var _ = 
AfterEach(func() { } }) -var _ = Describe("Drift", func() { - It("should ignore drifted nodes if the feature flag is disabled", func() { - ctx = settings.ToContext(ctx, test.Settings(test.SettingsOptions{DriftEnabled: false})) - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{ - v1.ResourceCPU: resource.MustParse("32"), - v1.ResourcePods: resource.MustParse("100"), - }}, - ) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - ExpectExists(ctx, env.Client, node) - }) - It("should ignore nodes with the drift label, but not the drifted value", func() { - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: "wrong-value", - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{ - v1.ResourceCPU: resource.MustParse("32"), - v1.ResourcePods: resource.MustParse("100"), - }}, - ) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - ExpectExists(ctx, env.Client, node) - }) - It("should ignore nodes without the drift label", func() { - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - _, err := deprovisioningController.Reconcile(ctx, 
reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - // and can't delete the node since node is not drifted - ExpectNodeExists(ctx, env.Client, node.Name) - }) - It("can delete drifted nodes", func() { - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{ - v1.ResourceCPU: resource.MustParse("32"), - v1.ResourcePods: resource.MustParse("100"), - }}, - ) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node, but we should evict everything off one of node2 which only has a single pod - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - // and delete the old one - ExpectNotFound(ctx, env.Client, node) - }) - It("can replace drifted nodes", func() { - labels := map[string]string{ - "app": "test", - } - // create our RS so we can link a pod to it - rs := test.ReplicaSet() - ExpectApplied(ctx, env.Client, rs) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) - - pod := test.Pod(test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: rs.Name, - UID: rs.UID, - Controller: ptr.Bool(true), - BlockOwnerDeletion: ptr.Bool(true), - }, - }}}) - - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - ExpectApplied(ctx, env.Client, rs, pod, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - ExpectManualBinding(ctx, env.Client, pod, node) - ExpectScheduled(ctx, env.Client, pod) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // deprovisioning won't delete the old node until the new node is ready - wg := ExpectMakeNewNodesReady(ctx, env.Client, 1, node) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - wg.Wait() - - Expect(cloudProvider.CreateCalls).To(HaveLen(1)) - - ExpectNotFound(ctx, env.Client, node) - }) - It("can replace drifted nodes with 
multiple nodes", func() { - currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "current-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.5, - Available: false, - }, - }, - }) - replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "replacement-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.3, - Available: true, - }, - }, - Resources: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, - }) - cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ - currentInstance, - replacementInstance, - } - - labels := map[string]string{ - "app": "test", - } - // create our RS so we can link a pod to it - rs := test.ReplicaSet() - ExpectApplied(ctx, env.Client, rs) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) - - pods := test.Pods(3, test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: rs.Name, - UID: rs.UID, - Controller: ptr.Bool(true), - BlockOwnerDeletion: ptr.Bool(true), - }, - }}, - // Make each pod request about a third of the allocatable on the node - ResourceRequirements: v1.ResourceRequirements{ - Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, - }, - }) - - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: currentInstance.Name, - v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, - v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("8")}, - }) - ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - ExpectManualBinding(ctx, env.Client, pods[0], node) - ExpectManualBinding(ctx, env.Client, pods[1], node) - ExpectManualBinding(ctx, env.Client, pods[2], node) - ExpectScheduled(ctx, env.Client, pods[0]) - ExpectScheduled(ctx, env.Client, pods[1]) - ExpectScheduled(ctx, env.Client, pods[2]) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // deprovisioning won't delete the old node until the new node is ready - wg := ExpectMakeNewNodesReady(ctx, env.Client, 3, node) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - wg.Wait() - - Expect(cloudProvider.CreateCalls).To(HaveLen(3)) - - ExpectNotFound(ctx, env.Client, node) - }) - It("should delete one drifted node at a time", func() { - prov := test.Provisioner() - node1 := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - 
Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - node2 := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }, - Annotations: map[string]string{ - v1alpha5.VoluntaryDisruptionAnnotationKey: v1alpha5.VoluntaryDisruptionDriftedAnnotationValue, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - ExpectApplied(ctx, env.Client, node1, prov, node2) - ExpectMakeNodesReady(ctx, env.Client, node1, node2) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node1)) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node2)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node, but we should evict everything off one of node2 which only has a single pod - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - - // Expect one of the nodes to be deleted - nodes := &v1.NodeList{} - Expect(env.Client.List(ctx, nodes)).To(Succeed()) - Expect(len(nodes.Items)).To(Equal(1)) - }) -}) - -var _ = Describe("Expiration", func() { - It("should ignore nodes without TTLSecondsUntilExpired", func() { - prov := test.Provisioner() - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - // and can't delete the node since expiry is not enabled - ExpectNodeExists(ctx, env.Client, node.Name) - }) - It("can delete expired nodes", func() { - prov := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(60), - }) - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{ - v1.ResourceCPU: resource.MustParse("32"), - v1.ResourcePods: resource.MustParse("100"), - }}, 
- ) - - ExpectApplied(ctx, env.Client, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node, but we should evict everything off one of node2 which only has a single pod - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - // and delete the old one - ExpectNotFound(ctx, env.Client, node) - }) - It("should expire one node at a time, starting with most expired", func() { - expireProv := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(100), - }) - nodeToExpire := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: expireProv.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - prov := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(500), - }) - nodeNotExpire := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - - ExpectApplied(ctx, env.Client, nodeToExpire, expireProv, nodeNotExpire, prov) - ExpectMakeNodesReady(ctx, env.Client, nodeToExpire, nodeNotExpire) - - // inform cluster state about the nodes - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(nodeToExpire)) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(nodeNotExpire)) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - - // we don't need a new node, but we should evict everything off one of node2 which only has a single pod - Expect(cloudProvider.CreateCalls).To(HaveLen(0)) - // and delete the old one - ExpectNotFound(ctx, env.Client, nodeToExpire) - }) - It("can replace node for expiration", func() { - labels := map[string]string{ - "app": "test", - } - // create our RS so we can link a pod to it - rs := test.ReplicaSet() - ExpectApplied(ctx, env.Client, rs) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) - - pod := test.Pod(test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: rs.Name, - UID: rs.UID, - Controller: ptr.Bool(true), - BlockOwnerDeletion: ptr.Bool(true), - }, - }}}) - - prov := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(30), - }) - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: mostExpensiveInstance.Name, - v1alpha5.LabelCapacityType: mostExpensiveOffering.CapacityType, - 
v1.LabelTopologyZone: mostExpensiveOffering.Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("32")}, - }) - ExpectApplied(ctx, env.Client, rs, pod, node, prov) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - ExpectManualBinding(ctx, env.Client, pod, node) - ExpectScheduled(ctx, env.Client, pod) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // deprovisioning won't delete the old node until the new node is ready - wg := ExpectMakeNewNodesReady(ctx, env.Client, 1, node) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - wg.Wait() - - Expect(cloudProvider.CreateCalls).To(HaveLen(1)) - - ExpectNotFound(ctx, env.Client, node) - }) - It("should uncordon nodes when expiration replacement partially fails", func() { - currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "current-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.5, - Available: false, - }, - }, - }) - replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "replacement-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.3, - Available: true, - }, - }, - Resources: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, - }) - cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ - currentInstance, - replacementInstance, - } - cloudProvider.AllowedCreateCalls = 2 - - labels := map[string]string{ - "app": "test", - } - // create our RS so we can link a pod to it - rs := test.ReplicaSet() - ExpectApplied(ctx, env.Client, rs) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) - - pods := test.Pods(3, test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: rs.Name, - UID: rs.UID, - Controller: ptr.Bool(true), - BlockOwnerDeletion: ptr.Bool(true), - }, - }}, - // Make each pod request about a third of the allocatable on the node - ResourceRequirements: v1.ResourceRequirements{ - Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, - }, - }) - - prov := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(30), - }) - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: currentInstance.Name, - v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, - v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("7")}, - }) - ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - ExpectManualBinding(ctx, env.Client, pods[0], node) - ExpectManualBinding(ctx, env.Client, pods[1], node) - ExpectManualBinding(ctx, env.Client, pods[2], node) - ExpectScheduled(ctx, env.Client, pods[0]) - ExpectScheduled(ctx, env.Client, pods[1]) - 
ExpectScheduled(ctx, env.Client, pods[2]) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).To(HaveOccurred()) - - // Expiration should try to make 3 calls but fail for the third. - Expect(cloudProvider.CreateCalls).To(HaveLen(3)) - - node = ExpectNodeExists(ctx, env.Client, node.Name) - Expect(node.Spec.Unschedulable).To(BeFalse()) - }) - It("can replace node for expiration with multiple nodes", func() { - currentInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "current-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.5, - Available: false, - }, - }, - }) - replacementInstance := fake.NewInstanceType(fake.InstanceTypeOptions{ - Name: "replacement-on-demand", - Offerings: []cloudprovider.Offering{ - { - CapacityType: v1alpha5.CapacityTypeOnDemand, - Zone: "test-zone-1a", - Price: 0.3, - Available: true, - }, - }, - Resources: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("3")}, - }) - cloudProvider.InstanceTypes = []*cloudprovider.InstanceType{ - currentInstance, - replacementInstance, - } - - labels := map[string]string{ - "app": "test", - } - // create our RS so we can link a pod to it - rs := test.ReplicaSet() - ExpectApplied(ctx, env.Client, rs) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed()) - - pods := test.Pods(3, test.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: labels, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "ReplicaSet", - Name: rs.Name, - UID: rs.UID, - Controller: ptr.Bool(true), - BlockOwnerDeletion: ptr.Bool(true), - }, - }}, - // Make each pod request about a third of the allocatable on the node - ResourceRequirements: v1.ResourceRequirements{ - Requests: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("2")}, - }, - }) - - prov := test.Provisioner(test.ProvisionerOptions{ - TTLSecondsUntilExpired: ptr.Int64(200), - }) - node := test.Node(test.NodeOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - v1alpha5.ProvisionerNameLabelKey: prov.Name, - v1.LabelInstanceTypeStable: currentInstance.Name, - v1alpha5.LabelCapacityType: currentInstance.Offerings[0].CapacityType, - v1.LabelTopologyZone: currentInstance.Offerings[0].Zone, - }}, - Allocatable: map[v1.ResourceName]resource.Quantity{v1.ResourceCPU: resource.MustParse("8")}, - }) - ExpectApplied(ctx, env.Client, rs, node, prov, pods[0], pods[1], pods[2]) - ExpectMakeNodesReady(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node)) - ExpectManualBinding(ctx, env.Client, pods[0], node) - ExpectManualBinding(ctx, env.Client, pods[1], node) - ExpectManualBinding(ctx, env.Client, pods[2], node) - ExpectScheduled(ctx, env.Client, pods[0]) - ExpectScheduled(ctx, env.Client, pods[1]) - ExpectScheduled(ctx, env.Client, pods[2]) - Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) - - // deprovisioning won't delete the old node until the new node is ready - wg := ExpectMakeNewNodesReady(ctx, env.Client, 3, node) - fakeClock.Step(10 * time.Minute) - go triggerVerifyAction() - _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) - Expect(err).ToNot(HaveOccurred()) - wg.Wait() - - 
Expect(cloudProvider.CreateCalls).To(HaveLen(3)) - - ExpectNotFound(ctx, env.Client, node) - }) -}) - var _ = Describe("Pod Eviction Cost", func() { const standardPodCost = 1.0 It("should have a standard disruptionCost for a pod with no priority or disruptionCost specified", func() { @@ -996,6 +326,7 @@ var _ = Describe("Replace Nodes", func() { fakeClock.Step(10 * time.Minute) _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) Expect(err).ToNot(HaveOccurred()) + Expect(cluster.Consolidated()).To(BeTrue()) // we don't need a new node Expect(cloudProvider.CreateCalls).To(HaveLen(0)) @@ -1237,6 +568,7 @@ var _ = Describe("Replace Nodes", func() { fakeClock.Step(10 * time.Minute) go triggerVerifyAction() _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(cluster.Consolidated()).To(BeTrue()) Expect(err).ToNot(HaveOccurred()) Expect(cloudProvider.CreateCalls).To(HaveLen(0)) ExpectNodeExists(ctx, env.Client, node.Name) @@ -1340,6 +672,7 @@ var _ = Describe("Replace Nodes", func() { fakeClock.Step(10 * time.Minute) go triggerVerifyAction() _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) + Expect(cluster.Consolidated()).To(BeTrue()) Expect(err).ToNot(HaveOccurred()) Expect(cloudProvider.CreateCalls).To(HaveLen(0)) ExpectNodeExists(ctx, env.Client, node.Name) @@ -1396,6 +729,7 @@ var _ = Describe("Replace Nodes", func() { go func() { _, err := deprovisioningController.Reconcile(ctx, reconcile.Request{}) Expect(err).ToNot(HaveOccurred()) + Expect(cluster.Consolidated()).To(BeFalse()) consolidationFinished.Store(true) }() wg.Wait()