From 176bad1a859dd18b270cef39489bf02c62f5c637 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Fri, 3 Feb 2023 15:37:08 -0800 Subject: [PATCH] Create a garbage collection reconciler for machine --- pkg/controllers/machine/controller.go | 5 + pkg/controllers/machine/garbagecollect.go | 51 ++ .../machine/garbagecollect_test.go | 80 ++ pkg/controllers/machine/initialization.go | 3 + .../machine/initialization_test.go | 412 +++++++++ pkg/controllers/machine/launch_test.go | 115 +++ pkg/controllers/machine/liveness_test.go | 100 +++ pkg/controllers/machine/registration.go | 3 + pkg/controllers/machine/registration_test.go | 313 +++++++ pkg/controllers/machine/suite_test.go | 785 +----------------- pkg/controllers/machine/termination_test.go | 75 ++ 11 files changed, 1181 insertions(+), 761 deletions(-) create mode 100644 pkg/controllers/machine/garbagecollect.go create mode 100644 pkg/controllers/machine/garbagecollect_test.go create mode 100644 pkg/controllers/machine/initialization_test.go create mode 100644 pkg/controllers/machine/launch_test.go create mode 100644 pkg/controllers/machine/liveness_test.go create mode 100644 pkg/controllers/machine/registration_test.go create mode 100644 pkg/controllers/machine/termination_test.go diff --git a/pkg/controllers/machine/controller.go b/pkg/controllers/machine/controller.go index 14308e2f91..416da34914 100644 --- a/pkg/controllers/machine/controller.go +++ b/pkg/controllers/machine/controller.go @@ -17,7 +17,9 @@ package machine import ( "context" "fmt" + "time" + "github.com/patrickmn/go-cache" "github.com/samber/lo" "go.uber.org/multierr" v1 "k8s.io/api/core/v1" @@ -57,6 +59,7 @@ type Controller struct { recorder events.Recorder terminator *terminator.Terminator + garbageCollect *GarbageCollect launch *Launch registration *Registration initialization *Initialization @@ -72,6 +75,7 @@ func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider clou recorder: recorder, terminator: terminator, + garbageCollect: &GarbageCollect{kubeClient: kubeClient, cloudProvider: cloudProvider, lastChecked: cache.New(time.Minute*10, 1*time.Minute)}, launch: &Launch{kubeClient: kubeClient, cloudProvider: cloudProvider}, registration: &Registration{kubeClient: kubeClient}, initialization: &Initialization{kubeClient: kubeClient}, @@ -98,6 +102,7 @@ func (c *Controller) Reconcile(ctx context.Context, machine *v1alpha5.Machine) ( var results []reconcile.Result var errs error for _, reconciler := range []machineReconciler{ + c.garbageCollect, c.launch, c.registration, c.initialization, diff --git a/pkg/controllers/machine/garbagecollect.go b/pkg/controllers/machine/garbagecollect.go new file mode 100644 index 0000000000..eedbcf4f6f --- /dev/null +++ b/pkg/controllers/machine/garbagecollect.go @@ -0,0 +1,51 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "context" + "time" + + "github.com/patrickmn/go-cache" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/cloudprovider" +) + +type GarbageCollect struct { + kubeClient client.Client + cloudProvider cloudprovider.CloudProvider + lastChecked *cache.Cache +} + +func (e *GarbageCollect) Reconcile(ctx context.Context, machine *v1alpha5.Machine) (reconcile.Result, error) { + if !machine.StatusConditions().GetCondition(v1alpha5.MachineCreated).IsTrue() { + return reconcile.Result{}, nil + } + // If there is no node representation for the machine, then check if there is a representation at the cloudprovider + if _, err := nodeForMachine(ctx, e.kubeClient, machine); err == nil || !IsNodeNotFoundError(err) { + return reconcile.Result{}, nil + } + if _, expireTime, ok := e.lastChecked.GetWithExpiration(client.ObjectKeyFromObject(machine).String()); ok { + return reconcile.Result{RequeueAfter: time.Until(expireTime)}, nil + } + if _, err := e.cloudProvider.Get(ctx, machine.Name, machine.Labels[v1alpha5.ProvisionerNameLabelKey]); cloudprovider.IsMachineNotFoundError(err) { + return reconcile.Result{}, client.IgnoreNotFound(e.kubeClient.Delete(ctx, machine)) + } + e.lastChecked.SetDefault(client.ObjectKeyFromObject(machine).String(), nil) + return reconcile.Result{}, nil +} diff --git a/pkg/controllers/machine/garbagecollect_test.go b/pkg/controllers/machine/garbagecollect_test.go new file mode 100644 index 0000000000..b509d807cd --- /dev/null +++ b/pkg/controllers/machine/garbagecollect_test.go @@ -0,0 +1,80 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("GarbageCollection", func() { + var provisioner *v1alpha5.Provisioner + + BeforeEach(func() { + provisioner = test.Provisioner() + }) + It("should delete the Machine when the Node never appears and the instance is gone", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // Delete the machine from the cloudprovider + Expect(cloudProvider.Delete(ctx, machine)).To(Succeed()) + + // Wait for the cache expiration to complete + time.Sleep(time.Second) + + // Expect the Machine to be removed now that the Instance is gone + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) // Reconcile again to handle termination flow + ExpectNotFound(ctx, env.Client, machine) + }) + It("shouldn't delete the Machine when the Node isn't there but the instance is there", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // Wait for the cache expiration to complete + time.Sleep(time.Second) + + // Reconcile the Machine. It should not be deleted by this flow since it has never been registered + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + ExpectExists(ctx, env.Client, machine) + }) +}) diff --git a/pkg/controllers/machine/initialization.go b/pkg/controllers/machine/initialization.go index 8e11e0e06e..29b167f6bd 100644 --- a/pkg/controllers/machine/initialization.go +++ b/pkg/controllers/machine/initialization.go @@ -43,6 +43,9 @@ func (i *Initialization) Reconcile(ctx context.Context, machine *v1alpha5.Machin if machine.Status.ProviderID == "" { return reconcile.Result{}, nil } + if machine.StatusConditions().GetCondition(v1alpha5.MachineInitialized).IsTrue() { + return reconcile.Result{}, nil + } ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", machine.Status.ProviderID)) node, err := nodeForMachine(ctx, i.kubeClient, machine) if err != nil { diff --git a/pkg/controllers/machine/initialization_test.go b/pkg/controllers/machine/initialization_test.go new file mode 100644 index 0000000000..7349d62e03 --- /dev/null +++ b/pkg/controllers/machine/initialization_test.go @@ -0,0 +1,412 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/cloudprovider/fake" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Initialization", func() { + var provisioner *v1alpha5.Provisioner + + BeforeEach(func() { + provisioner = test.Provisioner() + }) + It("should consider the Machine initialized when all initialization conditions are met", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + + node = ExpectExists(ctx, env.Client, node) + node.Status.Capacity = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + } + node.Status.Allocatable = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + } + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) + }) + It("should add the initialization label to the node when the Machine is initialized", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + node = ExpectExists(ctx, env.Client, node) + Expect(node.Labels).To(HaveKeyWithValue(v1alpha5.LabelNodeInitialized, "true")) + }) + It("should not consider the Node to be initialized when the status of the Node is NotReady", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + ReadyStatus: v1.ConditionFalse, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + }) + It("should not consider the Node to be initialized when all requested resources aren't registered", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + fake.ResourceGPUVendorA: resource.MustParse("1"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // Update the machine to add mock the instance type having an extended resource + machine.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") + machine.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") + ExpectApplied(ctx, env.Client, machine) + + // Extended resource hasn't registered yet by the daemonset + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + }) + It("should consider the node to be initialized once all the resources are registered", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + fake.ResourceGPUVendorA: resource.MustParse("1"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // Update the machine to add mock the instance type having an extended resource + machine.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") + machine.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") + ExpectApplied(ctx, env.Client, machine) + + // Extended resource hasn't registered yet by the daemonset + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + + // Node now registers the resource + node = ExpectExists(ctx, env.Client, node) + node.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") + node.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") + ExpectApplied(ctx, env.Client, node) + + // Reconcile the machine and the Machine/Node should now be initilized + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) + }) + It("should not consider the Node to be initialized when all startupTaints aren't removed", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + }, + }, + StartupTaints: []v1.Taint{ + { + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + { + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + + // Should add the startup taints to the node + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Spec.Taints).To(ContainElements( + v1.Taint{ + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + v1.Taint{ + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + )) + + // Shouldn't consider the node ready since the startup taints still exist + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + }) + It("should consider the Node to be initialized once the startup taints are removed", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + }, + }, + StartupTaints: []v1.Taint{ + { + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + { + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + + // Shouldn't consider the node ready since the startup taints still exist + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) + + node = ExpectExists(ctx, env.Client, node) + node.Spec.Taints = []v1.Taint{} + ExpectApplied(ctx, env.Client, node) + + // Machine should now be ready since all startup taints are removed + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) + }) +}) diff --git a/pkg/controllers/machine/launch_test.go b/pkg/controllers/machine/launch_test.go new file mode 100644 index 0000000000..39eacc9e11 --- /dev/null +++ b/pkg/controllers/machine/launch_test.go @@ -0,0 +1,115 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Launch", func() { + var provisioner *v1alpha5.Provisioner + + BeforeEach(func() { + provisioner = test.Provisioner() + }) + It("should launch an instance when a new Machine is created", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + Expect(cloudProvider.CreateCalls).To(HaveLen(1)) + Expect(cloudProvider.CreatedMachines).To(HaveLen(1)) + _, err := cloudProvider.Get(ctx, machine.Name, "") + Expect(err).ToNot(HaveOccurred()) + }) + It("should get an instance and hydrate the Machine when the Machine is already created", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + cloudProviderMachine := &v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: machine.Name, + Labels: map[string]string{ + v1.LabelInstanceTypeStable: "small-instance-type", + v1.LabelTopologyZone: "test-zone-1a", + v1.LabelTopologyRegion: "test-zone", + v1alpha5.LabelCapacityType: v1alpha5.CapacityTypeSpot, + }, + }, + Status: v1alpha5.MachineStatus{ + ProviderID: test.RandomProviderID(), + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourceEphemeralStorage: resource.MustParse("20Gi"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourceEphemeralStorage: resource.MustParse("18Gi"), + }, + }, + } + cloudProvider.CreatedMachines[machine.Name] = cloudProviderMachine + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + + Expect(machine.Status.ProviderID).To(Equal(cloudProviderMachine.Status.ProviderID)) + ExpectResources(machine.Status.Capacity, cloudProviderMachine.Status.Capacity) + ExpectResources(machine.Status.Allocatable, cloudProviderMachine.Status.Allocatable) + + Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelInstanceTypeStable, "small-instance-type")) + Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-1a")) + Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelTopologyRegion, "test-zone")) + Expect(machine.Labels).To(HaveKeyWithValue(v1alpha5.LabelCapacityType, v1alpha5.CapacityTypeSpot)) + }) + It("should add the MachineCreated status condition after creating the Machine", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineCreated).Status).To(Equal(v1.ConditionTrue)) + }) +}) diff --git a/pkg/controllers/machine/liveness_test.go b/pkg/controllers/machine/liveness_test.go new file mode 100644 index 0000000000..82423c65fd --- /dev/null +++ b/pkg/controllers/machine/liveness_test.go @@ -0,0 +1,100 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/settings" + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/cloudprovider/fake" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Liveness", func() { + var provisioner *v1alpha5.Provisioner + + BeforeEach(func() { + provisioner = test.Provisioner() + }) + It("should delete the Machine when the Node hasn't registered to the Machine past the liveness TTL", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + fake.ResourceGPUVendorA: resource.MustParse("1"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // If the node hasn't registered in the liveness timeframe, then we deprovision the Machine + fakeClock.Step(time.Minute * 20) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) // Reconcile again to handle termination flow + ExpectNotFound(ctx, env.Client, machine) + }) + It("should not delete the Machine when the Node hasn't registered to the Machine past the liveness TTL if ttlAfterNotRegistered is disabled", func() { + s := test.Settings() + s.TTLAfterNotRegistered = nil + ctx = settings.ToContext(ctx, s) + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + fake.ResourceGPUVendorA: resource.MustParse("1"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + // If the node hasn't registered in the liveness timeframe, then we deprovision the Machine + fakeClock.Step(time.Minute * 20) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) // Reconcile again to handle termination flow + ExpectExists(ctx, env.Client, machine) + }) +}) diff --git a/pkg/controllers/machine/registration.go b/pkg/controllers/machine/registration.go index e9536b9349..55c78fed9f 100644 --- a/pkg/controllers/machine/registration.go +++ b/pkg/controllers/machine/registration.go @@ -39,6 +39,9 @@ func (r *Registration) Reconcile(ctx context.Context, machine *v1alpha5.Machine) if machine.Status.ProviderID == "" { return reconcile.Result{}, nil } + if machine.StatusConditions().GetCondition(v1alpha5.MachineRegistered).IsTrue() { + return reconcile.Result{}, nil + } ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", machine.Status.ProviderID)) node, err := nodeForMachine(ctx, r.kubeClient, machine) if err != nil { diff --git a/pkg/controllers/machine/registration_test.go b/pkg/controllers/machine/registration_test.go new file mode 100644 index 0000000000..0cb1bb41bb --- /dev/null +++ b/pkg/controllers/machine/registration_test.go @@ -0,0 +1,313 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Registration", func() { + var provisioner *v1alpha5.Provisioner + + BeforeEach(func() { + provisioner = test.Provisioner() + }) + It("should match the Machine to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + machine = ExpectExists(ctx, env.Client, machine) + Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) + }) + It("should add the owner reference to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + node = ExpectExists(ctx, env.Client, node) + ExpectOwnerReferenceExists(node, machine) + }) + It("should sync the labels to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + "custom-label": "custom-value", + "other-custom-label": "other-custom-value", + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(machine.Labels).To(HaveKeyWithValue("custom-label", "custom-value")) + Expect(machine.Labels).To(HaveKeyWithValue("other-custom-label", "other-custom-value")) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + + // Expect Node to have all the labels that the Machine has + for k, v := range machine.Labels { + Expect(node.Labels).To(HaveKeyWithValue(k, v)) + } + }) + It("should sync the annotations to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + Annotations: map[string]string{ + v1alpha5.DoNotConsolidateNodeAnnotationKey: "true", + "my-custom-annotation": "my-custom-value", + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(machine.Annotations).To(HaveKeyWithValue(v1alpha5.DoNotConsolidateNodeAnnotationKey, "true")) + Expect(machine.Annotations).To(HaveKeyWithValue("my-custom-annotation", "my-custom-value")) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + + // Expect Node to have all the annotations that the Machine has + for k, v := range machine.Annotations { + Expect(node.Annotations).To(HaveKeyWithValue(k, v)) + } + }) + It("should sync the taints to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Taints: []v1.Taint{ + { + Key: "custom-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-value", + }, + { + Key: "other-custom-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-value", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(machine.Spec.Taints).To(ContainElements( + v1.Taint{ + Key: "custom-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-value", + }, + v1.Taint{ + Key: "other-custom-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-value", + }, + )) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + + Expect(node.Spec.Taints).To(ContainElements( + v1.Taint{ + Key: "custom-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-value", + }, + v1.Taint{ + Key: "other-custom-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-value", + }, + )) + }) + It("should sync the startupTaints to the Node when the Node comes online", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + Taints: []v1.Taint{ + { + Key: "custom-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-value", + }, + { + Key: "other-custom-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-value", + }, + }, + StartupTaints: []v1.Taint{ + { + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + { + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + Expect(machine.Spec.StartupTaints).To(ContainElements( + v1.Taint{ + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + v1.Taint{ + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + )) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + + Expect(node.Spec.Taints).To(ContainElements( + v1.Taint{ + Key: "custom-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-value", + }, + v1.Taint{ + Key: "other-custom-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-value", + }, + v1.Taint{ + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + v1.Taint{ + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + )) + }) + It("should not re-sync the startupTaints to the Node when the startupTaints are removed", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, + }, + }, + Spec: v1alpha5.MachineSpec{ + StartupTaints: []v1.Taint{ + { + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + { + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, provisioner, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + + Expect(node.Spec.Taints).To(ContainElements( + v1.Taint{ + Key: "custom-startup-taint", + Effect: v1.TaintEffectNoSchedule, + Value: "custom-startup-value", + }, + v1.Taint{ + Key: "other-custom-startup-taint", + Effect: v1.TaintEffectNoExecute, + Value: "other-custom-startup-value", + }, + )) + node.Spec.Taints = []v1.Taint{} + ExpectApplied(ctx, env.Client, node) + + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + node = ExpectExists(ctx, env.Client, node) + Expect(node.Spec.Taints).To(HaveLen(0)) + }) +}) diff --git a/pkg/controllers/machine/suite_test.go b/pkg/controllers/machine/suite_test.go index 3b05fb8e96..a2c6b20660 100644 --- a/pkg/controllers/machine/suite_test.go +++ b/pkg/controllers/machine/suite_test.go @@ -23,7 +23,6 @@ import ( . "github.com/onsi/gomega" "github.com/samber/lo" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" clock "k8s.io/utils/clock/testing" @@ -49,6 +48,7 @@ var ctx context.Context var machineController controller.Controller var env *test.Environment var fakeClock *clock.FakeClock +var term *terminator.Terminator var cloudProvider *fake.CloudProvider func TestAPIs(t *testing.T) { @@ -67,778 +67,41 @@ var _ = BeforeSuite(func() { ctx = settings.ToContext(ctx, test.Settings()) cloudProvider = fake.NewCloudProvider() - terminator := terminator.NewTerminator(fakeClock, env.Client, cloudProvider, terminator.NewEvictionQueue(ctx, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}))) - machineController = machine.NewController(fakeClock, env.Client, cloudProvider, terminator, events.NewRecorder(&record.FakeRecorder{})) + term = terminator.NewTerminator(fakeClock, env.Client, cloudProvider, terminator.NewEvictionQueue(ctx, env.KubernetesInterface.CoreV1(), events.NewRecorder(&record.FakeRecorder{}))) + machineController = machine.NewController(fakeClock, env.Client, cloudProvider, term, events.NewRecorder(&record.FakeRecorder{})) }) var _ = AfterSuite(func() { Expect(env.Stop()).To(Succeed(), "Failed to stop environment") }) -var _ = Describe("Controller", func() { - BeforeEach(func() { - ctx = settings.ToContext(ctx, test.Settings()) - }) - AfterEach(func() { - fakeClock.SetTime(time.Now()) - ExpectCleanedUp(ctx, env.Client) - cloudProvider.Reset() - }) - - Context("Finalizer", func() { - It("should add the finalizer if it doesn't exist", func() { - machine := test.Machine() - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - _, ok := lo.Find(machine.Finalizers, func(f string) bool { - return f == v1alpha5.TerminationFinalizer - }) - Expect(ok).To(BeTrue()) - }) - }) - Context("Launch", func() { - It("should launch an instance when a new Machine is created", func() { - machine := test.Machine() - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - Expect(cloudProvider.CreateCalls).To(HaveLen(1)) - Expect(cloudProvider.CreatedMachines).To(HaveLen(1)) - _, err := cloudProvider.Get(ctx, machine.Name, "") - Expect(err).ToNot(HaveOccurred()) - }) - It("should get an instance and hydrate the Machine when the Machine is already created", func() { - machine := test.Machine() - cloudProviderMachine := &v1alpha5.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: machine.Name, - Labels: map[string]string{ - v1.LabelInstanceTypeStable: "small-instance-type", - v1.LabelTopologyZone: "test-zone-1a", - v1.LabelTopologyRegion: "test-zone", - v1alpha5.LabelCapacityType: v1alpha5.CapacityTypeSpot, - }, - }, - Status: v1alpha5.MachineStatus{ - ProviderID: test.RandomProviderID(), - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourceEphemeralStorage: resource.MustParse("20Gi"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourceEphemeralStorage: resource.MustParse("18Gi"), - }, - }, - } - cloudProvider.CreatedMachines[machine.Name] = cloudProviderMachine - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - - Expect(machine.Status.ProviderID).To(Equal(cloudProviderMachine.Status.ProviderID)) - ExpectResources(machine.Status.Capacity, cloudProviderMachine.Status.Capacity) - ExpectResources(machine.Status.Allocatable, cloudProviderMachine.Status.Allocatable) - - Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelInstanceTypeStable, "small-instance-type")) - Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-1a")) - Expect(machine.Labels).To(HaveKeyWithValue(v1.LabelTopologyRegion, "test-zone")) - Expect(machine.Labels).To(HaveKeyWithValue(v1alpha5.LabelCapacityType, v1alpha5.CapacityTypeSpot)) - }) - It("should add the MachineCreated status condition after creating the Machine", func() { - machine := test.Machine() - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineCreated).Status).To(Equal(v1.ConditionTrue)) - }) - }) - Context("Registration", func() { - It("should match the Machine to the Node when the Node comes online", func() { - machine := test.Machine() - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - }) - It("should add the owner reference to the Node when the Node comes online", func() { - machine := test.Machine() - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - node = ExpectExists(ctx, env.Client, node) - ExpectOwnerReferenceExists(node, machine) - }) - It("should sync the labels to the Node when the Node comes online", func() { - machine := test.Machine(v1alpha5.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "custom-label": "custom-value", - "other-custom-label": "other-custom-value", - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(machine.Labels).To(HaveKeyWithValue("custom-label", "custom-value")) - Expect(machine.Labels).To(HaveKeyWithValue("other-custom-label", "other-custom-value")) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - - // Expect Node to have all the labels that the Machine has - for k, v := range machine.Labels { - Expect(node.Labels).To(HaveKeyWithValue(k, v)) - } - }) - It("should sync the annotations to the Node when the Node comes online", func() { - machine := test.Machine(v1alpha5.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ - v1alpha5.DoNotConsolidateNodeAnnotationKey: "true", - "my-custom-annotation": "my-custom-value", - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(machine.Annotations).To(HaveKeyWithValue(v1alpha5.DoNotConsolidateNodeAnnotationKey, "true")) - Expect(machine.Annotations).To(HaveKeyWithValue("my-custom-annotation", "my-custom-value")) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - - // Expect Node to have all the annotations that the Machine has - for k, v := range machine.Annotations { - Expect(node.Annotations).To(HaveKeyWithValue(k, v)) - } - }) - It("should sync the taints to the Node when the Node comes online", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Taints: []v1.Taint{ - { - Key: "custom-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-value", - }, - { - Key: "other-custom-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-value", - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(machine.Spec.Taints).To(ContainElements( - v1.Taint{ - Key: "custom-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-value", - }, - v1.Taint{ - Key: "other-custom-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-value", - }, - )) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - - Expect(node.Spec.Taints).To(ContainElements( - v1.Taint{ - Key: "custom-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-value", - }, - v1.Taint{ - Key: "other-custom-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-value", - }, - )) - }) - It("should sync the startupTaints to the Node when the Node comes online", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Taints: []v1.Taint{ - { - Key: "custom-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-value", - }, - { - Key: "other-custom-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-value", - }, - }, - StartupTaints: []v1.Taint{ - { - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - { - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(machine.Spec.StartupTaints).To(ContainElements( - v1.Taint{ - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - v1.Taint{ - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - )) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - - Expect(node.Spec.Taints).To(ContainElements( - v1.Taint{ - Key: "custom-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-value", - }, - v1.Taint{ - Key: "other-custom-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-value", - }, - v1.Taint{ - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - v1.Taint{ - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - )) - }) - It("should not re-sync the startupTaints to the Node when the startupTaints are removed", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - StartupTaints: []v1.Taint{ - { - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - { - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ProviderID: machine.Status.ProviderID}) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - - Expect(node.Spec.Taints).To(ContainElements( - v1.Taint{ - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - v1.Taint{ - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - )) - node.Spec.Taints = []v1.Taint{} - ExpectApplied(ctx, env.Client, node) - - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - Expect(node.Spec.Taints).To(HaveLen(0)) - }) - }) - Context("Initialization", func() { - It("should consider the Machine initialized when all initialization conditions are met", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) - - node = ExpectExists(ctx, env.Client, node) - node.Status.Capacity = v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - } - node.Status.Allocatable = v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - } - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) - }) - It("should add the initialization label to the node when the Machine is initialized", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - node = ExpectExists(ctx, env.Client, node) - Expect(node.Labels).To(HaveKeyWithValue(v1alpha5.LabelNodeInitialized, "true")) - }) - It("should not consider the Node to be initialized when the status of the Node is NotReady", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - ReadyStatus: v1.ConditionFalse, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) - }) - It("should not consider the Node to be initialized when all requested resources aren't registered", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - fake.ResourceGPUVendorA: resource.MustParse("1"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - // Update the machine to add mock the instance type having an extended resource - machine.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") - machine.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") - ExpectApplied(ctx, env.Client, machine) - - // Extended resource hasn't registered yet by the daemonset - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) - }) - It("should consider the node to be initialized once all the resources are registered", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - fake.ResourceGPUVendorA: resource.MustParse("1"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - // Update the machine to add mock the instance type having an extended resource - machine.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") - machine.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") - ExpectApplied(ctx, env.Client, machine) - - // Extended resource hasn't registered yet by the daemonset - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) - - // Node now registers the resource - node = ExpectExists(ctx, env.Client, node) - node.Status.Capacity[fake.ResourceGPUVendorA] = resource.MustParse("2") - node.Status.Allocatable[fake.ResourceGPUVendorA] = resource.MustParse("2") - ExpectApplied(ctx, env.Client, node) - - // Reconcile the machine and the Machine/Node should now be initilized - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) - }) - It("should not consider the Node to be initialized when all startupTaints aren't removed", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - }, - }, - StartupTaints: []v1.Taint{ - { - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - { - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - - // Should add the startup taints to the node - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - node = ExpectExists(ctx, env.Client, node) - Expect(node.Spec.Taints).To(ContainElements( - v1.Taint{ - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - v1.Taint{ - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - )) - - // Shouldn't consider the node ready since the startup taints still exist - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) - }) - It("should consider the Node to be initialized once the startup taints are removed", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - }, - }, - StartupTaints: []v1.Taint{ - { - Key: "custom-startup-taint", - Effect: v1.TaintEffectNoSchedule, - Value: "custom-startup-value", - }, - { - Key: "other-custom-startup-taint", - Effect: v1.TaintEffectNoExecute, - Value: "other-custom-startup-value", - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - - // Shouldn't consider the node ready since the startup taints still exist - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionFalse)) +var _ = AfterEach(func() { + fakeClock.SetTime(time.Now()) + ExpectCleanedUp(ctx, env.Client) + cloudProvider.Reset() +}) - node = ExpectExists(ctx, env.Client, node) - node.Spec.Taints = []v1.Taint{} - ExpectApplied(ctx, env.Client, node) +var _ = Describe("Finalizer", func() { + var provisioner *v1alpha5.Provisioner - // Machine should now be ready since all startup taints are removed - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineRegistered).Status).To(Equal(v1.ConditionTrue)) - Expect(ExpectStatusConditionExists(machine, v1alpha5.MachineInitialized).Status).To(Equal(v1.ConditionTrue)) - }) + BeforeEach(func() { + provisioner = test.Provisioner() }) - Context("Liveness", func() { - It("should delete the Machine when the Node hasn't registered to the Machine past the liveness TTL", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - fake.ResourceGPUVendorA: resource.MustParse("1"), - }, - }, + It("should add the finalizer if it doesn't exist", func() { + machine := test.Machine(v1alpha5.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + v1alpha5.ProvisionerNameLabelKey: provisioner.Name, }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - // If the node hasn't registered in the liveness timeframe, then we deprovision the Machine - fakeClock.Step(time.Minute * 20) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) // Reconcile again to handle termination flow - ExpectNotFound(ctx, env.Client, machine) - }) - It("should not delete the Machine when the Node hasn't registered to the Machine past the liveness TTL if ttlAfterNotRegistered is disabled", func() { - s := test.Settings() - s.TTLAfterNotRegistered = nil - ctx = settings.ToContext(ctx, s) - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - fake.ResourceGPUVendorA: resource.MustParse("1"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - // If the node hasn't registered in the liveness timeframe, then we deprovision the Machine - fakeClock.Step(time.Minute * 20) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) // Reconcile again to handle termination flow - ExpectExists(ctx, env.Client, machine) + }, }) - }) - Context("Termination", func() { - It("should cordon, drain, and delete the Machine on terminate", func() { - machine := test.Machine(v1alpha5.Machine{ - Spec: v1alpha5.MachineSpec{ - Resources: v1alpha5.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("2"), - v1.ResourceMemory: resource.MustParse("50Mi"), - v1.ResourcePods: resource.MustParse("5"), - fake.ResourceGPUVendorA: resource.MustParse("1"), - }, - }, - }, - }) - ExpectApplied(ctx, env.Client, machine) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - machine = ExpectExists(ctx, env.Client, machine) - - node := test.Node(test.NodeOptions{ - ProviderID: machine.Status.ProviderID, - Capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("10"), - v1.ResourceMemory: resource.MustParse("100Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - Allocatable: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("8"), - v1.ResourceMemory: resource.MustParse("80Mi"), - v1.ResourcePods: resource.MustParse("110"), - }, - }) - ExpectApplied(ctx, env.Client, node) - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - - Expect(cloudProvider.CreatedMachines).To(HaveLen(1)) - - // Kickoff the deletion flow for the machine - Expect(env.Client.Delete(ctx, machine)).To(Succeed()) + ExpectApplied(ctx, env.Client, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - // Machine should delete and the Node deletion should cascade shortly after - ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) - ExpectNotFound(ctx, env.Client, machine) + machine = ExpectExists(ctx, env.Client, machine) + _, ok := lo.Find(machine.Finalizers, func(f string) bool { + return f == v1alpha5.TerminationFinalizer }) + Expect(ok).To(BeTrue()) }) }) diff --git a/pkg/controllers/machine/termination_test.go b/pkg/controllers/machine/termination_test.go new file mode 100644 index 0000000000..f2dc31e991 --- /dev/null +++ b/pkg/controllers/machine/termination_test.go @@ -0,0 +1,75 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine_test + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/apis/v1alpha5" + "github.com/aws/karpenter-core/pkg/cloudprovider/fake" + "github.com/aws/karpenter-core/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/aws/karpenter-core/pkg/test/expectations" +) + +var _ = Describe("Termination", func() { + It("should cordon, drain, and delete the Machine on terminate", func() { + machine := test.Machine(v1alpha5.Machine{ + Spec: v1alpha5.MachineSpec{ + Resources: v1alpha5.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("50Mi"), + v1.ResourcePods: resource.MustParse("5"), + fake.ResourceGPUVendorA: resource.MustParse("1"), + }, + }, + }, + }) + ExpectApplied(ctx, env.Client, machine) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + machine = ExpectExists(ctx, env.Client, machine) + + node := test.Node(test.NodeOptions{ + ProviderID: machine.Status.ProviderID, + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("10"), + v1.ResourceMemory: resource.MustParse("100Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("8"), + v1.ResourceMemory: resource.MustParse("80Mi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }) + ExpectApplied(ctx, env.Client, node) + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + + Expect(cloudProvider.CreatedMachines).To(HaveLen(1)) + + // Kickoff the deletion flow for the machine + Expect(env.Client.Delete(ctx, machine)).To(Succeed()) + + // Machine should delete and the Node deletion should cascade shortly after + ExpectReconcileSucceeded(ctx, machineController, client.ObjectKeyFromObject(machine)) + ExpectNotFound(ctx, env.Client, machine) + }) +})