diff --git a/Makefile b/Makefile
index 8101d9490b..61ae639771 100644
--- a/Makefile
+++ b/Makefile
@@ -91,6 +91,7 @@ verify: ## Verify code. Includes codegen, docgen, dependencies, linting, formatt
 	hack/validation/labels.sh
 	hack/validation/resources.sh
 	hack/validation/status.sh
+	hack/mutation/conversion_webhook_injection.sh
 	hack/dependabot.sh
 	@# Use perl instead of sed due to https://stackoverflow.com/questions/4247068/sed-command-with-i-option-failing-on-mac-but-works-on-linux
 	@# We need to do this "sed replace" until controller-tools fixes this parameterized types issue: https://github.com/kubernetes-sigs/controller-tools/issues/756
diff --git a/hack/mutation/conversion_webhook_injection.sh b/hack/mutation/conversion_webhook_injection.sh
new file mode 100755
index 0000000000..4af5c3abde
--- /dev/null
+++ b/hack/mutation/conversion_webhook_injection.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+# Add the conversion stanza to the CRD spec to enable conversion via webhook
+yq eval '.spec.conversion = {"strategy": "Webhook", "webhook": {"conversionReviewVersions": ["v1beta1", "v1"], "clientConfig": {"service": {"name": "karpenter", "namespace": "kube-system", "port": 8443}}}}' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml
+yq eval '.spec.conversion = {"strategy": "Webhook", "webhook": {"conversionReviewVersions": ["v1beta1", "v1"], "clientConfig": {"service": {"name": "karpenter", "namespace": "kube-system", "port": 8443}}}}' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
index d57541c886..862c8299b7 100644
--- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml
@@ -811,3 +811,14 @@ spec:
     storage: true
     subresources:
       status: {}
+  conversion:
+    strategy: Webhook
+    webhook:
+      conversionReviewVersions:
+      - v1beta1
+      - v1
+      clientConfig:
+        service:
+          name: karpenter
+          namespace: kube-system
+          port: 8443
diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml
index a7931c2bd6..46b2fd4dc9 100644
--- a/pkg/apis/crds/karpenter.sh_nodepools.yaml
+++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml
@@ -1002,3 +1002,14 @@ spec:
     storage: true
     subresources:
       status: {}
+  conversion:
+    strategy: Webhook
+    webhook:
+      conversionReviewVersions:
+      - v1beta1
+      - v1
+      clientConfig:
+        service:
+          name: karpenter
+          namespace: kube-system
+          port: 8443
diff --git a/pkg/controllers/disruption/consolidation_test.go b/pkg/controllers/disruption/consolidation_test.go
index b5c8f9bb58..ab3771551a 100644
--- a/pkg/controllers/disruption/consolidation_test.go
+++ b/pkg/controllers/disruption/consolidation_test.go
@@ -4136,180 +4136,6 @@ var _ = Describe("Consolidation", func() {
 			ExpectExists(ctx, env.Client, nodes[1])
 		})
 	})
-
-	Context("Timeout", func() {
-		It("should return the last valid command when multi-nodeclaim consolidation times out", func() {
-			numNodes := 20
-			nodeClaims, nodes := test.NodeClaimsAndNodes(numNodes, v1beta1.NodeClaim{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: map[string]string{
-						v1beta1.NodePoolLabelKey:     nodePool.Name,
-						v1.LabelInstanceTypeStable:   leastExpensiveInstance.Name,
-						v1beta1.CapacityTypeLabelKey: leastExpensiveOffering.Requirements.Get(v1beta1.CapacityTypeLabelKey).Any(),
-						v1.LabelTopologyZone:         leastExpensiveOffering.Requirements.Get(v1.LabelTopologyZone).Any(),
-					},
-				},
-				Status: v1beta1.NodeClaimStatus{
-					Allocatable: map[v1.ResourceName]resource.Quantity{
-						v1.ResourceCPU:  resource.MustParse("32"),
-						v1.ResourcePods: resource.MustParse("100"),
-					},
-				}},
-			)
-			// create our RS so we can link a pod to it
-			rs := test.ReplicaSet()
-			ExpectApplied(ctx, env.Client, rs)
-			pods := test.Pods(numNodes, test.PodOptions{
-				ObjectMeta: metav1.ObjectMeta{Labels: labels,
-					OwnerReferences: []metav1.OwnerReference{
-						{
-							APIVersion:         "apps/v1",
-							Kind:               "ReplicaSet",
-							Name:               rs.Name,
-							UID:                rs.UID,
-							Controller:         lo.ToPtr(true),
-							BlockOwnerDeletion: lo.ToPtr(true),
-						},
-					}},
-				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{
-						// Make the resource requests small so that many nodes can be consolidated at once.
-						v1.ResourceCPU: resource.MustParse("10m"),
-					},
-				},
-			})
-
-			ExpectApplied(ctx, env.Client, rs, nodePool)
-			for _, nodeClaim := range nodeClaims {
-				ExpectApplied(ctx, env.Client, nodeClaim)
-			}
-			for _, node := range nodes {
-				ExpectApplied(ctx, env.Client, node)
-			}
-			for i, pod := range pods {
-				ExpectApplied(ctx, env.Client, pod)
-				ExpectManualBinding(ctx, env.Client, pod, nodes[i])
-			}
-
-			// inform cluster state about nodes and nodeClaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-
-			var wg sync.WaitGroup
-			wg.Add(1)
-			finished := atomic.Bool{}
-			go func() {
-				defer GinkgoRecover()
-				defer wg.Done()
-				defer finished.Store(true)
-				ExpectReconcileSucceeded(ctx, disruptionController, client.ObjectKey{})
-			}()
-
-			// advance the clock so that the timeout expires
-			fakeClock.Step(disruption.MultiNodeConsolidationTimeoutDuration)
-
-			// wait for the controller to block on the validation timeout
-			Eventually(fakeClock.HasWaiters, time.Second*10).Should(BeTrue())
-
-			ExpectTriggerVerifyAction(&wg)
-
-			// controller should be blocking during the timeout
-			Expect(finished.Load()).To(BeFalse())
-
-			// and the node should not be deleted yet
-			for i := range nodeClaims {
-				ExpectExists(ctx, env.Client, nodeClaims[i])
-			}
-
-			// controller should finish
-			Eventually(finished.Load, 10*time.Second).Should(BeTrue())
-			wg.Wait()
-
-			ExpectReconcileSucceeded(ctx, queue, types.NamespacedName{})
-
-			// should have at least two nodes deleted from multi nodeClaim consolidation
-			Expect(len(ExpectNodeClaims(ctx, env.Client))).To(BeNumerically("<=", numNodes-2))
-		})
-		It("should exit single-nodeclaim consolidation if it times out", func() {
-			numNodes := 25
-			nodeClaims, nodes := test.NodeClaimsAndNodes(numNodes, v1beta1.NodeClaim{
-				ObjectMeta: metav1.ObjectMeta{
-					Labels: map[string]string{
-						v1beta1.NodePoolLabelKey:     nodePool.Name,
-						v1.LabelInstanceTypeStable:   leastExpensiveInstance.Name,
-						v1beta1.CapacityTypeLabelKey: leastExpensiveOffering.Requirements.Get(v1beta1.CapacityTypeLabelKey).Any(),
-						v1.LabelTopologyZone:         leastExpensiveOffering.Requirements.Get(v1.LabelTopologyZone).Any(),
-					},
-				},
-				Status: v1beta1.NodeClaimStatus{
-					Allocatable: map[v1.ResourceName]resource.Quantity{
-						v1.ResourceCPU:  resource.MustParse("32"),
-						v1.ResourcePods: resource.MustParse("100"),
-					},
-				}},
-			)
-			// create our RS so we can link a pod to it
-			rs := test.ReplicaSet()
-			ExpectApplied(ctx, env.Client, rs)
-			pods := test.Pods(numNodes, test.PodOptions{
-				ObjectMeta: metav1.ObjectMeta{Labels: labels,
-					OwnerReferences: []metav1.OwnerReference{
-						{
-							APIVersion:         "apps/v1",
-							Kind:               "ReplicaSet",
-							Name:               rs.Name,
-							UID:                rs.UID,
-							Controller:         lo.ToPtr(true),
-							BlockOwnerDeletion: lo.ToPtr(true),
-						},
-					}},
-				ResourceRequirements: v1.ResourceRequirements{
-					Requests: v1.ResourceList{
-						// Make the pods more than half of the allocatable so that only one nodeclaim can be done at any time
-						v1.ResourceCPU: resource.MustParse("20"),
-					},
-				},
-			})
-
-			ExpectApplied(ctx, env.Client, rs, nodePool)
-			for _, nodeClaim := range nodeClaims {
-				ExpectApplied(ctx, env.Client, nodeClaim)
-			}
-			for _, node := range nodes {
-				ExpectApplied(ctx, env.Client, node)
-			}
-			for i, pod := range pods {
-				ExpectApplied(ctx, env.Client, pod)
-				ExpectManualBinding(ctx, env.Client, pod, nodes[i])
-			}
-
-			// inform cluster state about nodes and nodeClaims
-			ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, nodes, nodeClaims)
-
-			var wg sync.WaitGroup
-			wg.Add(1)
-			finished := atomic.Bool{}
-			go func() {
-				defer GinkgoRecover()
-				defer wg.Done()
-				defer finished.Store(true)
-				ExpectReconcileSucceeded(ctx, disruptionController, client.ObjectKey{})
-			}()
-
-			// advance the clock so that the timeout expires for multi-nodeClaim
-			fakeClock.Step(disruption.MultiNodeConsolidationTimeoutDuration)
-			// advance the clock so that the timeout expires for single-nodeClaim
-			fakeClock.Step(disruption.SingleNodeConsolidationTimeoutDuration)
-
-			ExpectTriggerVerifyAction(&wg)
-
-			// controller should finish
-			Eventually(finished.Load, 10*time.Second).Should(BeTrue())
-			wg.Wait()
-
-			// should have no nodeClaims deleted from single nodeClaim consolidation
-			Expect(ExpectNodeClaims(ctx, env.Client)).To(HaveLen(numNodes))
-		})
-	})
 	Context("Multi-NodeClaim", func() {
 		var nodeClaims, spotNodeClaims []*v1beta1.NodeClaim
 		var nodes, spotNodes []*v1.Node
diff --git a/pkg/operator/options/options.go b/pkg/operator/options/options.go
index 81168d66e5..ae11851977 100644
--- a/pkg/operator/options/options.go
+++ b/pkg/operator/options/options.go
@@ -83,7 +83,7 @@ func (fs *FlagSet) BoolVarWithEnv(p *bool, name string, envVar string, val bool,
 
 func (o *Options) AddFlags(fs *FlagSet) {
 	fs.StringVar(&o.ServiceName, "karpenter-service", env.WithDefaultString("KARPENTER_SERVICE", ""), "The Karpenter Service name for the dynamic webhook certificate")
-	fs.BoolVarWithEnv(&o.DisableWebhook, "disable-webhook", "DISABLE_WEBHOOK", true, "Disable the admission and validation webhooks")
+	fs.BoolVarWithEnv(&o.DisableWebhook, "disable-webhook", "DISABLE_WEBHOOK", false, "Disable the admission and validation webhooks")
 	fs.IntVar(&o.WebhookPort, "webhook-port", env.WithDefaultInt("WEBHOOK_PORT", 8443), "The port the webhook endpoint binds to for validation and mutation of resources")
 	fs.IntVar(&o.MetricsPort, "metrics-port", env.WithDefaultInt("METRICS_PORT", 8000), "The port the metric endpoint binds to for operating metrics about the controller itself")
 	fs.IntVar(&o.WebhookMetricsPort, "webhook-metrics-port", env.WithDefaultInt("WEBHOOK_METRICS_PORT", 8001), "The port the webhook metric endpoing binds to for operating metrics about the webhook")
diff --git a/pkg/operator/options/suite_test.go b/pkg/operator/options/suite_test.go
index ef9307fb8e..0a530de9c2 100644
--- a/pkg/operator/options/suite_test.go
+++ b/pkg/operator/options/suite_test.go
@@ -97,7 +97,7 @@ var _ = Describe("Options", func() {
 		Expect(err).To(BeNil())
 		expectOptionsMatch(opts, test.Options(test.OptionsFields{
 			ServiceName:        lo.ToPtr(""),
-			DisableWebhook:     lo.ToPtr(true),
+			DisableWebhook:     lo.ToPtr(false),
 			WebhookPort:        lo.ToPtr(8443),
 			MetricsPort:        lo.ToPtr(8000),
 			WebhookMetricsPort: lo.ToPtr(8001),
@@ -255,7 +255,7 @@ var _ = Describe("Options", func() {
 		Entry("explicit true", "--disable-webhook=true", true),
 		Entry("explicit false", "--disable-webhook=false", false),
 		Entry("implicit true", "--disable-webhook", true),
-		Entry("implicit true", "", true),
+		Entry("implicit false", "", false),
 	)
 })
 