From 0e99651ea5986b2e74f90932f0a1f65b25272ee6 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Fri, 29 Jan 2021 16:22:08 +0100 Subject: [PATCH 01/19] [Autoscaling] Add Elasticsearch autoscaling controller --- cmd/manager/main.go | 2 + pkg/controller/autoscaling/elasticsearch.go | 36 ++ .../elasticsearch/autoscaler/autoscaler.go | 67 ++++ .../autoscaler/autoscaler_test.go | 261 +++++++++++++ .../elasticsearch/autoscaler/context.go | 27 ++ .../elasticsearch/autoscaler/horizontal.go | 115 ++++++ .../elasticsearch/autoscaler/linear_scaler.go | 76 ++++ .../autoscaler/linear_scaler_test.go | 142 +++++++ .../elasticsearch/autoscaler/nodesets.go | 49 +++ .../elasticsearch/autoscaler/nodesets_test.go | 67 ++++ .../elasticsearch/autoscaler/offline.go | 129 ++++++ .../elasticsearch/autoscaler/offline_test.go | 88 +++++ .../elasticsearch/autoscaler/vertical.go | 166 ++++++++ .../autoscaling/elasticsearch/controller.go | 228 +++++++++++ .../elasticsearch/controller_test.go | 362 +++++++++++++++++ .../autoscaling/elasticsearch/driver.go | 254 ++++++++++++ .../autoscaling/elasticsearch/policy.go | 39 ++ .../autoscaling/elasticsearch/reconcile.go | 151 ++++++++ .../elasticsearch/resources/resources.go | 291 ++++++++++++++ .../elasticsearch/resources/resources_test.go | 366 ++++++++++++++++++ .../elasticsearch/status/actual.go | 169 ++++++++ .../elasticsearch/status/actual_test.go | 284 ++++++++++++++ .../elasticsearch/status/events.go | 29 ++ .../elasticsearch/status/status.go | 212 ++++++++++ .../elasticsearch-expected.yml | 130 +++++++ .../cluster-creation/elasticsearch.yml | 80 ++++ .../capacity.json | 91 +++++ .../elasticsearch-expected.yml | 75 ++++ .../elasticsearch.yml | 75 ++++ .../max-storage-reached/capacity.json | 148 +++++++ .../elasticsearch-expected.yml | 130 +++++++ .../max-storage-reached/elasticsearch.yml | 130 +++++++ .../elasticsearch-expected.yml | 75 ++++ .../elasticsearch.yml | 78 ++++ .../elasticsearch/testdata/ml/capacity.json | 64 +++ .../testdata/ml/elasticsearch-expected.yml | 57 +++ .../testdata/ml/elasticsearch.yml | 57 +++ .../storage-scaled-horizontally/capacity.json | 148 +++++++ .../elasticsearch-expected.yml | 75 ++++ .../elasticsearch.yml | 75 ++++ pkg/controller/common/license/check.go | 5 + .../elasticsearch/driver/autoscaling.go | 61 +++ pkg/controller/elasticsearch/driver/nodes.go | 7 + .../elasticsearch/volume/defaults.go | 4 +- 44 files changed, 5174 insertions(+), 1 deletion(-) create mode 100644 pkg/controller/autoscaling/elasticsearch.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/context.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go create mode 100644 pkg/controller/autoscaling/elasticsearch/controller.go create mode 100644 
pkg/controller/autoscaling/elasticsearch/controller_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/driver.go create mode 100644 pkg/controller/autoscaling/elasticsearch/policy.go create mode 100644 pkg/controller/autoscaling/elasticsearch/reconcile.go create mode 100644 pkg/controller/autoscaling/elasticsearch/resources/resources.go create mode 100644 pkg/controller/autoscaling/elasticsearch/resources/resources_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/status/actual.go create mode 100644 pkg/controller/autoscaling/elasticsearch/status/actual_test.go create mode 100644 pkg/controller/autoscaling/elasticsearch/status/events.go create mode 100644 pkg/controller/autoscaling/elasticsearch/status/status.go create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/capacity.json create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/capacity.json create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/ml/capacity.json create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/capacity.json create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml create mode 100644 pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml create mode 100644 pkg/controller/elasticsearch/driver/autoscaling.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 02ec33e502..043fa6d712 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -29,6 +29,7 @@ import ( "github.com/elastic/cloud-on-k8s/pkg/controller/apmserver" "github.com/elastic/cloud-on-k8s/pkg/controller/association" associationctl "github.com/elastic/cloud-on-k8s/pkg/controller/association/controller" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling" "github.com/elastic/cloud-on-k8s/pkg/controller/beat" "github.com/elastic/cloud-on-k8s/pkg/controller/common/certificates" "github.com/elastic/cloud-on-k8s/pkg/controller/common/container" @@ -643,6 +644,7 @@ func registerControllers(mgr manager.Manager, params operator.Parameters, access }{ {name: "APMServer", registerFunc: apmserver.Add}, {name: "Elasticsearch", registerFunc: elasticsearch.Add}, + {name: "ElasticsearchAutoscaling", registerFunc: autoscaling.Add}, {name: "Kibana", registerFunc: kibana.Add}, {name: 
"EnterpriseSearch", registerFunc: enterprisesearch.Add}, {name: "Beats", registerFunc: beat.Add}, diff --git a/pkg/controller/autoscaling/elasticsearch.go b/pkg/controller/autoscaling/elasticsearch.go new file mode 100644 index 0000000000..39d3d9438f --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch.go @@ -0,0 +1,36 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaling + +import ( + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch" + "github.com/elastic/cloud-on-k8s/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/operator" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +const ( + controllerName = "elasticsearch-autoscaling" +) + +// Add creates a new Elasticsearch autoscaling controller and adds it to the Manager with default RBAC. +// The Manager will set fields on the Controller and Start it when the Manager is Started. +func Add(mgr manager.Manager, p operator.Parameters) error { + r := elasticsearch.NewReconciler(mgr, p) + c, err := common.NewController(mgr, controllerName, r, p) + if err != nil { + return err + } + // Watch for changes on Elasticsearch clusters. + if err := c.Watch( + &source.Kind{Type: &esv1.Elasticsearch{}}, &handler.EnqueueRequestForObject{}, + ); err != nil { + return err + } + return nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go new file mode 100644 index 0000000000..ca334ac143 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go @@ -0,0 +1,67 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/volume" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// GetResources calculates the resources required by all the NodeSets managed by a same autoscaling policy. +func (ctx *Context) GetResources() resources.NodeSetsResources { + // 1. Scale vertically, calculating the resources for each node managed by the autoscaling policy in the context. + desiredNodeResources := ctx.scaleVertically() + ctx.Log.Info( + "Vertical autoscaler", + "state", "online", + "policy", ctx.AutoscalingSpec.Name, + "scope", "node", + "nodesets", ctx.NodeSets.Names(), + "resources", desiredNodeResources.ToInt64(), + "required_capacity", ctx.RequiredCapacity, + ) + + // 2. Scale horizontally by adding nodes to meet the resource requirements. 
+ return ctx.scaleHorizontally(desiredNodeResources) +} + +// scaleVertically calculates the desired resources for all the nodes managed by the same autoscaling policy, given the requested +// capacity returned by the Elasticsearch autoscaling API and the AutoscalingSpec specified by the user. +// It attempts to scale all the resources vertically until the required resources are provided or the limits set by the user are reached. +func (ctx *Context) scaleVertically() resources.NodeResources { + // All resources can be computed "from scratch", without knowing the previous values. + // This is however not true for storage. Storage can't be scaled down, current storage capacity must be considered + // as a hard min. limit. This storage limit must be taken into consideration when computing the desired resources. + currentStorage := getStorage(ctx.AutoscalingSpec, ctx.ActualAutoscalingStatus) + return ctx.nodeResources( + int64(ctx.AutoscalingSpec.NodeCount.Min), + currentStorage, + ) +} + +// getStorage returns the min. storage capacity that should be used by the autoscaling algorithm. +// The value is the max. value of either: +// * the current value in the status +// * the min. value set by the user in the autoscaling spec. +func getStorage(autoscalingSpec esv1.AutoscalingPolicySpec, actualAutoscalingStatus status.Status) resource.Quantity { + // Start from the default volume size; it is used if no storage range is set in the autoscaling spec and no value is stored in the status. + storage := volume.DefaultPersistentVolumeSize.DeepCopy() + // Always adjust to the min value specified by the user in the limits. + if autoscalingSpec.IsStorageDefined() { + storage = autoscalingSpec.Storage.Min + } + // If a storage value is stored in the status then reuse it. + if actualResources, exists := actualAutoscalingStatus.GetNamedTierResources(autoscalingSpec.Name); exists && actualResources.HasRequest(corev1.ResourceStorage) { + storageInStatus := actualResources.GetRequest(corev1.ResourceStorage) + if storageInStatus.Cmp(storage) > 0 { + storage = storageInStatus + } + } + return storage +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go new file mode 100644 index 0000000000..df99bb9fb6 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go @@ -0,0 +1,261 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License.
+ +package autoscaler + +import ( + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/resource" +) + +func Test_applyScaleDecision(t *testing.T) { + defaultNodeSets := esv1.NodeSetList{{ + Name: "default", + }} + type args struct { + currentNodeSets esv1.NodeSetList + nodeSetsStatus status.Status + requiredCapacity client.AutoscalingCapacityInfo + policy esv1.AutoscalingPolicySpec + } + tests := []struct { + name string + args args + want resources.NodeSetsResources + wantErr bool + }{ + { + name: "Scale both vertically and horizontally to fulfil storage capacity request", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("3Gi").nodeStorage("8Gi"). + tierMemory("9Gi").tierStorage("50Gi"). + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("3Gi", "4Gi").WithStorage("5Gi", "10Gi").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 5}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("10Gi")}}, + }, + }, + { + name: "Scale existing nodes vertically", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("6G"). + tierMemory("15G"). + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("5G", "8G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("6Gi")}}, + }, + }, + { + name: "Do not scale down storage capacity", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("4G"), corev1.ResourceStorage: q("10G")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). 
+ nodeMemory("6G"). + tierMemory("15G"). + nodeStorage("1Gi"). + tierStorage("3Gi"). + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("5G", "8G").WithStorage("1G", "20G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("6Gi"), corev1.ResourceStorage: q("10G")}}, + }, + }, + { + name: "Scale existing nodes vertically up to the tier limit", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("4G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("6G"). + tierMemory("21G"). + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("5G", "8G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("7Gi")}}, + }, + }, + { + name: "Scale both vertically and horizontally", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("4G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("6G"). + tierMemory("48G"). 
+ build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("5G", "8G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 6}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("8G")}}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := Context{ + Log: logTest, + AutoscalingSpec: tt.args.policy, + NodeSets: tt.args.currentNodeSets, + ActualAutoscalingStatus: tt.args.nodeSetsStatus, + RequiredCapacity: tt.args.requiredCapacity, + StatusBuilder: status.NewAutoscalingStatusBuilder(), + } + if got := ctx.GetResources(); !equality.Semantic.DeepEqual(got, tt.want) { + t.Errorf("autoscaler.GetResources() = %v, want %v", got, tt.want) + } + }) + } +} + +// - AutoscalingSpec builder + +type AutoscalingSpecBuilder struct { + name string + nodeCountMin, nodeCountMax int32 + cpu, memory, storage *esv1.QuantityRange +} + +func NewAutoscalingSpecBuilder(name string) *AutoscalingSpecBuilder { + return &AutoscalingSpecBuilder{name: name} +} + +func (asb *AutoscalingSpecBuilder) WithNodeCounts(min, max int) *AutoscalingSpecBuilder { + asb.nodeCountMin = int32(min) + asb.nodeCountMax = int32(max) + return asb +} + +func (asb *AutoscalingSpecBuilder) WithMemory(min, max string) *AutoscalingSpecBuilder { + asb.memory = &esv1.QuantityRange{ + Min: resource.MustParse(min), + Max: resource.MustParse(max), + } + return asb +} + +func (asb *AutoscalingSpecBuilder) WithStorage(min, max string) *AutoscalingSpecBuilder { + asb.storage = &esv1.QuantityRange{ + Min: resource.MustParse(min), + Max: resource.MustParse(max), + } + return asb +} + +func (asb *AutoscalingSpecBuilder) WithCPU(min, max string) *AutoscalingSpecBuilder { + asb.cpu = &esv1.QuantityRange{ + Min: resource.MustParse(min), + Max: resource.MustParse(max), + } + return asb +} + +func (asb *AutoscalingSpecBuilder) Build() esv1.AutoscalingPolicySpec { + return esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{ + Name: asb.name, + }, + AutoscalingResources: esv1.AutoscalingResources{ + CPU: asb.cpu, + Memory: asb.memory, + Storage: asb.storage, + NodeCount: esv1.CountRange{ + Min: asb.nodeCountMin, + Max: asb.nodeCountMax, + }, + }, + } +} + +// - AutoscalingCapacityInfo builder + +type requiredCapacityBuilder struct { + client.AutoscalingCapacityInfo +} + +func newRequiredCapacityBuilder() *requiredCapacityBuilder { + return &requiredCapacityBuilder{} +} + +func ptr(q int64) *client.AutoscalingCapacity { + v := client.AutoscalingCapacity(q) + return &v +} + +func (rcb *requiredCapacityBuilder) build() client.AutoscalingCapacityInfo { + return rcb.AutoscalingCapacityInfo +} + +func (rcb *requiredCapacityBuilder) nodeMemory(m string) *requiredCapacityBuilder { + rcb.Node.Memory = ptr(value(m)) + return rcb +} + +func (rcb *requiredCapacityBuilder) tierMemory(m string) *requiredCapacityBuilder { + rcb.Total.Memory = ptr(value(m)) + return rcb +} + +func (rcb *requiredCapacityBuilder) nodeStorage(m string) *requiredCapacityBuilder { + rcb.Node.Storage = ptr(value(m)) + return rcb +} + +func (rcb *requiredCapacityBuilder) tierStorage(m string) *requiredCapacityBuilder { + rcb.Total.Storage = ptr(value(m)) + return rcb +} + +func value(v string) int64 { + q := resource.MustParse(v) + return q.Value() +} diff --git
a/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go new file mode 100644 index 0000000000..042454d2cc --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go @@ -0,0 +1,27 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/go-logr/logr" +) + +// Context contains the required objects used by the autoscaler functions. +type Context struct { + Log logr.Logger + // AutoscalingSpec is the autoscaling specification as provided by the user. + AutoscalingSpec esv1.AutoscalingPolicySpec + // NodeSets is the list of the NodeSets managed by the autoscaling specification. + NodeSets esv1.NodeSetList + // ActualAutoscalingStatus is the current resources status as stored in the Elasticsearch resource. + ActualAutoscalingStatus status.Status + // RequiredCapacity contains the Elasticsearch Autoscaling API result. + RequiredCapacity client.AutoscalingCapacityInfo + // StatusBuilder is used to track any event that should be surfaced to the user. + StatusBuilder *status.AutoscalingStatusBuilder +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go new file mode 100644 index 0000000000..0355a32062 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go @@ -0,0 +1,115 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + "fmt" + + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + corev1 "k8s.io/api/core/v1" +) + +// scaleHorizontally adds or removes nodes in a set of node sets to provide the required capacity in a tier. +func (ctx *Context) scaleHorizontally( + nodeCapacity resources.NodeResources, // resources for each node in the tier/policy, as computed by the vertical autoscaler. +) resources.NodeSetsResources { + minNodes := int(ctx.AutoscalingSpec.NodeCount.Min) + maxNodes := int(ctx.AutoscalingSpec.NodeCount.Max) + totalRequiredCapacity := ctx.RequiredCapacity.Total // total required resources, at the tier level. 
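+ + // A sketch of the arithmetic below (values assumed): with 8G per node, a minimum of 3 nodes and a 48G tier requirement, the + // minimum nodes already provide 24G, so getNodesToAdd returns ceil((48G - 24G) / 8G) = 3 and the tier ends up with 3 + 3 = 6 nodes.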
+ nodeToAdd := 0 + + // Scale horizontally to match memory requirements + if !totalRequiredCapacity.Memory.IsZero() { + nodeMemory := nodeCapacity.GetRequest(corev1.ResourceMemory) + nodeToAdd = ctx.getNodesToAdd(nodeMemory.Value(), totalRequiredCapacity.Memory.Value(), minNodes, maxNodes, string(corev1.ResourceMemory)) + } + + // Scale horizontally to match storage requirements + if !totalRequiredCapacity.Storage.IsZero() { + nodeStorage := nodeCapacity.GetRequest(corev1.ResourceStorage) + nodeToAdd = max(nodeToAdd, ctx.getNodesToAdd(nodeStorage.Value(), totalRequiredCapacity.Storage.Value(), minNodes, maxNodes, string(corev1.ResourceStorage))) + } + + totalNodes := nodeToAdd + minNodes + ctx.Log.Info("Horizontal autoscaler", "policy", ctx.AutoscalingSpec.Name, + "scope", "tier", + "count", totalNodes, + "required_capacity", totalRequiredCapacity, + ) + + nodeSetsResources := resources.NewNodeSetsResources(ctx.AutoscalingSpec.Name, ctx.NodeSets.Names()) + nodeSetsResources.NodeResources = nodeCapacity + fnm := NewFairNodesManager(ctx.Log, nodeSetsResources.NodeSetNodeCount) + for totalNodes > 0 { + fnm.AddNode() + totalNodes-- + } + + return nodeSetsResources +} + +// getNodesToAdd calculates the number of nodes to add in order to comply with the capacity requested by Elasticsearch. +func (ctx *Context) getNodesToAdd( + nodeResourceCapacity int64, // resource capacity of a single node, for example the memory of a node in the tier + totalRequiredCapacity int64, // required capacity at the tier level + minNodes, maxNodes int, // min and max number of nodes in this tier, as specified by the user in the autoscaling spec. + resourceName string, // used for logging and in events +) int { + // minResourceQuantity is the resource quantity in the tier before scaling horizontally. + minResourceQuantity := int64(minNodes) * nodeResourceCapacity + // resourceDelta holds the resource needed to comply with what is requested by Elasticsearch. + resourceDelta := totalRequiredCapacity - minResourceQuantity + // getNodeDelta translates resourceDelta into a number of nodes. + nodeToAdd := getNodeDelta(resourceDelta, nodeResourceCapacity) + + if minNodes+nodeToAdd > maxNodes { + // We would need to exceed the node count limit to fulfil the resource requirement. + ctx.Log.Info( + fmt.Sprintf("Can't provide total required %s", resourceName), + "policy", ctx.AutoscalingSpec.Name, + "scope", "tier", + "resource", resourceName, + "node_value", nodeResourceCapacity, + "requested_value", totalRequiredCapacity, + "requested_count", minNodes+nodeToAdd, + "max_count", maxNodes, + ) + + // Also surface this situation in the status. + ctx.StatusBuilder. + ForPolicy(ctx.AutoscalingSpec.Name). + WithEvent( + status.HorizontalScalingLimitReached, + fmt.Sprintf("Can't provide total required %s %d, max number of nodes is %d, requires %d nodes", resourceName, totalRequiredCapacity, maxNodes, minNodes+nodeToAdd), + ) + // Adjust the number of nodes to be added to comply with the limit specified by the user. + nodeToAdd = maxNodes - minNodes + } + return nodeToAdd +} + +// getNodeDelta computes the nodes to be added given a delta (the additional amount of resource needed) +// and the individual capacity of a single node.
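+// A few illustrative values (assumed): getNodeDelta(0, 4) == 0, getNodeDelta(-2, 4) == 0, and getNodeDelta(9, 4) == 3, +// i.e. the equivalent of ceil(9/4).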
+func getNodeDelta(delta, nodeCapacity int64) int { + nodeToAdd := 0 + if delta < 0 { + return 0 + } + + for delta > 0 { + delta -= nodeCapacity + // One more node is required to absorb the remaining delta + nodeToAdd++ + } + return nodeToAdd +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go new file mode 100644 index 0000000000..4bf9c94422 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go @@ -0,0 +1,76 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + "fmt" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// cpuFromMemory computes a CPU quantity within the range allowed by the user proportionally +// to the amount of memory requested by the autoscaling API. +func cpuFromMemory(requiredMemoryCapacity resource.Quantity, memoryRange, cpuRange esv1.QuantityRange) resource.Quantity { + allowedMemoryRange := memoryRange.Max.Value() - memoryRange.Min.Value() + if allowedMemoryRange == 0 { + // Can't scale CPU as min and max for memory are equal + return cpuRange.Min.DeepCopy() + } + memRatio := float64(requiredMemoryCapacity.Value()-memoryRange.Min.Value()) / float64(allowedMemoryRange) + + // memory is at its lowest value, return the min value for CPU + if memRatio == 0 { + return cpuRange.Min.DeepCopy() + } + // memory is at its max value, return the max value for CPU + if memRatio == 1 { + return cpuRange.Max.DeepCopy() + } + + allowedCPURange := float64(cpuRange.Max.MilliValue() - cpuRange.Min.MilliValue()) + requiredAdditionalCPUCapacity := int64(allowedCPURange * memRatio) + requiredCPUCapacityAsMilli := cpuRange.Min.MilliValue() + requiredAdditionalCPUCapacity + + // Round the CPU value up to the next full core + requiredCPUCapacityAsMilli = roundUp(requiredCPUCapacityAsMilli, 1000) + requiredCPUCapacity := resource.NewQuantity(requiredCPUCapacityAsMilli/1000, resource.DecimalSI).DeepCopy() + if requiredCPUCapacity.Cmp(cpuRange.Max) > 0 { + requiredCPUCapacity = cpuRange.Max.DeepCopy() + } + return requiredCPUCapacity +} + +// memoryFromStorage computes a memory quantity within the range allowed by the user proportionally +// to the amount of storage requested by the autoscaling API.
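+// For example (values borrowed from the tests below): with storage allowed between 1Gi and 3Gi and memory between 1Gi and 3Gi, +// a required storage of 2Gi sits halfway through the storage range, so half of the memory range is granted and 2Gi is returned.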
+func memoryFromStorage(requiredStorageCapacity resource.Quantity, storageRange, memoryRange esv1.QuantityRange) resource.Quantity { + allowedStorageRange := storageRange.Max.Value() - storageRange.Min.Value() + if allowedStorageRange == 0 { + // Can't scale memory as min and max for storage are equal + return memoryRange.Min.DeepCopy() + } + storageRatio := float64(requiredStorageCapacity.Value()-storageRange.Min.Value()) / float64(allowedStorageRange) + // storage is at its lowest value, return the min value for memory + if storageRatio == 0 { + return memoryRange.Min.DeepCopy() + } + // storage is at its maximum value, return the max value for memory + if storageRatio == 1 { + return memoryRange.Max.DeepCopy() + } + + allowedMemoryRange := float64(memoryRange.Max.Value() - memoryRange.Min.Value()) + requiredAdditionalMemoryCapacity := int64(allowedMemoryRange * storageRatio) + requiredMemoryCapacity := memoryRange.Min.Value() + requiredAdditionalMemoryCapacity + + // Round up memory to the next GB + requiredMemoryCapacity = roundUp(requiredMemoryCapacity, giga) + resourceMemoryAsGiga := resource.MustParse(fmt.Sprintf("%dGi", requiredMemoryCapacity/giga)) + + if resourceMemoryAsGiga.Cmp(memoryRange.Max) > 0 { + resourceMemoryAsGiga = memoryRange.Max.DeepCopy() + } + return resourceMemoryAsGiga +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go new file mode 100644 index 0000000000..8c68ebff84 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go @@ -0,0 +1,142 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// q is a shorthand for resource.MustParse; its only purpose is to make unit tests more readable. +func q(quantity string) resource.Quantity { + return resource.MustParse(quantity) +} + +func qPtr(quantity string) *resource.Quantity { + q := resource.MustParse(quantity) + return &q +} + +func Test_memoryFromStorage(t *testing.T) { + type args struct { + requiredStorageCapacity resource.Quantity + autoscalingSpec esv1.AutoscalingPolicySpec + } + tests := []struct { + name string + args args + wantMemory *resource.Quantity + }{ + { + name: "Required storage is at its min. value, return min memory", + args: args{ + requiredStorageCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("3Gi", "6Gi").WithStorage("2Gi", "4Gi").Build(), + }, + wantMemory: qPtr("3Gi"), + }, + { + name: "Storage range is 0, keep memory at its minimum", + args: args{ + requiredStorageCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "3Gi").WithStorage("2Gi", "2Gi").Build(), + }, + wantMemory: qPtr("1Gi"), // keep the min. value + }, + { + name: "Do not allocate more memory than max allowed", + args: args{ + requiredStorageCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "1500Mi").WithStorage("1Gi", "2Gi").Build(), + }, + wantMemory: qPtr("1500Mi"), // capped at the max. value + }, + { + name: "Do not allocate more memory than max allowed II", + args: args{ + requiredStorageCapacity: q("1800Mi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "1500Mi").WithStorage("1Gi", "2Gi").Build(), + }, + wantMemory: qPtr("1500Mi"), // capped at the max. value + }, + { + name: "Allocate the max. amount of memory when possible", + args: args{ + requiredStorageCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "2256Mi").WithStorage("1Gi", "2Gi").Build(), + }, + wantMemory: qPtr("2256Mi"), // max. value is reachable here + }, + { + name: "Half of the storage range should be translated to the rounded value of half of the memory range", + args: args{ + requiredStorageCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "3Gi").WithStorage("1Gi", "3Gi").Build(), + }, + wantMemory: qPtr("2Gi"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := memoryFromStorage(tt.args.requiredStorageCapacity, *tt.args.autoscalingSpec.Storage, *tt.args.autoscalingSpec.Memory); !got.Equal(*tt.wantMemory) { + t.Errorf("memoryFromStorage() = %v, want %v", got, tt.wantMemory) + } + }) + } +} + +func Test_cpuFromMemory(t *testing.T) { + type args struct { + requiredMemoryCapacity resource.Quantity + autoscalingSpec esv1.AutoscalingPolicySpec + } + tests := []struct { + name string + args args + wantCPU *resource.Quantity + }{ + { + name: "Memory is at its min value, do not scale up CPU", + args: args{ + requiredMemoryCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithCPU("1", "3").WithMemory("2Gi", "2Gi").Build(), + }, + wantCPU: resource.NewQuantity(1, resource.DecimalSI), // keep the min. value + }, + { + name: "1/3 of the memory range should be translated to 1/3 of the CPU range", + args: args{ + requiredMemoryCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithCPU("1", "4").WithMemory("1Gi", "4Gi").Build(), + }, + wantCPU: resource.NewQuantity(2, resource.DecimalSI), + }, + { + name: "Half of the memory range should be translated to the rounded value of half of the CPU range", + args: args{ + requiredMemoryCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithCPU("1", "4").WithMemory("1Gi", "3Gi").Build(), + }, + wantCPU: qPtr("3"), // 2500 rounded to 3000 + }, + { + name: "min and max CPU are equal", + args: args{ + requiredMemoryCapacity: q("2Gi"), + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithCPU("4", "4").WithMemory("1Gi", "3Gi").Build(), + }, + wantCPU: qPtr("4000m"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := cpuFromMemory(tt.args.requiredMemoryCapacity, *tt.args.autoscalingSpec.Memory, *tt.args.autoscalingSpec.CPU); !got.Equal(*tt.wantCPU) { + t.Errorf("cpuFromMemory() = %v, want %v", got, tt.wantCPU) + } + }) + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go new file mode 100644 index 0000000000..a6ace2247e --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go @@ -0,0 +1,49 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements.
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + "sort" + "strings" + + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/go-logr/logr" +) + +// FairNodesManager helps to distribute nodes among several node sets that belong to the same tier. +type FairNodesManager struct { + log logr.Logger + nodeSetNodeCountList resources.NodeSetNodeCountList +} + +// sort sorts node sets by the value of the NodeCount field, giving priority to node sets with fewer nodes. +// If several node sets have the same number of nodes they are sorted alphabetically. +func (fnm *FairNodesManager) sort() { + sort.SliceStable(fnm.nodeSetNodeCountList, func(i, j int) bool { + if fnm.nodeSetNodeCountList[i].NodeCount == fnm.nodeSetNodeCountList[j].NodeCount { + return strings.Compare(fnm.nodeSetNodeCountList[i].Name, fnm.nodeSetNodeCountList[j].Name) < 0 + } + return fnm.nodeSetNodeCountList[i].NodeCount < fnm.nodeSetNodeCountList[j].NodeCount + }) +} + +func NewFairNodesManager(log logr.Logger, nodeSetNodeCount []resources.NodeSetNodeCount) FairNodesManager { + fnm := FairNodesManager{ + log: log, + nodeSetNodeCountList: nodeSetNodeCount, + } + fnm.sort() + return fnm +} + +// AddNode selects the nodeSet with the highest priority and increases its NodeCount field by one. +// Priority is defined as the nodeSet with the lowest NodeCount value, or the first nodeSet in alphabetical order if +// several node sets have the same NodeCount value. +func (fnm *FairNodesManager) AddNode() { + // Pick the first element; it is the one with the fewest nodes + fnm.nodeSetNodeCountList[0].NodeCount++ + // Ensure the set is sorted + fnm.sort() +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go new file mode 100644 index 0000000000..cfe57c99ea --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go @@ -0,0 +1,67 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License.
+ +package autoscaler + +import ( + "testing" + + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/stretchr/testify/assert" +) + +func TestFairNodesManager_AddNode(t *testing.T) { + type fields struct { + nodeSetNodeCountList []resources.NodeSetNodeCount + } + tests := []struct { + name string + fields fields + assertFunc func(t *testing.T, fnm FairNodesManager) + }{ + { + name: "One nodeSet", + fields: fields{ + nodeSetNodeCountList: []resources.NodeSetNodeCount{{Name: "nodeset-1"}}, + }, + assertFunc: func(t *testing.T, fnm FairNodesManager) { + assert.Equal(t, 1, len(fnm.nodeSetNodeCountList)) + assert.Equal(t, int32(0), fnm.nodeSetNodeCountList[0].NodeCount) + fnm.AddNode() + assert.Equal(t, int32(1), fnm.nodeSetNodeCountList[0].NodeCount) + fnm.AddNode() + assert.Equal(t, int32(2), fnm.nodeSetNodeCountList[0].NodeCount) + }, + }, + { + name: "Several NodeSets", + fields: fields{ + nodeSetNodeCountList: []resources.NodeSetNodeCount{{Name: "nodeset-1"}, {Name: "nodeset-2"}}, + }, + assertFunc: func(t *testing.T, fnm FairNodesManager) { + assert.Equal(t, 2, len(fnm.nodeSetNodeCountList)) + assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) + assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) + + fnm.AddNode() + assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) + assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) + + fnm.AddNode() + assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) + assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) + + fnm.AddNode() + assert.Equal(t, int32(2), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) + assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fnm := NewFairNodesManager(logTest, tt.fields.nodeSetNodeCountList) + tt.assertFunc(t, fnm) + }) + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go new file mode 100644 index 0000000000..1e5975297c --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go @@ -0,0 +1,129 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package autoscaler + +import ( + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// GetOfflineNodeSetsResources attempts to create or restore resources.NodeSetsResources without an actual autoscaling +// decision from Elasticsearch. It ensures that even if no decision has been returned by the autoscaling API then +// the NodeSets still respect the min. and max. resource requirements specified by the user. +// If resources are within the min. and max. boundaries then they are left untouched. 
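+// For example (scenario borrowed from the tests below): if the user raised the memory floor to 50Gi while the stored status only +// records 3Gi per node, the restored memory request is bumped to 50Gi; a 35Gi storage request found in the status is kept as is, +// since storage is never scaled down.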
+func GetOfflineNodeSetsResources( + log logr.Logger, + nodeSets []string, + autoscalingSpec esv1.AutoscalingPolicySpec, + actualAutoscalingStatus status.Status, +) resources.NodeSetsResources { + actualNamedTierResources, hasNamedTierResources := actualAutoscalingStatus.GetNamedTierResources(autoscalingSpec.Name) + + var namedTierResources resources.NodeSetsResources + var expectedNodeCount int32 + if !hasNamedTierResources { + // There's no current status for this autoscaling policy; this happens when the Elasticsearch cluster does not exist. + // In that case we create a new one from the minimum values provided by the user. + namedTierResources = newMinNodeSetResources(autoscalingSpec, nodeSets) + } else { + // The status contains some resource values for the NodeSets managed by this autoscaling policy, let's reuse them. + namedTierResources = nodeSetResourcesFromStatus(actualAutoscalingStatus, actualNamedTierResources, autoscalingSpec, nodeSets) + for _, nodeSet := range actualNamedTierResources.NodeSetNodeCount { + expectedNodeCount += nodeSet.NodeCount + } + } + + // Ensure that the expected number of nodes is within the allowed range. + if expectedNodeCount < autoscalingSpec.NodeCount.Min { + expectedNodeCount = autoscalingSpec.NodeCount.Min + } else if expectedNodeCount > autoscalingSpec.NodeCount.Max { + expectedNodeCount = autoscalingSpec.NodeCount.Max + } + + // The user may have added or removed some NodeSets while the autoscaling API was not available. + // We distribute the nodes to reflect that change. + fnm := NewFairNodesManager(log, namedTierResources.NodeSetNodeCount) + for expectedNodeCount > 0 { + fnm.AddNode() + expectedNodeCount-- + } + + log.Info( + "Offline autoscaling", + "state", "offline", + "policy", autoscalingSpec.Name, + "nodeset", namedTierResources.NodeSetNodeCount, + "count", namedTierResources.NodeSetNodeCount.TotalNodeCount(), + "resources", namedTierResources.ToInt64(), + ) + return namedTierResources +} + +// nodeSetResourcesFromStatus restores NodeSetResources from the status. +// If the user removed the limits while offline we assume that they want to take back control of the resources. +func nodeSetResourcesFromStatus( + actualAutoscalingStatus status.Status, + actualNamedTierResources resources.NodeSetsResources, + autoscalingSpec esv1.AutoscalingPolicySpec, + nodeSets []string, +) resources.NodeSetsResources { + namedTierResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) + // Ensure memory settings are in the allowed limit range. + if autoscalingSpec.IsMemoryDefined() { + if actualNamedTierResources.HasRequest(corev1.ResourceMemory) { + namedTierResources.SetRequest( + corev1.ResourceMemory, + adjustQuantity(actualNamedTierResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), + ) + } else { + namedTierResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) + } + } + + // Ensure CPU settings are in the allowed limit range.
+ if autoscalingSpec.IsCPUDefined() { + if actualNamedTierResources.HasRequest(corev1.ResourceCPU) { + namedTierResources.SetRequest( + corev1.ResourceCPU, + adjustQuantity(actualNamedTierResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), + ) + } else { + namedTierResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) + } + } + + // Ensure storage capacity is set + namedTierResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, actualAutoscalingStatus)) + return namedTierResources +} + +// newMinNodeSetResources returns a NodeSetResources with minimum values +func newMinNodeSetResources(autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets []string) resources.NodeSetsResources { + namedTierResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) + if autoscalingSpec.IsCPUDefined() { + namedTierResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) + } + if autoscalingSpec.IsMemoryDefined() { + namedTierResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) + } + if autoscalingSpec.IsStorageDefined() { + namedTierResources.SetRequest(corev1.ResourceStorage, autoscalingSpec.Storage.Min.DeepCopy()) + } + return namedTierResources +} + +// adjustQuantity ensures that a quantity is between a min and a max. +func adjustQuantity(value, min, max resource.Quantity) resource.Quantity { + if value.Cmp(min) < 0 { + return min + } else if value.Cmp(max) > 0 { + return max + } + return value +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go new file mode 100644 index 0000000000..9970b6dba0 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go @@ -0,0 +1,88 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License.
+ +package autoscaler + +import ( + "reflect" + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + logf "sigs.k8s.io/controller-runtime/pkg/log" +) + +var logTest = logf.Log.WithName("autoscaling-test") + +func TestGetOfflineNodeSetsResources(t *testing.T) { + type args struct { + nodeSets []string + autoscalingSpec esv1.AutoscalingPolicySpec + actualAutoscalingStatus status.Status + } + tests := []struct { + name string + args args + want resources.NodeSetsResources + }{ + { + name: "Do not scale down storage", + args: args{ + nodeSets: []string{"region-a", "region-b"}, + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("2Gi", "6Gi").WithStorage("10Gi", "20Gi").Build(), + actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}, + }, + }, + { + name: "Min. 
value has been increased by user", + args: args{ + nodeSets: []string{"region-a", "region-b"}, + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("50Gi", "60Gi").WithStorage("10Gi", "20Gi").Build(), + actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("50Gi" /* memory should be increased */), corev1.ResourceStorage: q("35Gi")}}, + }, + }, + { + name: "New nodeSet is added by user while offline", + args: args{ + nodeSets: []string{"region-a", "region-b", "region-new"}, + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("2Gi", "6Gi").WithStorage("10Gi", "20Gi").Build(), + actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 2}, {Name: "region-b", NodeCount: 2}, {Name: "region-new", NodeCount: 2}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := GetOfflineNodeSetsResources(logTest, tt.args.nodeSets, tt.args.autoscalingSpec, tt.args.actualAutoscalingStatus); !reflect.DeepEqual(got, tt.want) { + t.Errorf("GetOfflineNodeSetsResources() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go new file mode 100644 index 0000000000..44e7126868 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -0,0 +1,166 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package autoscaler + +import ( + "fmt" + + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +var giga = int64(1024 * 1024 * 1024) + +// nodeResources computes the desired amount of memory and storage for a node managed by a given AutoscalingPolicySpec. +func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Quantity) resources.NodeResources { + nodeResources := resources.NodeResources{} + + // Compute desired memory quantity for the nodes managed by this AutoscalingPolicySpec. + if !ctx.RequiredCapacity.Node.Memory.IsEmpty() { + memoryRequest := ctx.getResourceValue( + ctx.AutoscalingSpec.Name, + "memory", + ctx.RequiredCapacity.Node.Memory, + ctx.RequiredCapacity.Total.Memory, + minNodesCount, + ctx.AutoscalingSpec.Memory.Min, + ctx.AutoscalingSpec.Memory.Max, + ) + nodeResources.SetRequest(corev1.ResourceMemory, memoryRequest) + } + + // Compute desired storage quantity for the nodes managed by this AutoscalingPolicySpec. + if !ctx.RequiredCapacity.Node.Storage.IsEmpty() { + storageRequest := ctx.getResourceValue( + ctx.AutoscalingSpec.Name, + "storage", + ctx.RequiredCapacity.Node.Storage, + ctx.RequiredCapacity.Total.Storage, + minNodesCount, + ctx.AutoscalingSpec.Storage.Min, + ctx.AutoscalingSpec.Storage.Max, + ) + if storageRequest.Cmp(currentStorage) < 0 { + // Do not decrease storage capacity + storageRequest = currentStorage + } + nodeResources.SetRequest(corev1.ResourceStorage, storageRequest) + } + + // If no memory has been returned by the autoscaling API, but the user has expressed the intent to manage memory + // using the autoscaling specification then we derive the memory from the storage if available. + // See https://github.com/elastic/cloud-on-k8s/issues/4076 + if !nodeResources.HasRequest(corev1.ResourceMemory) && ctx.AutoscalingSpec.IsMemoryDefined() && + ctx.AutoscalingSpec.IsStorageDefined() && nodeResources.HasRequest(corev1.ResourceStorage) { + nodeResources.SetRequest(corev1.ResourceMemory, memoryFromStorage(nodeResources.GetRequest(corev1.ResourceStorage), *ctx.AutoscalingSpec.Storage, *ctx.AutoscalingSpec.Memory)) + } + + // Same as above, if CPU limits have been expressed by the user in the autoscaling specification then we adjust CPU request according to the memory request. + // See https://github.com/elastic/cloud-on-k8s/issues/4021 + if ctx.AutoscalingSpec.IsCPUDefined() && ctx.AutoscalingSpec.IsMemoryDefined() && nodeResources.HasRequest(corev1.ResourceMemory) { + nodeResources.SetRequest(corev1.ResourceCPU, cpuFromMemory(nodeResources.GetRequest(corev1.ResourceMemory), *ctx.AutoscalingSpec.Memory, *ctx.AutoscalingSpec.CPU)) + } + + return nodeResources +} + +// getResourceValue calculates the desired quantity for a specific resource for a node in a tier. This value is +// calculated according to the required value from the Elasticsearch autoscaling API and the resource constraints (limits) +// set by the user in the autoscaling specification. 
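+// A worked example (values assumed, matching the "Scale existing nodes vertically up to the tier limit" test case above): with a +// 5G..8G memory range, a minimum of 3 nodes, a node requirement of 6G and a tier requirement of 21G, the node value is raised to +// 21G / 3 = 7G so that the tier fits on the minimum node count, then rounded up to the next Gi boundary, giving 7Gi per node.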
+func (ctx *Context) getResourceValue( + autoscalingPolicyName, resourceType string, + nodeRequired *client.AutoscalingCapacity, // node required capacity as returned by the Elasticsearch API + totalRequired *client.AutoscalingCapacity, // tier required capacity as returned by the Elasticsearch API, considered as optional + minNodesCount int64, // the minimum number of nodes that will be deployed + min, max resource.Quantity, // as expressed by the user +) resource.Quantity { + if nodeRequired.IsZero() && totalRequired.IsZero() { + // Elasticsearch has returned 0 for both the node and the tier level. Scale down resources to minimum. + return resourceToQuantity(min.Value()) + } + + // Surface the situation where a resource is exhausted. + if nodeRequired.Value() > max.Value() { + // Elasticsearch requested more capacity per node than allowed by the user + err := fmt.Errorf("node required %s is greater than the maximum one", resourceType) + ctx.Log.Error( + err, err.Error(), + "scope", "node", + "policy", autoscalingPolicyName, + "required_"+resourceType, nodeRequired, + "max_allowed_"+resourceType, max.Value(), + ) + // Also update the autoscaling status accordingly + ctx.StatusBuilder. + ForPolicy(autoscalingPolicyName). + WithEvent( + status.VerticalScalingLimitReached, + fmt.Sprintf("Node required %s %d is greater than max allowed: %d", resourceType, nodeRequired, max.Value()), + ) + } + + nodeResource := nodeRequired.Value() + if minNodesCount == 0 { + // Elasticsearch returned some resources, even if the user allowed an empty nodeSet we need at least 1 node to host them. + minNodesCount = 1 + } + // Adjust the node requested capacity to try to fit the tier requested capacity. + // This is done to check if the required resources at the tier level can fit on the minimum number of nodes scaled to + // their maximums, and thus avoid scaling horizontally when scaling vertically to the maximum is enough. + if totalRequired != nil && minNodesCount > 0 { + tierResourcePerNode := (*totalRequired).Value() / minNodesCount + nodeResource = max64(nodeResource, tierResourcePerNode) + } + + // Round the value up to the next gigabyte + nodeResource = roundUp(nodeResource, giga) + + // Always ensure that the calculated resource quantity is at least equal to the min. limit provided by the user. + if nodeResource < min.Value() { + nodeResource = min.Value() + } + + // Resource has been rounded up or scaled up to meet the tier requirements. We need to check that those operations + // do not result in a resource quantity which is greater than the max. limit set by the user. + if nodeResource > max.Value() { + nodeResource = max.Value() + } + + return resourceToQuantity(nodeResource) +} + +// resourceToQuantity attempts to convert a raw integer value into a human readable quantity. +func resourceToQuantity(nodeResource int64) resource.Quantity { + var nodeQuantity resource.Quantity + if nodeResource >= giga && nodeResource%giga == 0 { + // When it's possible we may want to express the memory with a "human readable unit" like the Gi unit + nodeQuantity = resource.MustParse(fmt.Sprintf("%dGi", nodeResource/giga)) + } else { + nodeQuantity = resource.NewQuantity(nodeResource, resource.DecimalSI).DeepCopy() + } + return nodeQuantity +} + +func max64(x int64, others ...int64) int64 { + max := x + for _, other := range others { + if other > max { + max = other + } + } + return max +} + +// roundUp rounds v up to the nearest multiple of n.
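+// A few illustrative values: roundUp(1001, 1000) == 2000, roundUp(2000, 1000) == 2000, and roundUp(7000000000, giga) == 7516192768 +// (7Gi), which is how the 7G requirement in the example above becomes a 7Gi request.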
+func roundUp(v, n int64) int64 { + r := v % n + if r == 0 { + return v + } + return v + n - r +} diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go new file mode 100644 index 0000000000..5a82b1a06d --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -0,0 +1,228 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package elasticsearch + +import ( + "context" + "fmt" + "time" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/common" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/annotation" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/certificates" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/events" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/license" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/operator" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/reconciler" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/version" + esclient "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/services" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/user" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/validation" + "github.com/elastic/cloud-on-k8s/pkg/utils/k8s" + logconf "github.com/elastic/cloud-on-k8s/pkg/utils/log" + "github.com/elastic/cloud-on-k8s/pkg/utils/net" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type EsClientProvider func(ctx context.Context, c k8s.Client, dialer net.Dialer, es esv1.Elasticsearch) (esclient.Client, error) + +const ( + controllerName = "elasticsearch-autoscaling" + + enterpriseFeaturesDisabledMsg = "Autoscaling is an enterprise feature. Enterprise features are disabled" +) + +var defaultReconcile = reconcile.Result{ + Requeue: true, + RequeueAfter: 60 * time.Second, +} + +// ReconcileElasticsearch reconciles autoscaling policies and Elasticsearch resources specifications based on autoscaling decisions. +type ReconcileElasticsearch struct { + k8s.Client + operator.Parameters + esClientProvider EsClientProvider + recorder record.EventRecorder + licenseChecker license.Checker + + // iteration is the number of times this controller has run its Reconcile method + iteration uint64 +} + +// NewReconciler returns a new reconcile.Reconciler +func NewReconciler(mgr manager.Manager, params operator.Parameters) *ReconcileElasticsearch { + c := mgr.GetClient() + return &ReconcileElasticsearch{ + Client: c, + Parameters: params, + esClientProvider: newElasticsearchClient, + recorder: mgr.GetEventRecorderFor(controllerName), + licenseChecker: license.NewLicenseChecker(c, params.OperatorNamespace), + } +} + +// Reconcile updates the ResourceRequirements and PersistentVolumeClaim fields for each elasticsearch container in a +// NodeSet managed by an autoscaling policy. 
ResourceRequirements are updated according to the response of the Elasticsearch +// _autoscaling/capacity API and given the constraints provided by the user in the autoscaling specification. +func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + ctx = common.NewReconciliationContext(ctx, &r.iteration, r.Tracer, controllerName, "es_name", request) + defer common.LogReconciliationRunNoSideEffects(logconf.FromContext(ctx))() + defer tracing.EndContextTransaction(ctx) + + // Fetch the Elasticsearch instance + var es esv1.Elasticsearch + requeue, err := r.fetchElasticsearch(ctx, request, &es) + if err != nil || requeue { + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + if !es.IsAutoscalingDefined() { + return reconcile.Result{}, nil + } + + log := logconf.FromContext(ctx) + + enabled, err := r.licenseChecker.EnterpriseFeaturesEnabled() + if err != nil { + return reconcile.Result{}, err + } + if !enabled { + log.Info(enterpriseFeaturesDisabledMsg) + r.recorder.Eventf(&es, corev1.EventTypeWarning, license.EventInvalidLicense, enterpriseFeaturesDisabledMsg) + // We still schedule a reconciliation in case a valid license is applied later + return defaultReconcile, nil + } + + if common.IsUnmanaged(&es) { + log.Info("Object is currently not managed by this controller. Skipping reconciliation", "namespace", es.Namespace, "es_name", es.Name) + return reconcile.Result{}, nil + } + + selector := map[string]string{label.ClusterNameLabelName: es.Name} + compat, err := annotation.ReconcileCompatibility(ctx, r.Client, &es, selector, r.OperatorInfo.BuildInfo.Version) + if err != nil { + k8s.EmitErrorEvent(r.recorder, err, &es, events.EventCompatCheckError, "Error during compatibility check: %v", err) + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + if !compat { + // this resource is not able to be reconciled by this version of the controller, so we will skip it and not requeue + return reconcile.Result{}, nil + } + + // Get resource policies from the Elasticsearch spec + autoscalingSpecification, err := es.GetAutoscalingSpecification() + if err != nil { + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + // Validate Elasticsearch and Autoscaling spec + if err := validation.ValidateElasticsearch(es); err != nil { + log.Error( + err, + "Elasticsearch manifest validation failed", + "namespace", es.Namespace, + "es_name", es.Name, + ) + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + // Build status from annotation or existing resources + autoscalingStatus, err := status.GetStatus(es) + if err != nil { + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + if len(autoscalingSpecification.AutoscalingPolicySpecs) == 0 && len(autoscalingStatus.AutoscalingPolicyStatuses) == 0 { + // This cluster is not managed by the autoscaler + return reconcile.Result{}, nil + } + + // Compute named tiers + namedTiers, nodeSetErr := autoscalingSpecification.GetAutoscaledNodeSets() + if nodeSetErr != nil { + return reconcile.Result{}, tracing.CaptureError(ctx, nodeSetErr) + } + log.V(1).Info("Named tiers", "named_tiers", namedTiers) + + // Import existing resources in the actual Status if the cluster is managed by some autoscaling policies but + // the status annotation does not exist. 
+ if err := autoscalingStatus.ImportExistingResources(log, r.Client, autoscalingSpecification, namedTiers); err != nil { + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + + // Call the main function + current, err := r.reconcileInternal(ctx, autoscalingStatus, namedTiers, autoscalingSpecification, es) + if err != nil { + return reconcile.Result{}, tracing.CaptureError(ctx, err) + } + results := &reconciler.Results{} + return results.WithResult(defaultReconcile).WithResult(current).Aggregate() +} + +func newElasticsearchClient( + ctx context.Context, + c k8s.Client, + dialer net.Dialer, + es esv1.Elasticsearch, +) (esclient.Client, error) { + defer tracing.Span(&ctx)() + url := services.ExternalServiceURL(es) + v, err := version.Parse(es.Spec.Version) + if err != nil { + return nil, err + } + // Get user Secret + var controllerUserSecret corev1.Secret + key := types.NamespacedName{ + Namespace: es.Namespace, + Name: esv1.InternalUsersSecret(es.Name), + } + if err := c.Get(context.Background(), key, &controllerUserSecret); err != nil { + return nil, err + } + password, ok := controllerUserSecret.Data[user.ControllerUserName] + if !ok { + return nil, fmt.Errorf("controller user %s not found in Secret %s/%s", user.ControllerUserName, key.Namespace, key.Name) + } + + // Get public certs + var caSecret corev1.Secret + key = types.NamespacedName{ + Namespace: es.Namespace, + Name: certificates.PublicCertsSecretName(esv1.ESNamer, es.Name), + } + if err := c.Get(context.Background(), key, &caSecret); err != nil { + return nil, err + } + trustedCerts, ok := caSecret.Data[certificates.CertFileName] + if !ok { + return nil, fmt.Errorf("%s not found in Secret %s/%s", certificates.CertFileName, key.Namespace, key.Name) + } + caCerts, err := certificates.ParsePEMCerts(trustedCerts) + if err != nil { + return nil, err + } + return esclient.NewElasticsearchClient( + dialer, + url, + esclient.BasicAuth{ + Name: user.ControllerUserName, + Password: string(password), + }, + *v, + caCerts, + esclient.Timeout(es), + ), nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/controller_test.go b/pkg/controller/autoscaling/elasticsearch/controller_test.go new file mode 100644 index 0000000000..8b68fdf6a3 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/controller_test.go @@ -0,0 +1,362 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
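Before the test file below, note that the esClientProvider indirection is what makes the reconciler testable without a live cluster; the tests swap in a fake client. A minimal sketch of that seam, using the types from this patch (newStubProvider itself is hypothetical):

```go
package elasticsearch

import (
	"context"

	esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1"
	esclient "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client"
	"github.com/elastic/cloud-on-k8s/pkg/utils/k8s"
	"github.com/elastic/cloud-on-k8s/pkg/utils/net"
)

// newStubProvider returns an EsClientProvider that always hands back the given
// client, mirroring what fakeEsClient.newFakeElasticsearchClient does in the tests.
func newStubProvider(stub esclient.Client) EsClientProvider {
	return func(_ context.Context, _ k8s.Client, _ net.Dialer, _ esv1.Elasticsearch) (esclient.Client, error) {
		return stub, nil
	}
}
```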
+ +package elasticsearch + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "path/filepath" + "reflect" + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/license" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/operator" + esclient "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/services" + "github.com/elastic/cloud-on-k8s/pkg/utils/k8s" + "github.com/elastic/cloud-on-k8s/pkg/utils/net" + "github.com/ghodss/yaml" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +var ( + fetchEvents = func(recorder *record.FakeRecorder) []string { + events := make([]string, 0) + select { + case event := <-recorder.Events: + events = append(events, event) + default: + break + } + return events + } + + fakeService = &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "testns", + Name: services.ExternalServiceName("testes"), + }, + } + fakeEndpoints = &corev1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "testns", + Name: services.ExternalServiceName("testes"), + }, + Subsets: []corev1.EndpointSubset{{ + Addresses: []corev1.EndpointAddress{{ + IP: "10.0.0.2", + }}, + Ports: []corev1.EndpointPort{}, + }}, + } +) + +func TestReconcile(t *testing.T) { + type fields struct { + EsClient *fakeEsClient + Parameters operator.Parameters + recorder *record.FakeRecorder + licenseChecker license.Checker + } + type args struct { + esManifest string + isOnline bool + } + tests := []struct { + name string + fields fields + args args + want reconcile.Result + wantEvents []string + wantErr bool + }{ + { + name: "ML case where tier total memory was lower than node memory", + fields: fields{ + EsClient: newFakeEsClient(t).withCapacity("ml"), + Parameters: operator.Parameters{}, + recorder: record.NewFakeRecorder(1000), + licenseChecker: &fakeLicenceChecker{}, + }, + args: args{ + esManifest: "ml", + isOnline: true, + }, + want: defaultReconcile, + wantErr: false, + wantEvents: []string{}, + }, + { + name: "Simulate an error while updating the autoscaling policies, we still want to respect min nodes count set by user", + fields: fields{ + EsClient: newFakeEsClient(t).withErrorOnDeleteAutoscalingAutoscalingPolicies(), + Parameters: operator.Parameters{}, + recorder: record.NewFakeRecorder(1000), + licenseChecker: &fakeLicenceChecker{}, + }, + args: args{ + esManifest: "min-nodes-increased-by-user", + isOnline: true, // Online, but an error will be raised when updating the autoscaling policies. + }, + want: reconcile.Result{}, + wantErr: true, // Autoscaling API error should be returned. 
+      wantEvents: []string{},
+    },
+    {
+      name: "Cluster is online but the answer from the API is empty, do not touch anything",
+      fields: fields{
+        EsClient:       newFakeEsClient(t).withCapacity("empty-autoscaling-api-response"),
+        Parameters:     operator.Parameters{},
+        recorder:       record.NewFakeRecorder(1000),
+        licenseChecker: &fakeLicenceChecker{},
+      },
+      args: args{
+        esManifest: "empty-autoscaling-api-response",
+        isOnline:   true,
+      },
+      want: defaultReconcile,
+    },
+    {
+      name: "Cluster has just been created, initialize resources",
+      fields: fields{
+        EsClient:       newFakeEsClient(t),
+        Parameters:     operator.Parameters{},
+        recorder:       record.NewFakeRecorder(1000),
+        licenseChecker: &fakeLicenceChecker{},
+      },
+      args: args{
+        esManifest: "cluster-creation",
+        isOnline:   false,
+      },
+      want: defaultReconcile,
+    },
+    {
+      name: "Cluster is online, data tier has reached max. capacity",
+      fields: fields{
+        EsClient:       newFakeEsClient(t).withCapacity("max-storage-reached"),
+        Parameters:     operator.Parameters{},
+        recorder:       record.NewFakeRecorder(1000),
+        licenseChecker: &fakeLicenceChecker{},
+      },
+      args: args{
+        esManifest: "max-storage-reached",
+        isOnline:   true,
+      },
+      want:       defaultReconcile,
+      wantEvents: []string{"Warning HorizontalScalingLimitReached Can't provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"},
+    },
+    {
+      name: "Cluster is online, data tier needs to be scaled up from 8 to 9 nodes",
+      fields: fields{
+        EsClient:       newFakeEsClient(t).withCapacity("storage-scaled-horizontally"),
+        Parameters:     operator.Parameters{},
+        recorder:       record.NewFakeRecorder(1000),
+        licenseChecker: &fakeLicenceChecker{},
+      },
+      args: args{
+        esManifest: "storage-scaled-horizontally",
+        isOnline:   true,
+      },
+      want: defaultReconcile,
+    },
+    {
+      name: "Cluster does not exist",
+      fields: fields{
+        EsClient:       newFakeEsClient(t),
+        Parameters:     operator.Parameters{},
+        recorder:       record.NewFakeRecorder(1000),
+        licenseChecker: &fakeLicenceChecker{},
+      },
+      args: args{
+        esManifest: "",
+      },
+      want: reconcile.Result{
+        Requeue:      false,
+        RequeueAfter: 0,
+      },
+      wantErr:    false,
+      wantEvents: []string{},
+    },
+  }
+  for _, tt := range tests {
+    t.Run(tt.name, func(t *testing.T) {
+      k8sClient := k8s.NewFakeClient()
+      if tt.args.esManifest != "" {
+        // Load the actual Elasticsearch resource from the sample files.
+        es := esv1.Elasticsearch{}
+        bytes, err := ioutil.ReadFile(filepath.Join("testdata", tt.args.esManifest, "elasticsearch.yml"))
+        require.NoError(t, err)
+        if err := yaml.Unmarshal(bytes, &es); err != nil {
+          t.Fatalf("yaml.Unmarshal error = %v, wantErr %v", err, tt.wantErr)
+        }
+        if tt.args.isOnline {
+          k8sClient = k8s.NewFakeClient(es.DeepCopy(), fakeService, fakeEndpoints)
+        } else {
+          k8sClient = k8s.NewFakeClient(es.DeepCopy())
+        }
+      }
+
+      r := &ReconcileElasticsearch{
+        Client:           k8sClient,
+        esClientProvider: tt.fields.EsClient.newFakeElasticsearchClient,
+        Parameters:       tt.fields.Parameters,
+        recorder:         tt.fields.recorder,
+        licenseChecker:   tt.fields.licenseChecker,
+      }
+      got, err := r.Reconcile(
+        context.Background(),
+        reconcile.Request{NamespacedName: types.NamespacedName{
+          Namespace: "testns",
+          Name:      "testes", // All the samples must have this name
+        }})
+      if (err != nil) != tt.wantErr {
+        t.Errorf("autoscaling.Reconcile() error = %v, wantErr %v", err, tt.wantErr)
+        return
+      }
+      if !reflect.DeepEqual(got, tt.want) {
+        t.Errorf("ReconcileElasticsearch.reconcileInternal() = %v, want %v", got, tt.want)
+      }
+      if tt.args.esManifest != "" {
+        // Get back the Elasticsearch resource from the API Server.
+        updatedElasticsearch := esv1.Elasticsearch{}
+        require.NoError(t, k8sClient.Get(context.Background(), client.ObjectKey{Namespace: "testns", Name: "testes"}, &updatedElasticsearch))
+        // Read the expected Elasticsearch resource.
+        expectedElasticsearch := esv1.Elasticsearch{}
+        bytes, err := ioutil.ReadFile(filepath.Join("testdata", tt.args.esManifest, "elasticsearch-expected.yml"))
+        require.NoError(t, err)
+        require.NoError(t, yaml.Unmarshal(bytes, &expectedElasticsearch))
+        assert.Equal(t, updatedElasticsearch.Spec, expectedElasticsearch.Spec)
+        // Check that the autoscaling spec is still the expected one.
+        assert.Equal(
+          t,
+          updatedElasticsearch.Annotations[esv1.ElasticsearchAutoscalingSpecAnnotationName],
+          expectedElasticsearch.Annotations[esv1.ElasticsearchAutoscalingSpecAnnotationName],
+        )
+        // Compare the statuses.
+        statusesEqual(t, updatedElasticsearch, expectedElasticsearch)
+        // Check the raised events.
+        gotEvents := fetchEvents(tt.fields.recorder)
+        require.ElementsMatch(t, tt.wantEvents, gotEvents)
+      }
+    })
+  }
+}
+
+func statusesEqual(t *testing.T, got, want esv1.Elasticsearch) {
+  gotStatus, err := status.GetStatus(got)
+  require.NoError(t, err)
+  wantStatus, err := status.GetStatus(want)
+  require.NoError(t, err)
+  require.Equal(t, len(gotStatus.AutoscalingPolicyStatuses), len(wantStatus.AutoscalingPolicyStatuses))
+  for _, wantPolicyStatus := range wantStatus.AutoscalingPolicyStatuses {
+    gotPolicyStatus := getPolicyStatus(gotStatus.AutoscalingPolicyStatuses, wantPolicyStatus.Name)
+    require.NotNil(t, gotPolicyStatus, "Autoscaling policy not found")
+    require.ElementsMatch(t, gotPolicyStatus.NodeSetNodeCount, wantPolicyStatus.NodeSetNodeCount)
+    for resource := range wantPolicyStatus.ResourcesSpecification.Requests {
+      require.True(t, resources.ResourceEqual(resource, wantPolicyStatus.ResourcesSpecification.Requests, gotPolicyStatus.ResourcesSpecification.Requests))
+    }
+    for resource := range wantPolicyStatus.ResourcesSpecification.Limits {
+      require.True(t, resources.ResourceEqual(resource, wantPolicyStatus.ResourcesSpecification.Limits, gotPolicyStatus.ResourcesSpecification.Limits))
+    }
+  }
+}
+
+func getPolicyStatus(autoscalingPolicyStatuses []status.AutoscalingPolicyStatus, name string) *status.AutoscalingPolicyStatus {
+  for _, policyStatus := range autoscalingPolicyStatuses {
+    if policyStatus.Name == name {
+      return &policyStatus
+    }
+  }
+  return nil
+}
+
+// - Fake Elasticsearch Autoscaling Client
+
+type fakeEsClient struct {
+  t *testing.T
+  esclient.Client
+
+  autoscalingPolicies esclient.AutoscalingCapacityResult
+
+  policiesCleaned                             bool
+  errorOnDeleteAutoscalingAutoscalingPolicies bool
+  updatedPolicies                             map[string]esv1.AutoscalingPolicy
+}
+
+func newFakeEsClient(t *testing.T) *fakeEsClient {
+  return &fakeEsClient{
+    t:                   t,
+    autoscalingPolicies: esclient.AutoscalingCapacityResult{Policies: make(map[string]esclient.AutoscalingPolicyResult)},
+    updatedPolicies:     make(map[string]esv1.AutoscalingPolicy),
+  }
+}
+
+func (f *fakeEsClient) withCapacity(testdata string) *fakeEsClient {
+  policies := esclient.AutoscalingCapacityResult{}
+  bytes, err := ioutil.ReadFile("testdata/" + testdata + "/capacity.json")
+  if err != nil {
+    f.t.Fatalf("Error while reading autoscaling capacity content: %v", err)
+  }
+  if err := json.Unmarshal(bytes, &policies); err != nil {
+    f.t.Fatalf("Error while parsing autoscaling capacity content: %v", err)
+  }
+  f.autoscalingPolicies = policies
+  return f
+}
+
+func (f *fakeEsClient) 
withErrorOnDeleteAutoscalingAutoscalingPolicies() *fakeEsClient { + f.errorOnDeleteAutoscalingAutoscalingPolicies = true + return f +} + +func (f *fakeEsClient) newFakeElasticsearchClient(_ context.Context, _ k8s.Client, _ net.Dialer, _ esv1.Elasticsearch) (esclient.Client, error) { + return f, nil +} + +func (f *fakeEsClient) DeleteAutoscalingPolicies(_ context.Context) error { + f.policiesCleaned = true + if f.errorOnDeleteAutoscalingAutoscalingPolicies { + return fmt.Errorf("simulated error while calling DeleteAutoscalingAutoscalingPolicies") + } + return nil +} +func (f *fakeEsClient) CreateAutoscalingPolicy(_ context.Context, policyName string, autoscalingPolicy esv1.AutoscalingPolicy) error { + return nil +} +func (f *fakeEsClient) GetAutoscalingCapacity(_ context.Context) (esclient.AutoscalingCapacityResult, error) { + return f.autoscalingPolicies, nil +} +func (f *fakeEsClient) UpdateMLNodesSettings(_ context.Context, maxLazyMLNodes int32, maxMemory string) error { + return nil +} + +// - Fake licence checker + +type fakeLicenceChecker struct{} + +func (flc *fakeLicenceChecker) CurrentEnterpriseLicense() (*license.EnterpriseLicense, error) { + return nil, nil +} + +func (flc *fakeLicenceChecker) EnterpriseFeaturesEnabled() (bool, error) { + return true, nil +} + +func (flc *fakeLicenceChecker) Valid(l license.EnterpriseLicense) (bool, error) { + return true, nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go new file mode 100644 index 0000000000..ef5a328894 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -0,0 +1,254 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package elasticsearch + +import ( + "context" + "fmt" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/autoscaler" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/reconciler" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing" + esclient "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/services" + logconf "github.com/elastic/cloud-on-k8s/pkg/utils/log" + "github.com/go-logr/logr" + "go.elastic.co/apm" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +func (r *ReconcileElasticsearch) reconcileInternal( + ctx context.Context, + autoscalingStatus status.Status, + namedTiers esv1.AutoscaledNodeSets, + autoscalingSpec esv1.AutoscalingSpec, + es esv1.Elasticsearch, +) (reconcile.Result, error) { + defer tracing.Span(&ctx)() + results := &reconciler.Results{} + log := logconf.FromContext(ctx) + + if esReachable, err := r.isElasticsearchReachable(ctx, es); !esReachable || err != nil { + // Elasticsearch is not reachable, or we got an error while checking Elasticsearch availability, follow up with an offline reconciliation. 
+    if err != nil {
+      log.V(1).Info(
+        "error while checking if Elasticsearch is available, attempting offline reconciliation",
+        "error.message", err.Error(),
+      )
+    }
+    return r.doOfflineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results)
+  }
+
+  // The cluster is expected to be online and reachable: attempt a call to the autoscaling API.
+  // If an error occurs we still attempt an offline reconciliation to enforce the limits set by the user.
+  result, err := r.attemptOnlineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results)
+  if err != nil {
+    log.Error(tracing.CaptureError(ctx, err), "autoscaling online reconciliation failed")
+    // Attempt an offline reconciliation
+    if _, err := r.doOfflineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results); err != nil {
+      log.Error(tracing.CaptureError(ctx, err), "autoscaling offline reconciliation failed")
+    }
+  }
+  return result, err
+}
+
+// isElasticsearchReachable checks if the external Elasticsearch Service is available and ready.
+func (r *ReconcileElasticsearch) isElasticsearchReachable(ctx context.Context, es esv1.Elasticsearch) (bool, error) {
+  span, _ := apm.StartSpan(ctx, "is_es_reachable", tracing.SpanTypeApp)
+  defer span.End()
+  externalService, err := services.GetExternalService(r.Client, es)
+  if apierrors.IsNotFound(err) {
+    return false, nil
+  }
+  if err != nil {
+    return false, tracing.CaptureError(ctx, err)
+  }
+  esReachable, err := services.IsServiceReady(r.Client, externalService)
+  if err != nil {
+    return false, tracing.CaptureError(ctx, err)
+  }
+  return esReachable, nil
+}
+
+// attemptOnlineReconciliation attempts an online autoscaling reconciliation with a call to the Elasticsearch autoscaling API.
+func (r *ReconcileElasticsearch) attemptOnlineReconciliation(
+  ctx context.Context,
+  actualAutoscalingStatus status.Status,
+  namedTiers esv1.AutoscaledNodeSets,
+  autoscalingSpecs esv1.AutoscalingSpec,
+  es esv1.Elasticsearch,
+  results *reconciler.Results,
+) (reconcile.Result, error) {
+  span, _ := apm.StartSpan(ctx, "online_reconciliation", tracing.SpanTypeApp)
+  defer span.End()
+  log := logconf.FromContext(ctx)
+  log.V(1).Info("Starting online autoscaling reconciliation")
+  esClient, err := r.esClientProvider(ctx, r.Client, r.Dialer, es)
+  if err != nil {
+    return reconcile.Result{}, err
+  }
+
+  // Update the Machine Learning settings
+  mlNodes, maxMemory := autoscalingSpecs.GetMLNodesSettings()
+  if err := esClient.UpdateMLNodesSettings(ctx, mlNodes, maxMemory); err != nil {
+    log.Error(err, "Error while updating the ML settings")
+    return reconcile.Result{}, err
+  }
+
+  // Update the autoscaling policies in Elasticsearch
+  if err := updatePolicies(ctx, log, autoscalingSpecs, esClient); err != nil {
+    log.Error(err, "Error while updating the autoscaling policies")
+    return reconcile.Result{}, err
+  }
+
+  // Get the capacity requirements from the Elasticsearch autoscaling capacity API
+  decisions, err := esClient.GetAutoscalingCapacity(ctx)
+  if err != nil {
+    return reconcile.Result{}, err
+  }
+
+  // Initialize a new autoscaling status.
+  statusBuilder := status.NewAutoscalingStatusBuilder()
+
+  // nextClusterResources holds the resources computed by the autoscaling algorithm for each nodeSet.
+  var nextClusterResources resources.ClusterResources
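Before the per-policy loop, it helps to see the shape of the payload GetAutoscalingCapacity returns. A trimmed, self-contained decoding sketch: the JSON layout follows the Elasticsearch _autoscaling/capacity API and the testdata capacity.json fixtures, but the struct and the concrete numbers here are illustrative, not the operator's client types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// capacityResult is a trimmed-down model of the _autoscaling/capacity response.
type capacityResult struct {
	Policies map[string]struct {
		RequiredCapacity struct {
			Node  map[string]int64 `json:"node"`  // e.g. {"memory": ..., "storage": ...}
			Total map[string]int64 `json:"total"` // required capacity for the whole tier
		} `json:"required_capacity"`
	} `json:"policies"`
}

func main() {
	// Made-up numbers: 3GiB memory / 2GiB storage per node, 9GiB / 36GiB for the tier.
	payload := []byte(`{"policies": {"di": {"required_capacity": {
	  "node":  {"memory": 3221225472, "storage": 2147483648},
	  "total": {"memory": 9663676416, "storage": 38654705664}}}}}`)
	var result capacityResult
	if err := json.Unmarshal(payload, &result); err != nil {
		panic(err)
	}
	// One entry per autoscaling policy, keyed by policy name.
	fmt.Println(result.Policies["di"].RequiredCapacity.Total["storage"]) // 38654705664
}
```

+  // For each autoscaling policy we compute the resources to be applied to the related nodeSets.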
+  for _, autoscalingPolicy := range autoscalingSpecs.AutoscalingPolicySpecs {
+    // Get the current nodeSets
+    nodeSetList, exists := namedTiers[autoscalingPolicy.Name]
+    if !exists {
+      // This situation should be caught during validation, but we still want to trace this error if it happens.
+      err := fmt.Errorf("no nodeSets for tier %s", autoscalingPolicy.Name)
+      log.Error(err, "no nodeSet for a tier", "policy", autoscalingPolicy.Name)
+      results.WithError(err)
+      statusBuilder.ForPolicy(autoscalingPolicy.Name).WithEvent(status.NoNodeSet, err.Error())
+      continue
+    }
+
+    // Get the decision from the Elasticsearch API
+    var nodeSetsResources resources.NodeSetsResources
+    switch capacity, hasCapacity := decisions.Policies[autoscalingPolicy.Name]; hasCapacity && !capacity.RequiredCapacity.IsEmpty() {
+    case false:
+      // We didn't receive a decision for this tier, or the decision is empty. We can only ensure that resources are within the allowed ranges.
+      log.V(1).Info(
+        "No decision received from Elasticsearch, ensure resources limits are respected",
+        "policy", autoscalingPolicy.Name,
+      )
+      statusBuilder.ForPolicy(autoscalingPolicy.Name).WithEvent(status.EmptyResponse, "No required capacity from Elasticsearch")
+      nodeSetsResources = autoscaler.GetOfflineNodeSetsResources(log, nodeSetList.Names(), autoscalingPolicy, actualAutoscalingStatus)
+    case true:
+      // We received a capacity decision from Elasticsearch for this policy.
+      log.Info(
+        "Required capacity for policy",
+        "policy", autoscalingPolicy.Name,
+        "required_capacity", capacity.RequiredCapacity,
+        "current_capacity", capacity.CurrentCapacity,
+        "current_capacity.count", len(capacity.CurrentNodes),
+        "current_nodes", capacity.CurrentNodes)
+      // Ensure that the user has provided the resource ranges required by this decision
+      if !canDecide(log, capacity.RequiredCapacity, autoscalingPolicy, statusBuilder) {
+        continue
+      }
+      ctx := autoscaler.Context{
+        Log:                     log,
+        AutoscalingSpec:         autoscalingPolicy,
+        NodeSets:                nodeSetList,
+        ActualAutoscalingStatus: actualAutoscalingStatus,
+        RequiredCapacity:        capacity.RequiredCapacity,
+        StatusBuilder:           statusBuilder,
+      }
+      nodeSetsResources = ctx.GetResources()
+    }
+    // Add the result to the list of the next resources
+    nextClusterResources = append(nextClusterResources, nodeSetsResources)
+  }
+
+  // Emit the K8S events
+  status.EmitEvents(es, r.recorder, statusBuilder.Build())
+
+  // Update the Elasticsearch resource with the calculated resources.
+  if err := reconcileElasticsearch(log, &es, statusBuilder, nextClusterResources, actualAutoscalingStatus); err != nil {
+    return reconcile.Result{}, tracing.CaptureError(ctx, err)
+  }
+
+  if results.HasError() {
+    return results.Aggregate()
+  }
+
+  // Apply the updated Elasticsearch manifest
+  if err := r.Client.Update(context.Background(), &es); err != nil {
+    if apierrors.IsConflict(err) {
+      return results.WithResult(reconcile.Result{Requeue: true}).Aggregate()
+    }
+    return results.WithError(err).Aggregate()
+  }
+  return reconcile.Result{}, nil
+}
+
+// canDecide ensures that the user has provided the resource ranges needed to apply an Elasticsearch autoscaling decision.
+// The expected ranges are not consistent across deciders: for example the ml decider may only require memory limits,
+// while the response for the data deciders may also require storage limits.
+// Only memory and storage are checked, since CPU is not part of the autoscaling API specification.
+func canDecide(log logr.Logger, requiredCapacity esclient.AutoscalingCapacityInfo, spec esv1.AutoscalingPolicySpec, statusBuilder *status.AutoscalingStatusBuilder) bool {
+  result := true
+  if (requiredCapacity.Node.Memory != nil || requiredCapacity.Total.Memory != nil) && !spec.IsMemoryDefined() {
+    log.Error(fmt.Errorf("min and max memory must be specified"), "Min and max memory must be specified", "policy", spec.Name)
+    statusBuilder.ForPolicy(spec.Name).WithEvent(status.MemoryRequired, "Min and max memory must be specified")
+    result = false
+  }
+  if (requiredCapacity.Node.Storage != nil || requiredCapacity.Total.Storage != nil) && !spec.IsStorageDefined() {
+    log.Error(fmt.Errorf("min and max storage must be specified"), "Min and max storage must be specified", "policy", spec.Name)
+    statusBuilder.ForPolicy(spec.Name).WithEvent(status.StorageRequired, "Min and max storage must be specified")
+    result = false
+  }
+  return result
+}
+
+// doOfflineReconciliation runs an autoscaling reconciliation if the autoscaling API is not ready (yet).
+func (r *ReconcileElasticsearch) doOfflineReconciliation(
+  ctx context.Context,
+  actualAutoscalingStatus status.Status,
+  namedTiers esv1.AutoscaledNodeSets,
+  autoscalingSpec esv1.AutoscalingSpec,
+  es esv1.Elasticsearch,
+  results *reconciler.Results,
+) (reconcile.Result, error) {
+  defer tracing.Span(&ctx)()
+  log := logconf.FromContext(ctx)
+  log.V(1).Info("Starting offline autoscaling reconciliation")
+  statusBuilder := status.NewAutoscalingStatusBuilder()
+  var clusterNodeSetsResources resources.ClusterResources
+  // Elasticsearch is not reachable: we still want to ensure that the min. requirements are set
+  for _, autoscalingPolicy := range autoscalingSpec.AutoscalingPolicySpecs {
+    nodeSets, exists := namedTiers[autoscalingPolicy.Name]
+    if !exists {
+      return results.WithError(fmt.Errorf("no nodeSets for tier %s", autoscalingPolicy.Name)).Aggregate()
+    }
+    nodeSetsResources := autoscaler.GetOfflineNodeSetsResources(log, nodeSets.Names(), autoscalingPolicy, actualAutoscalingStatus)
+    clusterNodeSetsResources = append(clusterNodeSetsResources, nodeSetsResources)
+  }
+
+  // Emit the K8S events
+  status.EmitEvents(es, r.recorder, statusBuilder.Build())
+
+  // Update the Elasticsearch manifest
+  if err := reconcileElasticsearch(log, &es, statusBuilder, clusterNodeSetsResources, actualAutoscalingStatus); err != nil {
+    return reconcile.Result{}, tracing.CaptureError(ctx, err)
+  }
+
+  // Apply the updated Elasticsearch manifest
+  if err := r.Client.Update(context.Background(), &es); err != nil {
+    if apierrors.IsConflict(err) {
+      return results.WithResult(reconcile.Result{Requeue: true}).Aggregate()
+    }
+    return results.WithError(err).Aggregate()
+  }
+  return results.WithResult(defaultReconcile).Aggregate()
+}
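The policy.go file that follows syncs policies with a delete-then-recreate flow. As a reference, here is a hedged sketch of the REST calls such a client presumably issues; the endpoint paths follow the public Elasticsearch autoscaling API, while syncPolicies, baseURL and the policy bodies are hypothetical, with auth, TLS and error handling trimmed:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

// syncPolicies mirrors the delete-then-recreate flow of updatePolicies below,
// expressed as raw Elasticsearch REST calls.
func syncPolicies(baseURL string, policies map[string]string) error {
	// DELETE /_autoscaling/policy/* removes every existing autoscaling policy.
	req, _ := http.NewRequest(http.MethodDelete, baseURL+"/_autoscaling/policy/*", nil)
	if _, err := http.DefaultClient.Do(req); err != nil {
		return err
	}
	// PUT /_autoscaling/policy/<name> recreates each policy from the spec.
	for name, body := range policies {
		req, _ := http.NewRequest(http.MethodPut,
			fmt.Sprintf("%s/_autoscaling/policy/%s", baseURL, name),
			bytes.NewBufferString(body))
		req.Header.Set("Content-Type", "application/json")
		if _, err := http.DefaultClient.Do(req); err != nil {
			return err
		}
	}
	return nil
}
```

diff --git a/pkg/controller/autoscaling/elasticsearch/policy.go b/pkg/controller/autoscaling/elasticsearch/policy.go
new file mode 100644
index 0000000000..cb33f353bf
--- /dev/null
+++ b/pkg/controller/autoscaling/elasticsearch/policy.go
@@ -0,0 +1,39 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.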
+ +package elasticsearch + +import ( + "context" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/go-logr/logr" + "go.elastic.co/apm" +) + +// updatePolicies updates the autoscaling policies in the Elasticsearch cluster. +func updatePolicies( + ctx context.Context, + log logr.Logger, + autoscalingSpec esv1.AutoscalingSpec, + esclient client.AutoscalingClient, +) error { + span, _ := apm.StartSpan(ctx, "update_autoscaling_policies", tracing.SpanTypeApp) + defer span.End() + // Cleanup existing autoscaling policies + if err := esclient.DeleteAutoscalingPolicies(ctx); err != nil { + log.Error(err, "Error while deleting policies") + return err + } + // Create the expected autoscaling policies + for _, rp := range autoscalingSpec.AutoscalingPolicySpecs { + if err := esclient.CreateAutoscalingPolicy(ctx, rp.Name, rp.AutoscalingPolicy); err != nil { + log.Error(err, "Error while updating an autoscaling policy", "policy", rp.Name) + return err + } + } + return nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/reconcile.go b/pkg/controller/autoscaling/elasticsearch/reconcile.go new file mode 100644 index 0000000000..591ccdfa0f --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/reconcile.go @@ -0,0 +1,151 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package elasticsearch + +import ( + "context" + "fmt" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" + "github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/validation" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/volume" + "github.com/go-logr/logr" + "go.elastic.co/apm" + corev1 "k8s.io/api/core/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// reconcileElasticsearch updates the resources in the NodeSets of an Elasticsearch spec according to the NodeSetsResources +// computed by the autoscaling algorithm. It also updates the autoscaling status annotation. +func reconcileElasticsearch( + log logr.Logger, + es *esv1.Elasticsearch, + statusBuilder *status.AutoscalingStatusBuilder, + nextClusterResources resources.ClusterResources, + actualAutoscalingStatus status.Status, +) error { + nextResourcesByNodeSet := nextClusterResources.ByNodeSet() + for i := range es.Spec.NodeSets { + name := es.Spec.NodeSets[i].Name + nodeSetResources, ok := nextResourcesByNodeSet[name] + if !ok { + // No desired resources returned for this NodeSet, leave it untouched. + log.V(1).Info("Skipping nodeset update", "nodeset", name) + continue + } + + container, containers := removeContainer(esv1.ElasticsearchContainerName, es.Spec.NodeSets[i].PodTemplate.Spec.Containers) + // Create a copy to compare if some changes have been made. 
+ actualContainer := container.DeepCopy() + if container == nil { + container = &corev1.Container{ + Name: esv1.ElasticsearchContainerName, + } + } + + // Update desired count + es.Spec.NodeSets[i].Count = nodeSetResources.NodeCount + + if container.Resources.Requests == nil { + container.Resources.Requests = corev1.ResourceList{} + } + if container.Resources.Limits == nil { + container.Resources.Limits = corev1.ResourceList{} + } + + // Update memory requests and limits + if nodeSetResources.HasRequest(corev1.ResourceMemory) { + container.Resources.Requests[corev1.ResourceMemory] = nodeSetResources.GetRequest(corev1.ResourceMemory) + container.Resources.Limits[corev1.ResourceMemory] = nodeSetResources.GetRequest(corev1.ResourceMemory) + } + if nodeSetResources.HasRequest(corev1.ResourceCPU) { + container.Resources.Requests[corev1.ResourceCPU] = nodeSetResources.GetRequest(corev1.ResourceCPU) + } + + if nodeSetResources.HasRequest(corev1.ResourceStorage) { + nextStorage, err := newVolumeClaimTemplate(nodeSetResources.GetRequest(corev1.ResourceStorage), es.Spec.NodeSets[i]) + if err != nil { + return err + } + es.Spec.NodeSets[i].VolumeClaimTemplates = nextStorage + } + + // Add the container to other containers + containers = append(containers, *container) + // Update the NodeSet + es.Spec.NodeSets[i].PodTemplate.Spec.Containers = containers + + if !apiequality.Semantic.DeepEqual(actualContainer, container) { + log.V(1).Info("Updating nodeset with resources", "nodeset", name, "resources", nextClusterResources) + } + } + + // Update autoscaling status + return status.UpdateAutoscalingStatus(es, statusBuilder, nextClusterResources, actualAutoscalingStatus) +} + +func newVolumeClaimTemplate(storageQuantity resource.Quantity, nodeSet esv1.NodeSet) ([]corev1.PersistentVolumeClaim, error) { + onlyOneVolumeClaimTemplate, volumeClaimTemplateName := validation.HasAtMostOnePersistentVolumeClaim(nodeSet) + if !onlyOneVolumeClaimTemplate { + return nil, fmt.Errorf(validation.UnexpectedVolumeClaimError) + } + if volumeClaimTemplateName == "" { + volumeClaimTemplateName = volume.ElasticsearchDataVolumeName + } + return []corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: volumeClaimTemplateName, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: storageQuantity, + }, + }, + }, + }, + }, nil +} + +func (r *ReconcileElasticsearch) fetchElasticsearch( + ctx context.Context, + request reconcile.Request, + es *esv1.Elasticsearch, +) (bool, error) { + span, _ := apm.StartSpan(ctx, "fetch_elasticsearch", tracing.SpanTypeApp) + defer span.End() + + err := r.Get(context.Background(), request.NamespacedName, es) + if err != nil { + if apierrors.IsNotFound(err) { + return true, nil + } + // Error reading the object - requeue the request. + return true, err + } + return false, nil +} + +// removeContainer remove a container from a slice and return the removed container if found. +func removeContainer(name string, containers []corev1.Container) (*corev1.Container, []corev1.Container) { + for i, container := range containers { + if container.Name == name { + // Remove the container + return &container, append(containers[:i], containers[i+1:]...) 
+    }
+  }
+  return nil, containers
+}
diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources.go b/pkg/controller/autoscaling/elasticsearch/resources/resources.go
new file mode 100644
index 0000000000..d6d342c56d
--- /dev/null
+++ b/pkg/controller/autoscaling/elasticsearch/resources/resources.go
@@ -0,0 +1,291 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+package resources
+
+import (
+  "fmt"
+
+  v1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1"
+  corev1 "k8s.io/api/core/v1"
+  "k8s.io/apimachinery/pkg/api/equality"
+  "k8s.io/apimachinery/pkg/api/resource"
+)
+
+// NodeSetsResources models, for all the nodeSets managed by the same autoscaling policy:
+// * the desired resource quantities (cpu, memory, storage) expected in the nodeSet specifications
+// * the individual number of nodes (count) in each nodeSet
+type NodeSetsResources struct {
+  // Name is the name of the autoscaling policy to which these resources belong.
+  Name string `json:"name"`
+  // NodeSetNodeCount holds the number of nodes for each nodeSet.
+  NodeSetNodeCount NodeSetNodeCountList `json:"nodeSets"`
+  // NodeResources holds the resource values common to all the nodeSets managed by the same autoscaling policy.
+  NodeResources
+}
+
+// NewNodeSetsResources initializes an empty NodeSetsResources for a given set of NodeSets.
+func NewNodeSetsResources(name string, nodeSetNames []string) NodeSetsResources {
+  return NodeSetsResources{
+    Name:             name,
+    NodeSetNodeCount: newNodeSetNodeCountList(nodeSetNames),
+  }
+}
+
+// ClusterResources models the desired resources (CPU, memory, storage and number of nodes) for all the autoscaling policies in a cluster.
+type ClusterResources []NodeSetsResources
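As a quick illustration of how these types compose (one NodeSetsResources per policy, flattened into per-nodeSet views by helpers defined later in this file), a sketch with made-up names and values:

```go
package main

import (
	"fmt"

	"github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// One entry per autoscaling policy; both nodeSets share the same per-node resources.
	data := resources.NodeSetsResources{
		Name: "data",
		NodeSetNodeCount: resources.NodeSetNodeCountList{
			{Name: "data-zone-a", NodeCount: 3},
			{Name: "data-zone-b", NodeCount: 2},
		},
	}
	data.SetRequest(corev1.ResourceMemory, resource.MustParse("4Gi"))

	cluster := resources.ClusterResources{data}
	byNodeSet := cluster.ByNodeSet()
	fmt.Println(byNodeSet["data-zone-b"].NodeCount)     // 2
	fmt.Println(data.NodeSetNodeCount.TotalNodeCount()) // 5
}
```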
+// IsUsedBy returns true if the resources assigned to a container in a NodeSet match the ones specified in the NodeSetsResources.
+// It returns false if the container is not found in the NodeSet.
+func (ntr NodeSetsResources) IsUsedBy(containerName string, nodeSet v1.NodeSet) (bool, error) {
+  for _, nodeSetNodeCount := range ntr.NodeSetNodeCount {
+    if nodeSetNodeCount.Name != nodeSet.Name {
+      continue
+    }
+    if nodeSetNodeCount.NodeCount != nodeSet.Count {
+      // The number of nodes in the NodeSetsResources and in the nodeSet is not equal.
+      return false, nil
+    }
+
+    // Compare the volume request
+    switch len(nodeSet.VolumeClaimTemplates) {
+    case 0:
+      // If there is no VolumeClaimTemplate in the NodeSet then there should be no storage request in the NodeSetsResources.
+      if ntr.HasRequest(corev1.ResourceStorage) {
+        return false, nil
+      }
+    case 1:
+      volumeClaim := nodeSet.VolumeClaimTemplates[0]
+      if !ResourceEqual(corev1.ResourceStorage, ntr.NodeResources.Requests, volumeClaim.Spec.Resources.Requests) {
+        return false, nil
+      }
+    default:
+      return false, fmt.Errorf("only 1 volume claim template is allowed when autoscaling is enabled, got %d in nodeSet %s", len(nodeSet.VolumeClaimTemplates), nodeSet.Name)
+    }
+
+    // Compare the CPU and memory requests
+    container := getContainer(containerName, nodeSet.PodTemplate.Spec.Containers)
+    if container == nil {
+      return false, nil
+    }
+    return ResourceEqual(corev1.ResourceMemory, ntr.NodeResources.Requests, container.Resources.Requests) &&
+      ResourceEqual(corev1.ResourceCPU, ntr.NodeResources.Requests, container.Resources.Requests), nil
+  }
+  return false, nil
+}
+
+// ResourceEqual returns true if the expected value for a resource is either absent or equal to the current value.
+func ResourceEqual(resourceName corev1.ResourceName, expected, current corev1.ResourceList) bool {
+  if len(expected) == 0 {
+    // No value expected, return true
+    return true
+  }
+  expectedValue, hasExpectedValue := expected[resourceName]
+  if !hasExpectedValue {
+    // The expected values do not contain the resource
+    return true
+  }
+  if len(current) == 0 {
+    // A value is expected but current is nil or empty
+    return false
+  }
+  currentValue, hasCurrentValue := current[resourceName]
+  if !hasCurrentValue {
+    // The current values do not contain the resource
+    return false
+  }
+  return expectedValue.Equal(currentValue)
+}
+
+func getContainer(name string, containers []corev1.Container) *corev1.Container {
+  for i := range containers {
+    container := containers[i]
+    if container.Name == name {
+      // Return a pointer to a copy of the matching container
+      return &container
+    }
+  }
+  return nil
+}
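The asymmetry of ResourceEqual is easy to miss: a resource missing on the expected side counts as a match, while a resource missing on the current side does not. A small sketch of the resulting behavior, with made-up values:

```go
package main

import (
	"fmt"

	"github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	mem4 := corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("4Gi")}
	mem8 := corev1.ResourceList{corev1.ResourceMemory: resource.MustParse("8Gi")}
	empty := corev1.ResourceList{}

	fmt.Println(resources.ResourceEqual(corev1.ResourceMemory, mem4, mem4))  // true: equal values
	fmt.Println(resources.ResourceEqual(corev1.ResourceMemory, mem4, mem8))  // false: different values
	fmt.Println(resources.ResourceEqual(corev1.ResourceMemory, empty, mem8)) // true: nothing is expected
	fmt.Println(resources.ResourceEqual(corev1.ResourceMemory, mem4, empty)) // false: expected but missing
}
```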
+// NodeSetNodeCount models the number of nodes expected in a given NodeSet.
+type NodeSetNodeCount struct {
+  // NodeSet name.
+  Name string `json:"name"`
+  // NodeCount is the number of nodes, as computed by the autoscaler, expected in this NodeSet.
+  NodeCount int32 `json:"nodeCount"`
+}
+type NodeSetNodeCountList []NodeSetNodeCount
+
+// TotalNodeCount returns the total number of nodes.
+func (n NodeSetNodeCountList) TotalNodeCount() int32 {
+  var totalNodeCount int32
+  for _, nodeSet := range n {
+    totalNodeCount += nodeSet.NodeCount
+  }
+  return totalNodeCount
+}
+
+func (n NodeSetNodeCountList) ByNodeSet() map[string]int32 {
+  byNodeSet := make(map[string]int32)
+  for _, nodeSet := range n {
+    byNodeSet[nodeSet.Name] = nodeSet.NodeCount
+  }
+  return byNodeSet
+}
+
+func newNodeSetNodeCountList(nodeSetNames []string) NodeSetNodeCountList {
+  nodeSetNodeCount := make([]NodeSetNodeCount, len(nodeSetNames))
+  for i := range nodeSetNames {
+    nodeSetNodeCount[i] = NodeSetNodeCount{Name: nodeSetNames[i]}
+  }
+  return nodeSetNodeCount
+}
+
+// NodeResources holds the resources to be used by each node managed by an autoscaling policy.
+// All the nodes managed by an autoscaling policy have the same resources, even if they are in different NodeSets.
+type NodeResources struct {
+  Limits   corev1.ResourceList `json:"limits,omitempty"`
+  Requests corev1.ResourceList `json:"requests,omitempty"`
+}
+
+// MaxMerge merges the specified resource into the NodeResources only if its quantity is greater
+// than the existing one.
+func (rs *NodeResources) MaxMerge(
+  other corev1.ResourceRequirements,
+  resourceName corev1.ResourceName,
+) {
+  // Requests
+  otherResourceRequestValue, otherHasResourceRequest := other.Requests[resourceName]
+  if otherHasResourceRequest {
+    if rs.Requests == nil {
+      rs.Requests = make(corev1.ResourceList)
+    }
+    receiverValue, receiverHasResource := rs.Requests[resourceName]
+    if !receiverHasResource {
+      rs.Requests[resourceName] = otherResourceRequestValue
+    } else if otherResourceRequestValue.Cmp(receiverValue) > 0 {
+      rs.Requests[resourceName] = otherResourceRequestValue
+    }
+  }
+
+  // Limits
+  otherResourceLimitValue, otherHasResourceLimit := other.Limits[resourceName]
+  if otherHasResourceLimit {
+    if rs.Limits == nil {
+      rs.Limits = make(corev1.ResourceList)
+    }
+    receiverValue, receiverHasResource := rs.Limits[resourceName]
+    if !receiverHasResource {
+      rs.Limits[resourceName] = otherResourceLimitValue
+    } else if otherResourceLimitValue.Cmp(receiverValue) > 0 {
+      rs.Limits[resourceName] = otherResourceLimitValue
+    }
+  }
+}
+
+func (rs *NodeResources) SetRequest(resourceName corev1.ResourceName, quantity resource.Quantity) {
+  if rs.Requests == nil {
+    rs.Requests = make(corev1.ResourceList)
+  }
+  rs.Requests[resourceName] = quantity
+}
+
+func (rs *NodeResources) SetLimit(resourceName corev1.ResourceName, quantity resource.Quantity) {
+  if rs.Limits == nil {
+    rs.Limits = make(corev1.ResourceList)
+  }
+  rs.Limits[resourceName] = quantity
+}
+
+func (rs *NodeResources) HasRequest(resourceName corev1.ResourceName) bool {
+  if rs.Requests == nil {
+    return false
+  }
+  _, hasRequest := rs.Requests[resourceName]
+  return hasRequest
+}
+
+func (rs *NodeResources) GetRequest(resourceName corev1.ResourceName) resource.Quantity {
+  return rs.Requests[resourceName]
+}
+
+// ResourceListInt64 is a set of (resource name, quantity) pairs with the quantities expressed as int64 values.
+type ResourceListInt64 map[corev1.ResourceName]int64
+
+// NodeResourcesInt64 is mostly used in logs to print comparable values which can be used in dashboards.
+type NodeResourcesInt64 struct {
+  Requests ResourceListInt64 `json:"requests,omitempty"`
+  Limits   ResourceListInt64 `json:"limits,omitempty"`
+}
+
+// ToInt64 converts all the resource quantities to int64, mostly to be logged and to build dashboards.
+func (rs NodeResources) ToInt64() NodeResourcesInt64 {
+  rs64 := NodeResourcesInt64{
+    Requests: make(ResourceListInt64),
+    Limits:   make(ResourceListInt64),
+  }
+  for resource, value := range rs.Requests {
+    switch resource {
+    case corev1.ResourceCPU:
+      rs64.Requests[resource] = value.MilliValue()
+    default:
+      rs64.Requests[resource] = value.Value()
+    }
+  }
+  for resource, value := range rs.Limits {
+    switch resource {
+    case corev1.ResourceCPU:
+      rs64.Limits[resource] = value.MilliValue()
+    default:
+      rs64.Limits[resource] = value.Value()
+    }
+  }
+  return rs64
+}
+
+type NodeSetResources struct {
+  NodeCount int32
+  *NodeSetsResources
+}
+
+// SameResources compares the resources allocated to the nodes in a named tier and returns true
+// if they are equal.
+func (ntr NodeSetsResources) SameResources(other NodeSetsResources) bool { + thisByName := ntr.NodeSetNodeCount.ByNodeSet() + otherByName := other.NodeSetNodeCount.ByNodeSet() + if len(thisByName) != len(otherByName) { + return false + } + for nodeSet, nodeCount := range thisByName { + otherNodeCount, ok := otherByName[nodeSet] + if !ok || nodeCount != otherNodeCount { + return false + } + } + return equality.Semantic.DeepEqual(ntr.NodeResources, other.NodeResources) +} + +func (cr ClusterResources) ByNodeSet() map[string]NodeSetResources { + byNodeSet := make(map[string]NodeSetResources) + for i := range cr { + nodeSetsResource := cr[i] + for j := range nodeSetsResource.NodeSetNodeCount { + nodeSetNodeCount := nodeSetsResource.NodeSetNodeCount[j] + nodeSetResources := NodeSetResources{ + NodeCount: nodeSetNodeCount.NodeCount, + NodeSetsResources: &nodeSetsResource, + } + byNodeSet[nodeSetNodeCount.Name] = nodeSetResources + } + } + return byNodeSet +} + +func (cr ClusterResources) ByAutoscalingPolicy() map[string]NodeSetsResources { + byNamedTier := make(map[string]NodeSetsResources) + for _, namedTierResources := range cr { + byNamedTier[namedTierResources.Name] = namedTierResources + } + return byNamedTier +} diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go new file mode 100644 index 0000000000..181f38a3f5 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go @@ -0,0 +1,366 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package resources + +import ( + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/volume" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestResourcesSpecification_MaxMerge(t *testing.T) { + type fields struct { + Limits corev1.ResourceList + Requests corev1.ResourceList + } + type args struct { + other corev1.ResourceRequirements + resourceName corev1.ResourceName + want NodeResources + } + tests := []struct { + name string + fields fields + args args + }{ + { + name: "max is receiver", + fields: fields{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + corev1.ResourceCPU: resource.MustParse("2000"), + }, + }, + args: args{ + other: corev1.ResourceRequirements{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("4Gi"), + corev1.ResourceCPU: resource.MustParse("1000"), + }, + }, + resourceName: corev1.ResourceMemory, + want: NodeResources{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + corev1.ResourceCPU: resource.MustParse("2000"), + }, + }, + }, + }, + { + name: "max is other", + fields: fields{ + // receiver + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("4Gi"), + corev1.ResourceCPU: resource.MustParse("1000"), + }, + }, + args: args{ + other: corev1.ResourceRequirements{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceCPU: resource.MustParse("2000"), + corev1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + corev1.ResourceCPU: resource.MustParse("2000"), + }, + }, + resourceName: corev1.ResourceMemory, + want: NodeResources{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + }, + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("8Gi"), + corev1.ResourceCPU: resource.MustParse("1000"), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rs := &NodeResources{ + Limits: tt.fields.Limits, + Requests: tt.fields.Requests, + } + rs.MaxMerge(tt.args.other, tt.args.resourceName) + assert.True(t, apiequality.Semantic.DeepEqual(rs.Requests, tt.args.want.Requests), "Unexpected requests") + assert.True(t, apiequality.Semantic.DeepEqual(rs.Limits, tt.args.want.Limits), "Unexpected limits") + }) + } +} + +func TestNamedTierResources_IsUsedBy(t *testing.T) { + type fields struct { + Name string + NodeSetNodeCount NodeSetNodeCountList + ResourcesSpecification NodeResources + } + type args struct { + nodeSet esv1.NodeSet + } + tests := []struct { + name string + fields fields + args args + want bool + wantErr bool + }{ 
+    {
+      name: "Volume claim does not exist in nodeSet spec",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("2000m")},
+        },
+      },
+      args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withMemoryRequest("4Gi").withCPURequest("2000m").build()},
+      want: false,
+    },
+    {
+      name: "Volume claims are not equal",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("2000m")},
+        },
+      },
+      args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withStorageRequest("1Gi").withMemoryRequest("4Gi").withCPURequest("2000m").build()},
+      want: false,
+    },
+    {
+      name: "Node count is not the same",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 6}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("2000m")},
+        },
+      },
+      args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withStorageRequest("2Gi").withMemoryRequest("4Gi").withCPURequest("2000m").build()},
+      want: false,
+    },
+    {
+      name: "Memory is not equal",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: resource.MustParse("1Gi"), corev1.ResourceCPU: resource.MustParse("2000m")},
+        },
+      },
+      args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withMemoryRequest("4Gi").withCPURequest("2000m").build()},
+      want: false,
+    },
+    {
+      name: "CPU is not equal",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("8000m")},
+        },
+      },
+      args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withStorageRequest("2Gi").withMemoryRequest("4Gi").withCPURequest("2000m").build()},
+      want: false,
+    },
+    {
+      name: "Happy path",
+      fields: fields{
+        Name:             "data-inject",
+        NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}},
+        ResourcesSpecification: NodeResources{
+          Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: resource.MustParse("2Gi"), corev1.ResourceMemory: 
resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("2000m")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withStorageRequest("2Gi").withMemoryRequest("4Gi").withCPURequest("2000m").build()}, + want: true, + }, + { + name: "CPU and Memory are equal, no storage", + fields: fields{ + Name: "data-inject", + NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}}, + ResourcesSpecification: NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("4Gi"), corev1.ResourceCPU: resource.MustParse("2000m")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withMemoryRequest("4Gi").withCPURequest("2000m").build()}, + want: true, + }, + { + name: "Only memory", + fields: fields{ + Name: "data-inject", + NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}}, + ResourcesSpecification: NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("4Gi")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withMemoryRequest("4Gi").build()}, + want: true, + }, + { + name: "Only memory, not equal", + fields: fields{ + Name: "data-inject", + NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}}, + ResourcesSpecification: NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("8Gi")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withMemoryRequest("4Gi").build()}, + want: false, + }, + { + name: "Only CPU", + fields: fields{ + Name: "data-inject", + NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}}, + ResourcesSpecification: NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceCPU: resource.MustParse("2000m")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withCPURequest("2000m").build()}, + want: true, + }, + { + name: "Only CPU, not equal", + fields: fields{ + Name: "data-inject", + NodeSetNodeCount: NodeSetNodeCountList{NodeSetNodeCount{Name: "nodeset-1", NodeCount: 3}, NodeSetNodeCount{Name: "nodeset-2", NodeCount: 5}}, + ResourcesSpecification: NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceCPU: resource.MustParse("4000m")}, + }, + }, + args: args{nodeSet: newNodeSetBuilder("nodeset-2", 5).withCPURequest("2000m").build()}, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ntr := NodeSetsResources{ + Name: tt.fields.Name, + NodeSetNodeCount: tt.fields.NodeSetNodeCount, + NodeResources: tt.fields.ResourcesSpecification, + } + got, err := ntr.IsUsedBy(esv1.ElasticsearchContainerName, tt.args.nodeSet) + if (err != nil) != tt.wantErr { + t.Errorf("NodeSetsResources.IsUsedBy() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("NodeSetsResources.IsUsedBy() = %v, want %v", got, tt.want) + } + }) + } +} + +// - NodeSet builder + +type nodeSetBuilder struct { + name string + count int32 + memoryRequest, cpuRequest, storageRequest *resource.Quantity +} + +func newNodeSetBuilder(name string, count int) *nodeSetBuilder { + return &nodeSetBuilder{ + name: name, + 
count: int32(count), + } +} + +func (nsb *nodeSetBuilder) withMemoryRequest(qs string) *nodeSetBuilder { + q := resource.MustParse(qs) + nsb.memoryRequest = &q + return nsb +} + +func (nsb *nodeSetBuilder) withCPURequest(qs string) *nodeSetBuilder { + q := resource.MustParse(qs) + nsb.cpuRequest = &q + return nsb +} + +func (nsb *nodeSetBuilder) withStorageRequest(qs string) *nodeSetBuilder { + q := resource.MustParse(qs) + nsb.storageRequest = &q + return nsb +} + +func (nsb *nodeSetBuilder) build() esv1.NodeSet { + nodeSet := esv1.NodeSet{ + Name: nsb.name, + Config: nil, + Count: nsb.count, + PodTemplate: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: esv1.ElasticsearchContainerName, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + }, + }, + }, + }, + }, + } + + // Set the memory request + if nsb.memoryRequest != nil { + nodeSet.PodTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceMemory] = *nsb.memoryRequest + } + + // Set the CPU request + if nsb.cpuRequest != nil { + nodeSet.PodTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = *nsb.cpuRequest + } + + // Set the storage request in a volume claim template + if nsb.storageRequest != nil { + storageRequest := corev1.ResourceList{} + storageRequest[corev1.ResourceStorage] = *nsb.storageRequest + nodeSet.VolumeClaimTemplates = append(nodeSet.VolumeClaimTemplates, + corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: volume.ElasticsearchDataVolumeName, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: *nsb.storageRequest, + }, + }, + }, + }, + ) + } + return nodeSet +} diff --git a/pkg/controller/autoscaling/elasticsearch/status/actual.go b/pkg/controller/autoscaling/elasticsearch/status/actual.go new file mode 100644 index 0000000000..91c34b6872 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/status/actual.go @@ -0,0 +1,169 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package status + +import ( + "context" + "fmt" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/utils/k8s" + "github.com/go-logr/logr" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// ImportExistingResources attempts to infer the resources to use in a tier when an autoscaling policy is not in the Status yet. +// This can be the case if: +// * The cluster was managed manually and the user now wants the autoscaling controller to manage the resources. In that case +// we want to be able to set some sensible default resources even if the autoscaling API is not responding. +// * The Elasticsearch resource has been replaced and the status annotation has been lost. +func (s *Status) ImportExistingResources( + log logr.Logger, + c k8s.Client, + as esv1.AutoscalingSpec, + namedTiers esv1.AutoscaledNodeSets, +) error { + for _, autoscalingPolicy := range as.AutoscalingPolicySpecs { + if _, inStatus := s.GetNamedTierResources(autoscalingPolicy.Name); inStatus { + // This autoscaling policy is already managed and we have some resources in the Status.
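+ // Keep the resources recorded in the status rather than inferring them again from the StatefulSets.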
+ continue + } + // Get the nodeSets + nodeSetList, exists := namedTiers[autoscalingPolicy.Name] + if !exists { + // Not supposed to happen with proper validation in place, but we still want to report this error + return fmt.Errorf("no nodeSet associated with autoscaling policy %s", autoscalingPolicy.Name) + } + resources, err := namedTierResourcesFromStatefulSets(c, as.Elasticsearch, autoscalingPolicy, nodeSetList.Names()) + if err != nil { + return err + } + if resources == nil { + // No StatefulSet, the cluster or the tier might be a new one. + continue + } + log.Info("Importing resources from existing StatefulSets", + "policy", autoscalingPolicy.Name, + "nodeset", resources.NodeSetNodeCount, + "count", resources.NodeSetNodeCount.TotalNodeCount(), + "resources", resources.ToInt64(), + ) + // We only want to save the resources in the status. + s.AutoscalingPolicyStatuses = append(s.AutoscalingPolicyStatuses, + AutoscalingPolicyStatus{ + Name: autoscalingPolicy.Name, + NodeSetNodeCount: resources.NodeSetNodeCount, + ResourcesSpecification: resources.NodeResources, + }) + } + return nil +} + +// namedTierResourcesFromStatefulSets creates NodeSetsResources from existing StatefulSets. +func namedTierResourcesFromStatefulSets( + c k8s.Client, + es esv1.Elasticsearch, + autoscalingPolicySpec esv1.AutoscalingPolicySpec, + nodeSets []string, +) (*resources.NodeSetsResources, error) { + namedTierResources := resources.NodeSetsResources{ + Name: autoscalingPolicySpec.Name, + } + found := false + // For each nodeSet: + // 1. we try to get the corresponding StatefulSet + // 2. we build a NodeSetsResources from the max. resources of each StatefulSet + for _, nodeSetName := range nodeSets { + statefulSetName := esv1.StatefulSet(es.Name, nodeSetName) + statefulSet := appsv1.StatefulSet{} + err := c.Get( + context.Background(), + client.ObjectKey{ + Namespace: es.Namespace, + Name: statefulSetName, + }, &statefulSet) + if errors.IsNotFound(err) { + continue + } + if err != nil { + return nil, err + } + + found = true + namedTierResources.NodeSetNodeCount = append(namedTierResources.NodeSetNodeCount, resources.NodeSetNodeCount{ + Name: nodeSetName, + NodeCount: getStatefulSetReplicas(statefulSet), + }) + + // Get the data volume size + ssetStorageRequest, err := getElasticsearchDataVolumeQuantity(statefulSet) + if err != nil { + return nil, err + } + if ssetStorageRequest != nil && autoscalingPolicySpec.IsStorageDefined() { + if namedTierResources.HasRequest(corev1.ResourceStorage) { + if ssetStorageRequest.Cmp(namedTierResources.GetRequest(corev1.ResourceStorage)) > 0 { + namedTierResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) + } + } else { + namedTierResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) + } + } + + // Get the memory and the CPU, if any + container := getContainer(esv1.ElasticsearchContainerName, statefulSet.Spec.Template.Spec.Containers) + if container == nil { + continue + } + if autoscalingPolicySpec.IsMemoryDefined() { + namedTierResources.MaxMerge(container.Resources, corev1.ResourceMemory) + } + if autoscalingPolicySpec.IsCPUDefined() { + namedTierResources.MaxMerge(container.Resources, corev1.ResourceCPU) + } + } + if !found { + return nil, nil + } + return &namedTierResources, nil +} + +// getElasticsearchDataVolumeQuantity returns the storage quantity claimed for the Elasticsearch data volume, if any. +func getElasticsearchDataVolumeQuantity(statefulSet appsv1.StatefulSet) (*resource.Quantity, error) { + if
len(statefulSet.Spec.VolumeClaimTemplates) > 1 { + // We do not support nodeSets with more than one volume. + return nil, fmt.Errorf("autoscaling does not support nodeSet with more than one volume claim") + } + + if len(statefulSet.Spec.VolumeClaimTemplates) == 1 { + volumeClaimTemplate := statefulSet.Spec.VolumeClaimTemplates[0] + ssetStorageRequest, ssetHasStorageRequest := volumeClaimTemplate.Spec.Resources.Requests[corev1.ResourceStorage] + if ssetHasStorageRequest { + return &ssetStorageRequest, nil + } + } + return nil, nil +} + +func getStatefulSetReplicas(sset appsv1.StatefulSet) int32 { + if sset.Spec.Replicas != nil { + return *sset.Spec.Replicas + } + return 0 +} + +func getContainer(containerName string, containers []corev1.Container) *corev1.Container { + for _, container := range containers { + if container.Name == containerName { + return &container + } + } + return nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/status/actual_test.go b/pkg/controller/autoscaling/elasticsearch/status/actual_test.go new file mode 100644 index 0000000000..b202678e8b --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/status/actual_test.go @@ -0,0 +1,284 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package status + +import ( + "reflect" + "testing" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/volume" + "github.com/elastic/cloud-on-k8s/pkg/utils/k8s" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +func TestNamedTierResourcesFromStatefulSets(t *testing.T) { + type args struct { + statefulSets []runtime.Object + es esv1.Elasticsearch + autoscalingPolicySpec esv1.AutoscalingPolicySpec + nodeSets []string + } + tests := []struct { + name string + args args + wantNamedTierResources *resources.NodeSetsResources + wantErr bool + }{ + { + name: "No existing StatefulSet", + args: args{ + statefulSets: []runtime.Object{ /* no existing StatefulSet */ }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}, + AutoscalingResources: esv1.AutoscalingResources{Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}}}, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantNamedTierResources: nil, + }, + { + name: "Has existing resources only with storage", + args: args{ + statefulSets: []runtime.Object{ + buildStatefulSet( + "nodeset-1", + 3, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("5Gi")}, + ), + buildStatefulSet( + "nodeset-2", + 2, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("10Gi")}, + ), + }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}, + 
AutoscalingResources: esv1.AutoscalingResources{Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}}}, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantNamedTierResources: &resources.NodeSetsResources{ + Name: "aspec", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, + NodeResources: resources.NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse("10Gi"), + }, + }, + }, + }, + { + name: "Has existing resources, happy path", + args: args{ + statefulSets: []runtime.Object{ + buildStatefulSet( + "nodeset-1", + 3, + map[string]corev1.ResourceRequirements{"elasticsearch": { + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("32Gi")}, + }}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("5Gi")}, + ), + buildStatefulSet( + "nodeset-2", + 2, + map[string]corev1.ResourceRequirements{"elasticsearch": { + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("24Gi")}, + }}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("10Gi")}, + ), + }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}, + AutoscalingResources: esv1.AutoscalingResources{ + Memory: &esv1.QuantityRange{Min: resource.MustParse("12Gi"), Max: resource.MustParse("64Gi")}, + Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}, + }, + }, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantNamedTierResources: &resources.NodeSetsResources{ + Name: "aspec", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, + NodeResources: resources.NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("32Gi"), + corev1.ResourceStorage: resource.MustParse("10Gi"), + }, + }, + }, + }, + { + name: "No volume claim", + args: args{ + statefulSets: []runtime.Object{ + buildStatefulSet( + "nodeset-1", + 3, + map[string]corev1.ResourceRequirements{"elasticsearch": { + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("32Gi")}, + }}, + map[string]resource.Quantity{}, + ), + buildStatefulSet( + "nodeset-2", + 2, + map[string]corev1.ResourceRequirements{"elasticsearch": { + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: resource.MustParse("24Gi")}, + }}, + map[string]resource.Quantity{}, + ), + }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}, + AutoscalingResources: esv1.AutoscalingResources{ + Memory: &esv1.QuantityRange{Min: resource.MustParse("12Gi"), Max: resource.MustParse("64Gi")}, + Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}, + }, + }, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantNamedTierResources: &resources.NodeSetsResources{ + Name: "aspec", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, + NodeResources: 
resources.NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceMemory: resource.MustParse("32Gi"), + }, + }, + }, + }, + { + name: "Several volume claims", + args: args{ + statefulSets: []runtime.Object{ + buildStatefulSet( + "nodeset-1", + 3, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("5Gi")}, + ), + buildStatefulSet( + "nodeset-2", + 2, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("10Gi"), "other": resource.MustParse("10Gi")}, + ), + }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}}, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantErr: true, + wantNamedTierResources: nil, + }, + { + name: "Not the default volume claims", + args: args{ + statefulSets: []runtime.Object{ + buildStatefulSet( + "nodeset-1", + 3, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{volume.ElasticsearchDataVolumeName: resource.MustParse("5Gi")}, + ), + buildStatefulSet( + "nodeset-2", + 2, + map[string]corev1.ResourceRequirements{}, + map[string]resource.Quantity{"other": resource.MustParse("10Gi")}, + ), + }, + es: esv1.Elasticsearch{ObjectMeta: metav1.ObjectMeta{Name: "esname", Namespace: "esns"}}, + autoscalingPolicySpec: esv1.AutoscalingPolicySpec{ + NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}, + AutoscalingResources: esv1.AutoscalingResources{ + Memory: &esv1.QuantityRange{Min: resource.MustParse("12Gi"), Max: resource.MustParse("64Gi")}, + Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}, + }, + }, + nodeSets: []string{"nodeset-1", "nodeset-2"}, + }, + wantErr: false, + wantNamedTierResources: &resources.NodeSetsResources{ + Name: "aspec", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, + NodeResources: resources.NodeResources{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse("10Gi"), + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := k8s.NewFakeClient(tt.args.statefulSets...) 
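+ // Run the resource import against a fake client pre-populated with the StatefulSets of the test case.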
+ got, err := namedTierResourcesFromStatefulSets(c, tt.args.es, tt.args.autoscalingPolicySpec, tt.args.nodeSets) + if (err != nil) != tt.wantErr { + t.Errorf("namedTierResourcesFromStatefulSets() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.wantNamedTierResources) { + t.Errorf("namedTierResourcesFromStatefulSets() got = %v, want %v", got, tt.wantNamedTierResources) + } + }) + } +} + +func buildStatefulSet( + nodeSetName string, replicas int, + containersResources map[string]corev1.ResourceRequirements, + volumeClaimTemplates map[string]resource.Quantity, +) *appsv1.StatefulSet { + statefulSet := appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: esv1.StatefulSet("esname", nodeSetName), + Namespace: "esns", + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: int32ptr(replicas), + }, + } + + // Add volumes + for volumeName, volumeRequest := range volumeClaimTemplates { + pvc := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: volumeName}, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.ResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceStorage: volumeRequest}, + }, + }, + } + statefulSet.Spec.VolumeClaimTemplates = append(statefulSet.Spec.VolumeClaimTemplates, pvc) + } + + // Add containers + for containerName, containerResources := range containersResources { + container := corev1.Container{ + Name: containerName, + Resources: containerResources, + } + statefulSet.Spec.Template.Spec.Containers = append(statefulSet.Spec.Template.Spec.Containers, container) + } + + return &statefulSet +} + +func int32ptr(i int) *int32 { + v := int32(i) + return &v +} diff --git a/pkg/controller/autoscaling/elasticsearch/status/events.go b/pkg/controller/autoscaling/elasticsearch/status/events.go new file mode 100644 index 0000000000..6f9c4e3079 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/status/events.go @@ -0,0 +1,29 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package status + +import ( + "strings" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/record" +) + +// EmitEvents emits a selected type of event on the Kubernetes cluster event channel. +func EmitEvents(elasticsearch esv1.Elasticsearch, recorder record.EventRecorder, status Status) { + for _, status := range status.AutoscalingPolicyStatuses { + emitEventForAutoscalingPolicy(elasticsearch, recorder, status) + } +} + +func emitEventForAutoscalingPolicy(elasticsearch esv1.Elasticsearch, recorder record.EventRecorder, status AutoscalingPolicyStatus) { + for _, event := range status.PolicyStates { + switch event.Type { + case VerticalScalingLimitReached, HorizontalScalingLimitReached, MemoryRequired, StorageRequired: + recorder.Event(&elasticsearch, corev1.EventTypeWarning, string(event.Type), strings.Join(event.Messages, ". ")) + } + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/status/status.go b/pkg/controller/autoscaling/elasticsearch/status/status.go new file mode 100644 index 0000000000..a082a8fd5d --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/status/status.go @@ -0,0 +1,212 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package status + +import ( + "encoding/json" + + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + ElasticsearchAutoscalingStatusAnnotationName = "elasticsearch.alpha.elastic.co/autoscaling-status" + + VerticalScalingLimitReached PolicyStateType = "VerticalScalingLimitReached" + HorizontalScalingLimitReached PolicyStateType = "HorizontalScalingLimitReached" + MemoryRequired PolicyStateType = "MemoryRequired" + EmptyResponse PolicyStateType = "EmptyResponse" + StorageRequired PolicyStateType = "StorageRequired" + NoNodeSet PolicyStateType = "NoNodeSet" +) + +type Status struct { + // AutoscalingPolicyStatuses is used to expose state messages to the user or to external systems. + AutoscalingPolicyStatuses []AutoscalingPolicyStatus `json:"policies"` +} + +type AutoscalingPolicyStatus struct { + // Name is the name of the autoscaling policy. + Name string `json:"name"` + // NodeSetNodeCount holds the number of nodes for each nodeSet. + NodeSetNodeCount resources.NodeSetNodeCountList `json:"nodeSets"` + // ResourcesSpecification holds the resource values common to all the nodeSets managed by the same autoscaling policy. + // Only the resources managed by the autoscaling controller are saved in the Status. + ResourcesSpecification resources.NodeResources `json:"resources"` + // PolicyStates may contain various messages regarding the current state of this autoscaling policy. + PolicyStates []PolicyState `json:"state"` + // LastModificationTime is the last time the resources have been updated, used by the cooldown algorithm.
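+ // It is only refreshed when the computed resources change (see UpdateAutoscalingStatus).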
+ LastModificationTime metav1.Time `json:"lastModificationTime"` +} + +func (s *Status) GetNamedTierResources(policyName string) (resources.NodeSetsResources, bool) { + for _, policyStatus := range s.AutoscalingPolicyStatuses { + if policyStatus.Name == policyName { + return resources.NodeSetsResources{ + Name: policyStatus.Name, + NodeSetNodeCount: policyStatus.NodeSetNodeCount, + NodeResources: policyStatus.ResourcesSpecification, + }, true + } + } + return resources.NodeSetsResources{}, false +} + +func (s *Status) GetLastModificationTime(policyName string) (metav1.Time, bool) { + for _, policyState := range s.AutoscalingPolicyStatuses { + if policyState.Name == policyName { + return policyState.LastModificationTime, true + } + } + return metav1.Time{}, false +} + +type AutoscalingPolicyStatusBuilder struct { + policyName string + namedTierResources resources.NodeSetsResources + lastModificationTime metav1.Time + states map[PolicyStateType]PolicyState +} + +func NewAutoscalingPolicyStatusBuilder(name string) *AutoscalingPolicyStatusBuilder { + return &AutoscalingPolicyStatusBuilder{ + policyName: name, + states: make(map[PolicyStateType]PolicyState), + } +} + +func (psb *AutoscalingPolicyStatusBuilder) Build() AutoscalingPolicyStatus { + policyStates := make([]PolicyState, len(psb.states)) + i := 0 + for _, v := range psb.states { + policyStates[i] = PolicyState{ + Type: v.Type, + Messages: v.Messages, + } + i++ + } + return AutoscalingPolicyStatus{ + Name: psb.policyName, + NodeSetNodeCount: psb.namedTierResources.NodeSetNodeCount, + ResourcesSpecification: psb.namedTierResources.NodeResources, + LastModificationTime: psb.lastModificationTime, + PolicyStates: policyStates, + } +} + +// SetNamedTierResources sets the compute resources associated with a tier. +func (psb *AutoscalingPolicyStatusBuilder) SetNamedTierResources(namedTierResources resources.NodeSetsResources) *AutoscalingPolicyStatusBuilder { + psb.namedTierResources = namedTierResources + return psb +} + +func (psb *AutoscalingPolicyStatusBuilder) SetLastModificationTime(lastModificationTime metav1.Time) *AutoscalingPolicyStatusBuilder { + psb.lastModificationTime = lastModificationTime + return psb +} + +// WithEvent records a new event (type + message) for the tier.
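+// Messages recorded for a state type that already exists are appended, so chained calls such as
+// WithEvent(HorizontalScalingLimitReached, "msg1").WithEvent(HorizontalScalingLimitReached, "msg2")
+// produce a single PolicyState holding both messages.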
+func (psb *AutoscalingPolicyStatusBuilder) WithEvent(stateType PolicyStateType, message string) *AutoscalingPolicyStatusBuilder { + if policyState, ok := psb.states[stateType]; ok { + policyState.Messages = append(policyState.Messages, message) + psb.states[stateType] = policyState + return psb + } + psb.states[stateType] = PolicyState{ + Type: stateType, + Messages: []string{message}, + } + return psb +} + +type PolicyStateType string + +type PolicyState struct { + Type PolicyStateType `json:"type"` + Messages []string `json:"messages"` +} + +type AutoscalingStatusBuilder struct { + policyStatesBuilder map[string]*AutoscalingPolicyStatusBuilder +} + +func NewAutoscalingStatusBuilder() *AutoscalingStatusBuilder { + return &AutoscalingStatusBuilder{ + policyStatesBuilder: make(map[string]*AutoscalingPolicyStatusBuilder), + } +} + +func (psb *AutoscalingStatusBuilder) ForPolicy(policyName string) *AutoscalingPolicyStatusBuilder { + if value, ok := psb.policyStatesBuilder[policyName]; ok { + return value + } + policyStatusBuilder := NewAutoscalingPolicyStatusBuilder(policyName) + psb.policyStatesBuilder[policyName] = policyStatusBuilder + return policyStatusBuilder +} + +func (psb *AutoscalingStatusBuilder) Build() Status { + policyStates := make([]AutoscalingPolicyStatus, len(psb.policyStatesBuilder)) + i := 0 + for _, policyStateBuilder := range psb.policyStatesBuilder { + policyStates[i] = policyStateBuilder.Build() + i++ + } + + return Status{ + AutoscalingPolicyStatuses: policyStates, + } +} + +func GetStatus(es esv1.Elasticsearch) (Status, error) { + status := Status{} + if es.Annotations == nil { + return status, nil + } + serializedStatus, ok := es.Annotations[ElasticsearchAutoscalingStatusAnnotationName] + if !ok { + return status, nil + } + err := json.Unmarshal([]byte(serializedStatus), &status) + return status, err +} + +func UpdateAutoscalingStatus( + es *esv1.Elasticsearch, + statusBuilder *AutoscalingStatusBuilder, + nextClusterResources resources.ClusterResources, + actualAutoscalingStatus Status, +) error { + // Update the timestamp on tiers resources + now := metav1.Now() + for _, nextNodeSetResource := range nextClusterResources { + // Save the resources in the status + statusBuilder.ForPolicy(nextNodeSetResource.Name).SetNamedTierResources(nextNodeSetResource) + + // Restore the previous timestamp + previousTimestamp, ok := actualAutoscalingStatus.GetLastModificationTime(nextNodeSetResource.Name) + if ok { + statusBuilder.ForPolicy(nextNodeSetResource.Name).SetLastModificationTime(previousTimestamp) + } + + actualNodeSetResource, ok := actualAutoscalingStatus.GetNamedTierResources(nextNodeSetResource.Name) + if !ok || !actualNodeSetResource.SameResources(nextNodeSetResource) { + statusBuilder.ForPolicy(nextNodeSetResource.Name).SetLastModificationTime(now) + } + } + + // Create the annotation + if es.Annotations == nil { + es.Annotations = make(map[string]string) + } + status := statusBuilder.Build() + serializedStatus, err := json.Marshal(&status) + if err != nil { + return err + } + es.Annotations[ElasticsearchAutoscalingStatusAnnotationName] = string(serializedStatus) + return nil +} diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml new file mode 100644 index 0000000000..e580443ced --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml @@ -0,0 +1,130 @@ 
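+# Expected outcome for the cluster-creation test: with no autoscaling status to restore and no usable
+# autoscaling API response yet, each autoscaled tier is created with the minimum node count and
+# resources declared in its policy.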
+apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ + "policies": [{ + "name": "di", + "roles": ["data", "ingest"], + "deciders": { + "proactive_storage": { + "forecast_window": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 3, + "max": 8 + }, + "cpu": { + "min": 2, + "max": 6 + }, + "memory": { + "min": "2Gi", + "max": "8Gi" + }, + "storage": { + "min": "1Gi", + "max": "4Gi" + } + } + }, + { + "name": "ml", + "roles": ["ml"], + "deciders": { + "ml": { + "down_scale_delay": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 1, + "max": 9 + }, + "cpu": { + "min": 2, + "max": 2 + }, + "memory": { + "min": "2Gi", + "max": "6Gi" + }, + "storage": { + "min": "1Gi", + "max": "2Gi" + } + } + }] + }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":3}],"resources":{"requests":{"cpu":"2","memory":"2Gi","storage":"1Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 3 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml new file mode 100644 index 0000000000..f09fda0ac6 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml @@ -0,0 +1,80 @@ +# Brand new Elasticsearch resource with dedicated data and ml tiers. 
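+# The di and ml nodeSets below intentionally omit the node count and the container resources:
+# the autoscaling controller is expected to fill them in from the policy minimums.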
+apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + name: testes + namespace: testns + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ + "policies": [{ + "name": "di", + "roles": ["data", "ingest"], + "deciders": { + "proactive_storage": { + "forecast_window": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 3, + "max": 8 + }, + "cpu": { + "min": 2, + "max": 6 + }, + "memory": { + "min": "2Gi", + "max": "8Gi" + }, + "storage": { + "min": "1Gi", + "max": "4Gi" + } + } + }, + { + "name": "ml", + "roles": ["ml"], + "deciders": { + "ml": { + "down_scale_delay": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 1, + "max": 9 + }, + "cpu": { + "min": 2, + "max": 2 + }, + "memory": { + "min": "2Gi", + "max": "6Gi" + }, + "storage": { + "min": "1Gi", + "max": "2Gi" + } + } + }] + }' +spec: + version: 7.11.0 + nodeSets: + - name: master + count: 1 + config: + node: + roles: [ "master" ] + - name: di + config: + node: + roles: [ "data", "ingest" ] + - name: ml + config: + node: + roles: [ "ml" ] + diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/capacity.json b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/capacity.json new file mode 100644 index 0000000000..ae2489c77c --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/capacity.json @@ -0,0 +1,91 @@ +{ + "policies": { + "di": { + "required_capacity": {}, + "current_capacity": { + "node": { + "storage": 4193976320, + "memory": 8589934592 + }, + "total": { + "storage": 33384038400, + "memory": 68719476736 + } + }, + "current_nodes": [], + "deciders": { + "proactive_storage": { + "required_capacity": {}, + "reason_summary": "not enough storage available, needs 3.4gb", + "reason_details": { + "reason": "not enough storage available, needs 3.4gb", + "unassigned": 0, + "assigned": 3722575856, + "forecasted": 0, + "forecast_window": "5m" + } + }, + "reactive_storage": { + "required_capacity": {}, + "reason_summary": "", + "reason_details": {} + } + } + }, + "ml": { + "required_capacity": {}, + "current_capacity": { + "node": { + "storage": 0, + "memory": 2147483648 + }, + "total": { + "storage": 0, + "memory": 2147483648 + } + }, + "current_nodes": [ + { + "name": "testes-es-ml-0" + } + ], + "deciders": { + "ml": { + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason_summary": "Requesting scale down as tier and/or node size could be smaller", + "reason_details": { + "waiting_analytics_jobs": [], + "waiting_anomaly_jobs": [], + "configuration": { + "down_scale_delay": "5m" + }, + "perceived_current_capacity": { + "node": { + "memory": 2147483646 + }, + "total": { + "memory": 2147483647 + } + }, + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason": "Requesting scale down as tier and/or node size could be smaller" + } + } + } + } + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml new file mode 100644 index 0000000000..ad10c8ae1f --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml @@ -0,0 +1,75 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + 
elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml new file mode 100644 index 0000000000..51336f0035 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml @@ -0,0 +1,75 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 
nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/capacity.json b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/capacity.json new file mode 100644 index 0000000000..8450ffd4a1 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/capacity.json @@ -0,0 +1,148 @@ +{ + "policies": { + "di": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "current_capacity": { + "node": { + "storage": 4193976320, + "memory": 8589934592 + }, + "total": { + "storage": 33384038400, + "memory": 68719476736 + } + }, + "current_nodes": [ + { + "name": "testes-es-di-0" + }, + { + "name": "testes-es-di-1" + }, + { + "name": "testes-es-di-2" + }, + { + "name": "testes-es-di-3" + }, + { + "name": "testes-es-di-4" + }, + { + "name": "testes-es-di-5" + }, + { + "name": "testes-es-di-6" + }, + { + "name": "testes-es-di-7" + } + ], + "deciders": { + "proactive_storage": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "reason_summary": "not enough storage available, needs 3.4gb", + "reason_details": { + "reason": "not enough storage available, needs 3.4gb", + "unassigned": 0, + "assigned": 3722575856, + "forecasted": 0, + "forecast_window": "5m" + } + }, + "reactive_storage": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "reason_summary": "not enough storage available, needs 3.4gb", + "reason_details": { + "reason": "not enough storage available, needs 3.4gb", + "unassigned": 0, + "assigned": 3722575856 + } + } + } + }, + "ml": { + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "current_capacity": { + "node": { + "storage": 0, + "memory": 2147483648 + }, + "total": { + "storage": 0, + "memory": 2147483648 + } + }, + "current_nodes": [ + { + "name": "testes-es-ml-0" + } + ], + "deciders": { + "ml": { + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason_summary": "Requesting scale down as tier and/or node size could be smaller", + "reason_details": { + "waiting_analytics_jobs": [], + "waiting_anomaly_jobs": [], + 
"configuration": { + "down_scale_delay": "5m" + }, + "perceived_current_capacity": { + "node": { + "memory": 2147483646 + }, + "total": { + "memory": 2147483647 + } + }, + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason": "Requesting scale down as tier and/or node size could be smaller" + } + } + } + } + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml new file mode 100644 index 0000000000..d597fe9e2b --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml @@ -0,0 +1,130 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ + "policies": [{ + "name": "di", + "roles": ["data", "ingest"], + "deciders": { + "proactive_storage": { + "forecast_window": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 3, + "max": 8 + }, + "cpu": { + "min": 2, + "max": 6 + }, + "memory": { + "min": "2Gi", + "max": "8Gi" + }, + "storage": { + "min": "1Gi", + "max": "4Gi" + } + } + }, + { + "name": "ml", + "roles": ["ml"], + "deciders": { + "ml": { + "down_scale_delay": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 1, + "max": 9 + }, + "cpu": { + "min": 2, + "max": 2 + }, + "memory": { + "min": "2Gi", + "max": "6Gi" + }, + "storage": { + "min": "1Gi", + "max": "2Gi" + } + } + }] + }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml new file mode 100644 index 0000000000..d597fe9e2b --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml @@ -0,0 +1,130 @@ +apiVersion: 
elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ + "policies": [{ + "name": "di", + "roles": ["data", "ingest"], + "deciders": { + "proactive_storage": { + "forecast_window": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 3, + "max": 8 + }, + "cpu": { + "min": 2, + "max": 6 + }, + "memory": { + "min": "2Gi", + "max": "8Gi" + }, + "storage": { + "min": "1Gi", + "max": "4Gi" + } + } + }, + { + "name": "ml", + "roles": ["ml"], + "deciders": { + "ml": { + "down_scale_delay": "5m" + } + }, + "resources": { + "nodeCount": { + "min": 1, + "max": 9 + }, + "cpu": { + "min": 2, + "max": 2 + }, + "memory": { + "min": "2Gi", + "max": "6Gi" + }, + "storage": { + "min": "1Gi", + "max": "2Gi" + } + } + }] + }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml new file mode 100644 index 0000000000..69421fd15a --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml @@ -0,0 +1,75 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 9, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: 
'{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":9}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":3}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 9 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 3 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml new file mode 100644 index 0000000000..6875d0a2f7 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml @@ -0,0 +1,78 @@ +# This manifest can be used for offline tests, ensuring that user node increased is taken into account by the controller event if the cluster is not available. 
+# Data min nodes is increased from 3 to 9 +# ML min nodes is increased from 1 to 3 +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 9, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/ml/capacity.json b/pkg/controller/autoscaling/elasticsearch/testdata/ml/capacity.json new file mode 100644 index 0000000000..c520a00ac0 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/ml/capacity.json @@ -0,0 +1,64 @@ +{ + "policies": { + "ml_only": { + "required_capacity": { + "node": { + "memory": 3520439718 + }, + "total": { + "memory": 3119893519 + } + }, + "current_capacity": { + "node": { + "storage": 0, + "memory": 0 + }, + "total": { + "storage": 0, + "memory": 0 + } + }, + "current_nodes": [], + "deciders": { + "ml": { + "required_capacity": { + "node": { + "memory": 3520439718 + }, + "total": { + "memory": 3119893519 + } + }, + "reason_summary": "requesting scale up as number of jobs in queues exceeded configured limit", + "reason_details": { + "waiting_analytics_jobs": [ + "a" + ], + "waiting_anomaly_jobs": [ + "a" + ], + "configuration": {}, + "perceived_current_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "required_capacity": { + "node": { + "memory": 3520439718 + }, + "total": { + "memory": 3119893519 + } + }, + "reason": "requesting scale up as number of jobs in queues exceeded configured
limit" + } + } + } + } + } +} \ No newline at end of file diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml new file mode 100644 index 0000000000..fc7e3f97b8 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml @@ -0,0 +1,57 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "ml_only", "roles": ["ml"], "deciders": { "ml": {} }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 1, "max": 3 }, "memory": { "min": "2Gi", "max": "7Gi" }, "storage": { "min": "5Gi", "max": "20Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"ml_only","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"4Gi"}},"state":[],"lastModificationTime":"2021-01-19T14:20:58Z"}]}' + name: testes + namespace: testns + uid: 898d54d8-a35a-4cd7-9f36-c76ba118c090 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + store.allow_mmap: false + count: 3 + name: data + - config: + node: + roles: + - ml + store.allow_mmap: false + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: "4Gi" + requests: + cpu: "2" + memory: "4Gi" + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + version: 7.11.0 +status: + availableNodes: 4 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml new file mode 100644 index 0000000000..9acd824901 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml @@ -0,0 +1,57 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "ml_only", "roles": ["ml"], "deciders": { "ml": {} }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 1, "max": 3 }, "memory": { "min": "2Gi", "max": "7Gi" }, "storage": { "min": "5Gi", "max": "20Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"ml_only","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"3520439718"}},"state":[],"lastModificationTime":"2021-01-19T14:20:58Z"}]}' + name: testes + namespace: testns + uid: 898d54d8-a35a-4cd7-9f36-c76ba118c090 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + store.allow_mmap: false + count: 3 + name: data + - config: + node: + roles: + - ml + store.allow_mmap: false + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: "3520439718" + requests: + cpu: "2" + memory: "3520439718" + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + version: 7.11.0 +status: + availableNodes: 4 + health: green + phase: Ready + version: 7.11.0 diff --git 
a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/capacity.json b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/capacity.json new file mode 100644 index 0000000000..8450ffd4a1 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/capacity.json @@ -0,0 +1,148 @@ +{ + "policies": { + "di": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "current_capacity": { + "node": { + "storage": 4193976320, + "memory": 8589934592 + }, + "total": { + "storage": 33384038400, + "memory": 68719476736 + } + }, + "current_nodes": [ + { + "name": "testes-es-di-0" + }, + { + "name": "testes-es-di-1" + }, + { + "name": "testes-es-di-2" + }, + { + "name": "testes-es-di-3" + }, + { + "name": "testes-es-di-4" + }, + { + "name": "testes-es-di-5" + }, + { + "name": "testes-es-di-6" + }, + { + "name": "testes-es-di-7" + } + ], + "deciders": { + "proactive_storage": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "reason_summary": "not enough storage available, needs 3.4gb", + "reason_details": { + "reason": "not enough storage available, needs 3.4gb", + "unassigned": 0, + "assigned": 3722575856, + "forecasted": 0, + "forecast_window": "5m" + } + }, + "reactive_storage": { + "required_capacity": { + "node": { + "storage": 3722575856 + }, + "total": { + "storage": 37106614256 + } + }, + "reason_summary": "not enough storage available, needs 3.4gb", + "reason_details": { + "reason": "not enough storage available, needs 3.4gb", + "unassigned": 0, + "assigned": 3722575856 + } + } + } + }, + "ml": { + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "current_capacity": { + "node": { + "storage": 0, + "memory": 2147483648 + }, + "total": { + "storage": 0, + "memory": 2147483648 + } + }, + "current_nodes": [ + { + "name": "testes-es-ml-0" + } + ], + "deciders": { + "ml": { + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason_summary": "Requesting scale down as tier and/or node size could be smaller", + "reason_details": { + "waiting_analytics_jobs": [], + "waiting_anomaly_jobs": [], + "configuration": { + "down_scale_delay": "5m" + }, + "perceived_current_capacity": { + "node": { + "memory": 2147483646 + }, + "total": { + "memory": 2147483647 + } + }, + "required_capacity": { + "node": { + "memory": 0 + }, + "total": { + "memory": 0 + } + }, + "reason": "Requesting scale down as tier and/or node size could be smaller" + } + } + } + } + } +} diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml new file mode 100644 index 0000000000..044601b2f8 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml @@ -0,0 +1,75 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" 
} }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":9}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 9 + name: di + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml new file mode 100644 index 0000000000..51336f0035 --- /dev/null +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml @@ -0,0 +1,75 @@ +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + annotations: + elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' + elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' + elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg + name: testes + namespace: testns + uid: 0e400c1f-57ff-4d6e-99e7-ce9ab8a83930 +spec: + nodeSets: + - config: + node: + roles: + - master + count: 1 + name: master + - config: + node: + roles: + - data + - ingest + count: 8 + name: di + podTemplate: + spec: + containers: + - name: 
elasticsearch + resources: + limits: + memory: 8Gi + requests: + cpu: "6" + memory: 8Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4Gi + - config: + node: + roles: + - ml + count: 1 + name: ml + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + version: 7.11.0 +status: + availableNodes: 10 + health: green + phase: Ready + version: 7.11.0 diff --git a/pkg/controller/common/license/check.go b/pkg/controller/common/license/check.go index bc39def62a..b1d6bb0157 100644 --- a/pkg/controller/common/license/check.go +++ b/pkg/controller/common/license/check.go @@ -15,6 +15,11 @@ import ( "k8s.io/apimachinery/pkg/types" ) +const ( + // EventInvalidLicense describes an event fired when a license is not valid. + EventInvalidLicense = "InvalidLicense" +) + type Checker interface { CurrentEnterpriseLicense() (*EnterpriseLicense, error) EnterpriseFeaturesEnabled() (bool, error) diff --git a/pkg/controller/elasticsearch/driver/autoscaling.go b/pkg/controller/elasticsearch/driver/autoscaling.go new file mode 100644 index 0000000000..3a2f813577 --- /dev/null +++ b/pkg/controller/elasticsearch/driver/autoscaling.go @@ -0,0 +1,61 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package driver + +import ( + esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" + "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" +) + +// autoscaledResourcesSynced checks that the autoscaler controller has updated the resources +// when autoscaling is enabled. This is to avoid situations where resources have been manually +// deleted or replaced by an external event. The Elasticsearch controller should then wait for +// the Elasticsearch autoscaling controller to update the resources in the NodeSets again.
+func autoscaledResourcesSynced(es esv1.Elasticsearch) (bool, error) { + if !es.IsAutoscalingDefined() { + return true, nil + } + autoscalingSpec, err := es.GetAutoscalingSpecification() + if err != nil { + return false, err + } + autoscalingStatus, err := status.GetStatus(es) + if err != nil { + return false, err + } + + for _, nodeSet := range es.Spec.NodeSets { + nodeSetAutoscalingSpec, err := autoscalingSpec.GetAutoscalingSpecFor(nodeSet) + if err != nil { + return false, err + } + if nodeSetAutoscalingSpec == nil { + // This nodeSet is not managed by an autoscaling configuration + log.V(1).Info("NodeSet not managed by an autoscaling controller", "nodeset", nodeSet.Name) + continue + } + + s, ok := autoscalingStatus.GetNamedTierResources(nodeSetAutoscalingSpec.Name) + if !ok { + log.Info("NodeSet managed by the autoscaling controller but not found in status", + "nodeset", nodeSet.Name, + ) + return false, nil + } + inSync, err := s.IsUsedBy(esv1.ElasticsearchContainerName, nodeSet) + if err != nil { + return false, err + } + if !inSync { + log.Info("NodeSet managed by the autoscaling controller but not in sync", + "nodeset", nodeSet.Name, + "expected", s.NodeResources, + ) + return false, nil + } + } + + return true, nil +} diff --git a/pkg/controller/elasticsearch/driver/nodes.go b/pkg/controller/elasticsearch/driver/nodes.go index 03139c2f47..f3ecf8a6fa 100644 --- a/pkg/controller/elasticsearch/driver/nodes.go +++ b/pkg/controller/elasticsearch/driver/nodes.go @@ -42,6 +42,13 @@ func (d *defaultDriver) reconcileNodeSpecs( results := &reconciler.Results{} + // If some nodeSets are managed by the autoscaler, wait for them to be updated. + if ok, err := autoscaledResourcesSynced(d.ES); err != nil { + return results.WithError(fmt.Errorf("StatefulSet recreation: %w", err)) + } else if !ok { + return results.WithResult(defaultRequeue) + } + // check if actual StatefulSets and corresponding pods match our expectations before applying any change ok, err := d.expectationsSatisfied() if err != nil { diff --git a/pkg/controller/elasticsearch/volume/defaults.go b/pkg/controller/elasticsearch/volume/defaults.go index 573745e752..7fefc08f37 100644 --- a/pkg/controller/elasticsearch/volume/defaults.go +++ b/pkg/controller/elasticsearch/volume/defaults.go @@ -11,6 +11,8 @@ import ( ) var ( + DefaultPersistentVolumeSize = resource.MustParse("1Gi") + // DefaultDataVolumeClaim is the default data volume claim for Elasticsearch pods. // We default to a 1GB persistent volume, using the default storage class. 
DefaultDataVolumeClaim = corev1.PersistentVolumeClaim{ @@ -23,7 +25,7 @@ var ( }, Resources: corev1.ResourceRequirements{ Requests: corev1.ResourceList{ - corev1.ResourceStorage: resource.MustParse("1Gi"), + corev1.ResourceStorage: DefaultPersistentVolumeSize, }, }, }, From d1b78d7db6b9aa4ad8b6894a415272bba077f9ef Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 4 Feb 2021 12:45:24 +0100 Subject: [PATCH 02/19] Improve naming and comments --- .../elasticsearch/autoscaler/autoscaler.go | 12 ++--- .../autoscaler/autoscaler_test.go | 12 ++--- .../elasticsearch/autoscaler/context.go | 4 +- .../elasticsearch/autoscaler/horizontal.go | 2 +- .../elasticsearch/autoscaler/offline.go | 36 +++++++------- .../elasticsearch/autoscaler/offline_test.go | 14 +++--- .../elasticsearch/autoscaler/vertical.go | 2 +- .../autoscaling/elasticsearch/controller.go | 14 +++--- .../elasticsearch/controller_test.go | 6 +-- .../autoscaling/elasticsearch/driver.go | 48 +++++++++---------- .../autoscaling/elasticsearch/reconcile.go | 8 ++-- .../elasticsearch/resources/resources.go | 2 +- .../elasticsearch/status/actual.go | 6 +-- .../elasticsearch/status/status.go | 42 ++++++++-------- .../elasticsearch/driver/autoscaling.go | 4 +- 15 files changed, 106 insertions(+), 106 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go index ca334ac143..d356831d60 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go @@ -31,14 +31,14 @@ func (ctx *Context) GetResources() resources.NodeSetsResources { return ctx.scaleHorizontally(desiredNodeResources) } -// scaleVertically calculates the desired resources for all the nodes managed a same autoscaling policy, given the requested +// scaleVertically calculates the desired resources for all the nodes managed by the same autoscaling policy, given the requested // capacity returned by the Elasticsearch autoscaling API and the AutoscalingSpec specified by the user. // It attempts to scale all the resources vertically until the required resources are provided or the limits set by the user are reached. func (ctx *Context) scaleVertically() resources.NodeResources { // All resources can be computed "from scratch", without knowing the previous values. // This is however not true for storage. Storage can't be scaled down, current storage capacity must be considered - // as an hard min. limit. This storage limit must be taken into consideration when computing the desired resources. - currentStorage := getStorage(ctx.AutoscalingSpec, ctx.ActualAutoscalingStatus) + // as a hard min. limit. This storage limit must be taken into consideration when computing the desired resources. + currentStorage := getStorage(ctx.AutoscalingSpec, ctx.CurrentAutoscalingStatus) return ctx.nodeResources( int64(ctx.AutoscalingSpec.NodeCount.Min), currentStorage, @@ -49,7 +49,7 @@ func (ctx *Context) scaleVertically() resources.NodeResources { // The value is the max. value of either: // * the current value in the status // * the min. value set by the user in the autoscaling spec. -func getStorage(autoscalingSpec esv1.AutoscalingPolicySpec, actualAutoscalingStatus status.Status) resource.Quantity { +func getStorage(autoscalingSpec esv1.AutoscalingPolicySpec, currentAutoscalingStatus status.Status) resource.Quantity { // If no storage spec is defined in the autoscaling status we return the default volume size. 
storage := volume.DefaultPersistentVolumeSize.DeepCopy() // Always adjust to the min value specified by the user in the limits. @@ -57,8 +57,8 @@ func getStorage(autoscalingSpec esv1.AutoscalingPolicySpec, actualAutoscalingSta storage = autoscalingSpec.Storage.Min } // If a storage value is stored in the status then reuse it. - if actualResources, exists := actualAutoscalingStatus.GetNamedTierResources(autoscalingSpec.Name); exists && actualResources.HasRequest(corev1.ResourceStorage) { - storageInStatus := actualResources.GetRequest(corev1.ResourceStorage) + if currentResourcesInStatus, exists := currentAutoscalingStatus.CurrentResourcesForPolicy(autoscalingSpec.Name); exists && currentResourcesInStatus.HasRequest(corev1.ResourceStorage) { + storageInStatus := currentResourcesInStatus.GetRequest(corev1.ResourceStorage) if storageInStatus.Cmp(storage) > 0 { storage = storageInStatus } diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go index df99bb9fb6..9b9468ba3a 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go @@ -143,12 +143,12 @@ func Test_applyScaleDecision(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := Context{ - Log: logTest, - AutoscalingSpec: tt.args.policy, - NodeSets: tt.args.currentNodeSets, - ActualAutoscalingStatus: tt.args.nodeSetsStatus, - RequiredCapacity: tt.args.requiredCapacity, - StatusBuilder: status.NewAutoscalingStatusBuilder(), + Log: logTest, + AutoscalingSpec: tt.args.policy, + NodeSets: tt.args.currentNodeSets, + CurrentAutoscalingStatus: tt.args.nodeSetsStatus, + RequiredCapacity: tt.args.requiredCapacity, + StatusBuilder: status.NewAutoscalingStatusBuilder(), } if got := ctx.GetResources(); !equality.Semantic.DeepEqual(got, tt.want) { t.Errorf("autoscaler.GetResources() = %v, want %v", got, tt.want) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go index 042454d2cc..12cae7a8c9 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/context.go @@ -18,8 +18,8 @@ type Context struct { AutoscalingSpec esv1.AutoscalingPolicySpec // NodeSets is the list of the NodeSets managed by the autoscaling specification. NodeSets esv1.NodeSetList - // ActualAutoscalingStatus is the current resources status as stored in the Elasticsearch resource. - ActualAutoscalingStatus status.Status + // CurrentAutoscalingStatus is the current resources status as stored in the Elasticsearch resource. + CurrentAutoscalingStatus status.Status // RequiredCapacity contains the Elasticsearch Autoscaling API result. RequiredCapacity client.AutoscalingCapacityInfo // StatusBuilder is used to track any event that should be surfaced to the user. diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go index 0355a32062..4a10fd47e3 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go @@ -81,7 +81,7 @@ func (ctx *Context) getNodesToAdd( // Also surface this situation in the status. ctx.StatusBuilder. ForPolicy(ctx.AutoscalingSpec.Name). 
- WithEvent( + RecordEvent( status.HorizontalScalingLimitReached, fmt.Sprintf("Can't provide total required %s %d, max number of nodes is %d, requires %d nodes", resourceName, totalRequiredCapacity, maxNodes, minNodes+nodeToAdd), ) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go index 1e5975297c..20e6ec288f 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go @@ -21,20 +21,20 @@ func GetOfflineNodeSetsResources( log logr.Logger, nodeSets []string, autoscalingSpec esv1.AutoscalingPolicySpec, - actualAutoscalingStatus status.Status, + currentAutoscalingStatus status.Status, ) resources.NodeSetsResources { - actualNamedTierResources, hasNamedTierResources := actualAutoscalingStatus.GetNamedTierResources(autoscalingSpec.Name) + currentNamedTierResources, hasNamedTierResources := currentAutoscalingStatus.CurrentResourcesForPolicy(autoscalingSpec.Name) - var namedTierResources resources.NodeSetsResources + var nodeSetsResources resources.NodeSetsResources var expectedNodeCount int32 if !hasNamedTierResources { // There's no current status for this nodeSet, this happens when the Elasticsearch cluster does not exist. // In that case we create a new one from the minimum values provided by the user. - namedTierResources = newMinNodeSetResources(autoscalingSpec, nodeSets) + nodeSetsResources = newMinNodeSetResources(autoscalingSpec, nodeSets) } else { // The status contains some resource values for the NodeSets managed by this autoscaling policy, let's reuse them. - namedTierResources = nodeSetResourcesFromStatus(actualAutoscalingStatus, actualNamedTierResources, autoscalingSpec, nodeSets) - for _, nodeSet := range actualNamedTierResources.NodeSetNodeCount { + nodeSetsResources = nodeSetResourcesFromStatus(currentAutoscalingStatus, currentNamedTierResources, autoscalingSpec, nodeSets) + for _, nodeSet := range currentNamedTierResources.NodeSetNodeCount { expectedNodeCount += nodeSet.NodeCount } } @@ -48,7 +48,7 @@ func GetOfflineNodeSetsResources( // User may have added or removed some NodeSets while the autoscaling API is not available. // We distribute the nodes to reflect that change. - fnm := NewFairNodesManager(log, namedTierResources.NodeSetNodeCount) + fnm := NewFairNodesManager(log, nodeSetsResources.NodeSetNodeCount) for expectedNodeCount > 0 { fnm.AddNode() expectedNodeCount-- @@ -58,28 +58,28 @@ func GetOfflineNodeSetsResources( "Offline autoscaling", "state", "offline", "policy", autoscalingSpec.Name, - "nodeset", namedTierResources.NodeSetNodeCount, - "count", namedTierResources.NodeSetNodeCount.TotalNodeCount(), - "resources", namedTierResources.ToInt64(), + "nodeset", nodeSetsResources.NodeSetNodeCount, + "count", nodeSetsResources.NodeSetNodeCount.TotalNodeCount(), + "resources", nodeSetsResources.ToInt64(), ) - return namedTierResources + return nodeSetsResources } // nodeSetResourcesFromStatus restores NodeSetResources from the status. // If user removed the limits while offline we are assuming that it wants to take back control on the resources. 
func nodeSetResourcesFromStatus( - actualAutoscalingStatus status.Status, - actualNamedTierResources resources.NodeSetsResources, + currentAutoscalingStatus status.Status, + currentNamedTierResources resources.NodeSetsResources, autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets []string, ) resources.NodeSetsResources { namedTierResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) // Ensure memory settings are in the allowed limit range. if autoscalingSpec.IsMemoryDefined() { - if actualNamedTierResources.HasRequest(corev1.ResourceMemory) { + if currentNamedTierResources.HasRequest(corev1.ResourceMemory) { namedTierResources.SetRequest( corev1.ResourceMemory, - adjustQuantity(actualNamedTierResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), + adjustQuantity(currentNamedTierResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), ) } else { namedTierResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) @@ -88,10 +88,10 @@ func nodeSetResourcesFromStatus( // Ensure CPU settings are in the allowed limit range. if autoscalingSpec.IsCPUDefined() { - if actualNamedTierResources.HasRequest(corev1.ResourceCPU) { + if currentNamedTierResources.HasRequest(corev1.ResourceCPU) { namedTierResources.SetRequest( corev1.ResourceCPU, - adjustQuantity(actualNamedTierResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), + adjustQuantity(currentNamedTierResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), ) } else { namedTierResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) @@ -99,7 +99,7 @@ func nodeSetResourcesFromStatus( } // Ensure storage capacity is set - namedTierResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, actualAutoscalingStatus)) + namedTierResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, currentAutoscalingStatus)) return namedTierResources } diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go index 9970b6dba0..5ce7069536 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go @@ -20,9 +20,9 @@ var logTest = logf.Log.WithName("autoscaling-test") func TestGetOfflineNodeSetsResources(t *testing.T) { type args struct { - nodeSets []string - autoscalingSpec esv1.AutoscalingPolicySpec - actualAutoscalingStatus status.Status + nodeSets []string + autoscalingSpec esv1.AutoscalingPolicySpec + currentAutoscalingStatus status.Status } tests := []struct { name string @@ -34,7 +34,7 @@ func TestGetOfflineNodeSetsResources(t *testing.T) { args: args{ nodeSets: []string{"region-a", "region-b"}, autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("2Gi", "6Gi").WithStorage("10Gi", "20Gi").Build(), - actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + currentAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ Name: "my-autoscaling-policy", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: 
q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, @@ -50,7 +50,7 @@ func TestGetOfflineNodeSetsResources(t *testing.T) { args: args{ nodeSets: []string{"region-a", "region-b"}, autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("50Gi", "60Gi").WithStorage("10Gi", "20Gi").Build(), - actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + currentAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ Name: "my-autoscaling-policy", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, @@ -66,7 +66,7 @@ func TestGetOfflineNodeSetsResources(t *testing.T) { args: args{ nodeSets: []string{"region-a", "region-b", "region-new"}, autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("2Gi", "6Gi").WithStorage("10Gi", "20Gi").Build(), - actualAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + currentAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ Name: "my-autoscaling-policy", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}}}}, @@ -80,7 +80,7 @@ func TestGetOfflineNodeSetsResources(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := GetOfflineNodeSetsResources(logTest, tt.args.nodeSets, tt.args.autoscalingSpec, tt.args.actualAutoscalingStatus); !reflect.DeepEqual(got, tt.want) { + if got := GetOfflineNodeSetsResources(logTest, tt.args.nodeSets, tt.args.autoscalingSpec, tt.args.currentAutoscalingStatus); !reflect.DeepEqual(got, tt.want) { t.Errorf("GetOfflineNodeSetsResources() = %v, want %v", got, tt.want) } }) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go index 44e7126868..9a4f4ede7f 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -98,7 +98,7 @@ func (ctx *Context) getResourceValue( // Also update the autoscaling status accordingly ctx.StatusBuilder. ForPolicy(autoscalingPolicyName). 
- WithEvent( + RecordEvent( status.VerticalScalingLimitReached, fmt.Sprintf("Node required %s %d is greater than max allowed: %d", resourceType, nodeRequired, max.Value()), ) diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index 5a82b1a06d..48e3dfc2fc 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -139,7 +139,7 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil } // Build status from annotation or existing resources - autoscalingStatus, err := status.GetStatus(es) + autoscalingStatus, err := status.From(es) if err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } @@ -149,21 +149,21 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil return reconcile.Result{}, nil } - // Compute named tiers - namedTiers, nodeSetErr := autoscalingSpecification.GetAutoscaledNodeSets() + // Get autoscaling policies and the associated node sets. + autoscaledNodeSets, nodeSetErr := autoscalingSpecification.GetAutoscaledNodeSets() if nodeSetErr != nil { return reconcile.Result{}, tracing.CaptureError(ctx, nodeSetErr) } - log.V(1).Info("Named tiers", "named_tiers", namedTiers) + log.V(1).Info("Autoscaling policies and node sets", "policies", autoscaledNodeSets) - // Import existing resources in the actual Status if the cluster is managed by some autoscaling policies but + // Import existing resources in the current Status if the cluster is managed by some autoscaling policies but // the status annotation does not exist. - if err := autoscalingStatus.ImportExistingResources(log, r.Client, autoscalingSpecification, namedTiers); err != nil { + if err := autoscalingStatus.ImportExistingResources(log, r.Client, autoscalingSpecification, autoscaledNodeSets); err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } // Call the main function - current, err := r.reconcileInternal(ctx, autoscalingStatus, namedTiers, autoscalingSpecification, es) + current, err := r.reconcileInternal(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpecification, es) if err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } diff --git a/pkg/controller/autoscaling/elasticsearch/controller_test.go b/pkg/controller/autoscaling/elasticsearch/controller_test.go index 8b68fdf6a3..9c90b1dc22 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller_test.go +++ b/pkg/controller/autoscaling/elasticsearch/controller_test.go @@ -196,7 +196,7 @@ func TestReconcile(t *testing.T) { t.Run(tt.name, func(t *testing.T) { k8sClient := k8s.NewFakeClient() if tt.args.esManifest != "" { - // Load the actual Elasticsearch resource from the sample files. + // Load the current Elasticsearch resource from the sample files. 
es := esv1.Elasticsearch{} bytes, err := ioutil.ReadFile(filepath.Join("testdata", tt.args.esManifest, "elasticsearch.yml")) require.NoError(t, err) @@ -257,9 +257,9 @@ func TestReconcile(t *testing.T) { } func statusesEqual(t *testing.T, got, want esv1.Elasticsearch) { - gotStatus, err := status.GetStatus(got) + gotStatus, err := status.From(got) require.NoError(t, err) - wantStatus, err := status.GetStatus(want) + wantStatus, err := status.From(want) require.NoError(t, err) require.Equal(t, len(gotStatus.AutoscalingPolicyStatuses), len(wantStatus.AutoscalingPolicyStatuses)) for _, wantPolicyStatus := range wantStatus.AutoscalingPolicyStatuses { diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go index ef5a328894..f61767a4cd 100644 --- a/pkg/controller/autoscaling/elasticsearch/driver.go +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -26,7 +26,7 @@ import ( func (r *ReconcileElasticsearch) reconcileInternal( ctx context.Context, autoscalingStatus status.Status, - namedTiers esv1.AutoscaledNodeSets, + autoscaledNodeSets esv1.AutoscaledNodeSets, autoscalingSpec esv1.AutoscalingSpec, es esv1.Elasticsearch, ) (reconcile.Result, error) { @@ -42,16 +42,16 @@ func (r *ReconcileElasticsearch) reconcileInternal( "error.message", err.Error(), ) } - return r.doOfflineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results) + return r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results) } // Cluster is expected to be online and reachable, attempt a call to the autoscaling API. // If an error occurs we still attempt an offline reconciliation to enforce limits set by the user. - result, err := r.attemptOnlineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results) + result, err := r.attemptOnlineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results) if err != nil { log.Error(tracing.CaptureError(ctx, err), "autoscaling online reconciliation failed") // Attempt an offline reconciliation - if _, err := r.doOfflineReconciliation(ctx, autoscalingStatus, namedTiers, autoscalingSpec, es, results); err != nil { + if _, err := r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results); err != nil { log.Error(tracing.CaptureError(ctx, err), "autoscaling offline reconciliation failed") } } @@ -79,8 +79,8 @@ func (r *ReconcileElasticsearch) isElasticsearchReachable(ctx context.Context, e // attemptOnlineReconciliation attempts an online autoscaling reconciliation with a call the Elasticsearch autoscaling API. func (r *ReconcileElasticsearch) attemptOnlineReconciliation( ctx context.Context, - actualAutoscalingStatus status.Status, - namedTiers esv1.AutoscaledNodeSets, + currentAutoscalingStatus status.Status, + autoscaledNodeSets esv1.AutoscaledNodeSets, autoscalingSpecs esv1.AutoscalingSpec, es esv1.Elasticsearch, results *reconciler.Results, @@ -122,13 +122,13 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( // For each autoscaling policy we compute the resources to be applied to the related nodeSets. for _, autoscalingPolicy := range autoscalingSpecs.AutoscalingPolicySpecs { // Get the currentNodeSets - nodeSetList, exists := namedTiers[autoscalingPolicy.Name] + nodeSetList, exists := autoscaledNodeSets[autoscalingPolicy.Name] if !exists { // This situation should be caught during the validation, we still want to trace this error if it happens. 
err := fmt.Errorf("no nodeSets for tier %s", autoscalingPolicy.Name) log.Error(err, "no nodeSet for a tier", "policy", autoscalingPolicy.Name) results.WithError(fmt.Errorf("no nodeSets for tier %s", autoscalingPolicy.Name)) - statusBuilder.ForPolicy(autoscalingPolicy.Name).WithEvent(status.NoNodeSet, err.Error()) + statusBuilder.ForPolicy(autoscalingPolicy.Name).RecordEvent(status.NoNodeSet, err.Error()) continue } @@ -141,8 +141,8 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( "No decision received from Elasticsearch, ensure resources limits are respected", "policy", autoscalingPolicy.Name, ) - statusBuilder.ForPolicy(autoscalingPolicy.Name).WithEvent(status.EmptyResponse, "No required capacity from Elasticsearch") - nodeSetsResources = autoscaler.GetOfflineNodeSetsResources(log, nodeSetList.Names(), autoscalingPolicy, actualAutoscalingStatus) + statusBuilder.ForPolicy(autoscalingPolicy.Name).RecordEvent(status.EmptyResponse, "No required capacity from Elasticsearch") + nodeSetsResources = autoscaler.GetOfflineNodeSetsResources(log, nodeSetList.Names(), autoscalingPolicy, currentAutoscalingStatus) case true: // We received a capacity decision from Elasticsearch for this policy. log.Info( @@ -157,12 +157,12 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( continue } ctx := autoscaler.Context{ - Log: log, - AutoscalingSpec: autoscalingPolicy, - NodeSets: nodeSetList, - ActualAutoscalingStatus: actualAutoscalingStatus, - RequiredCapacity: capacity.RequiredCapacity, - StatusBuilder: statusBuilder, + Log: log, + AutoscalingSpec: autoscalingPolicy, + NodeSets: nodeSetList, + CurrentAutoscalingStatus: currentAutoscalingStatus, + RequiredCapacity: capacity.RequiredCapacity, + StatusBuilder: statusBuilder, } nodeSetsResources = ctx.GetResources() } @@ -174,7 +174,7 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( status.EmitEvents(es, r.recorder, statusBuilder.Build()) // Update the Elasticsearch resource with the calculated resources. 
- if err := reconcileElasticsearch(log, &es, statusBuilder, nextClusterResources, actualAutoscalingStatus); err != nil { + if err := reconcileElasticsearch(log, &es, statusBuilder, nextClusterResources, currentAutoscalingStatus); err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } @@ -200,12 +200,12 @@ func canDecide(log logr.Logger, requiredCapacity esclient.AutoscalingCapacityInf result := true if (requiredCapacity.Node.Memory != nil || requiredCapacity.Total.Memory != nil) && !spec.IsMemoryDefined() { log.Error(fmt.Errorf("min and max memory must be specified"), "Min and max memory must be specified", "policy", spec.Name) - statusBuilder.ForPolicy(spec.Name).WithEvent(status.MemoryRequired, "Min and max memory must be specified") + statusBuilder.ForPolicy(spec.Name).RecordEvent(status.MemoryRequired, "Min and max memory must be specified") result = false } if (requiredCapacity.Node.Storage != nil || requiredCapacity.Total.Storage != nil) && !spec.IsStorageDefined() { log.Error(fmt.Errorf("min and max memory must be specified"), "Min and max storage must be specified", "policy", spec.Name) - statusBuilder.ForPolicy(spec.Name).WithEvent(status.StorageRequired, "Min and max storage must be specified") + statusBuilder.ForPolicy(spec.Name).RecordEvent(status.StorageRequired, "Min and max storage must be specified") result = false } return result @@ -214,8 +214,8 @@ func canDecide(log logr.Logger, requiredCapacity esclient.AutoscalingCapacityInf // doOfflineReconciliation runs an autoscaling reconciliation if the autoscaling API is not ready (yet). func (r *ReconcileElasticsearch) doOfflineReconciliation( ctx context.Context, - actualAutoscalingStatus status.Status, - namedTiers esv1.AutoscaledNodeSets, + currentAutoscalingStatus status.Status, + autoscaledNodeSets esv1.AutoscaledNodeSets, autoscalingSpec esv1.AutoscalingSpec, es esv1.Elasticsearch, results *reconciler.Results, @@ -227,11 +227,11 @@ func (r *ReconcileElasticsearch) doOfflineReconciliation( var clusterNodeSetsResources resources.ClusterResources // Elasticsearch is not reachable, we still want to ensure that min. 
requirements are set for _, autoscalingSpec := range autoscalingSpec.AutoscalingPolicySpecs { - nodeSets, exists := namedTiers[autoscalingSpec.Name] + nodeSets, exists := autoscaledNodeSets[autoscalingSpec.Name] if !exists { return results.WithError(fmt.Errorf("no nodeSets for tier %s", autoscalingSpec.Name)).Aggregate() } - nodeSetsResources := autoscaler.GetOfflineNodeSetsResources(log, nodeSets.Names(), autoscalingSpec, actualAutoscalingStatus) + nodeSetsResources := autoscaler.GetOfflineNodeSetsResources(log, nodeSets.Names(), autoscalingSpec, currentAutoscalingStatus) clusterNodeSetsResources = append(clusterNodeSetsResources, nodeSetsResources) } @@ -239,7 +239,7 @@ func (r *ReconcileElasticsearch) doOfflineReconciliation( status.EmitEvents(es, r.recorder, statusBuilder.Build()) // Update the Elasticsearch manifest - if err := reconcileElasticsearch(log, &es, statusBuilder, clusterNodeSetsResources, actualAutoscalingStatus); err != nil { + if err := reconcileElasticsearch(log, &es, statusBuilder, clusterNodeSetsResources, currentAutoscalingStatus); err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } diff --git a/pkg/controller/autoscaling/elasticsearch/reconcile.go b/pkg/controller/autoscaling/elasticsearch/reconcile.go index 591ccdfa0f..849c4c4214 100644 --- a/pkg/controller/autoscaling/elasticsearch/reconcile.go +++ b/pkg/controller/autoscaling/elasticsearch/reconcile.go @@ -31,7 +31,7 @@ func reconcileElasticsearch( es *esv1.Elasticsearch, statusBuilder *status.AutoscalingStatusBuilder, nextClusterResources resources.ClusterResources, - actualAutoscalingStatus status.Status, + currentAutoscalingStatus status.Status, ) error { nextResourcesByNodeSet := nextClusterResources.ByNodeSet() for i := range es.Spec.NodeSets { @@ -45,7 +45,7 @@ func reconcileElasticsearch( container, containers := removeContainer(esv1.ElasticsearchContainerName, es.Spec.NodeSets[i].PodTemplate.Spec.Containers) // Create a copy to compare if some changes have been made. - actualContainer := container.DeepCopy() + currentContainer := container.DeepCopy() if container == nil { container = &corev1.Container{ Name: esv1.ElasticsearchContainerName, @@ -84,13 +84,13 @@ func reconcileElasticsearch( // Update the NodeSet es.Spec.NodeSets[i].PodTemplate.Spec.Containers = containers - if !apiequality.Semantic.DeepEqual(actualContainer, container) { + if !apiequality.Semantic.DeepEqual(currentContainer, container) { log.V(1).Info("Updating nodeset with resources", "nodeset", name, "resources", nextClusterResources) } } // Update autoscaling status - return status.UpdateAutoscalingStatus(es, statusBuilder, nextClusterResources, actualAutoscalingStatus) + return status.UpdateAutoscalingStatus(es, statusBuilder, nextClusterResources, currentAutoscalingStatus) } func newVolumeClaimTemplate(storageQuantity resource.Quantity, nodeSet esv1.NodeSet) ([]corev1.PersistentVolumeClaim, error) { diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources.go b/pkg/controller/autoscaling/elasticsearch/resources/resources.go index d6d342c56d..4a8c2707f7 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources.go @@ -249,7 +249,7 @@ type NodeSetResources struct { *NodeSetsResources } -// SameResources compares the resources allocated to the nodes in a named tier and returns true +// SameResources compares the resources allocated to two sets of nodeSets in an autoscaling policy and returns true // if they are equal.
func (ntr NodeSetsResources) SameResources(other NodeSetsResources) bool { thisByName := ntr.NodeSetNodeCount.ByNodeSet() diff --git a/pkg/controller/autoscaling/elasticsearch/status/actual.go b/pkg/controller/autoscaling/elasticsearch/status/actual.go index 91c34b6872..c2645a87d5 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/actual.go +++ b/pkg/controller/autoscaling/elasticsearch/status/actual.go @@ -19,11 +19,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// ImportExistingResources attempts to infer the resources to use in a tier if an autoscaling policy is not in the Status. +// ImportExistingResources attempts to infer the resources to use in a tier if an autoscaling policy is not in the Status. // It can be the case if: // * The cluster was manually managed and the user wants to manage resources with the autoscaling controller. In that case // we want to be able to set some good default resources even if the autoscaling API is not responding. -// * The Elasticsearch has been replaced and the status annotation has been lost. +// * The Elasticsearch resource has been replaced and the status annotation has been lost in the process. func (s *Status) ImportExistingResources( log logr.Logger, c k8s.Client, @@ -31,7 +31,7 @@ func (s *Status) ImportExistingResources( namedTiers esv1.AutoscaledNodeSets, ) error { for _, autoscalingPolicy := range as.AutoscalingPolicySpecs { - if _, inStatus := s.GetNamedTierResources(autoscalingPolicy.Name); inStatus { + if _, inStatus := s.CurrentResourcesForPolicy(autoscalingPolicy.Name); inStatus { // This autoscaling policy is already managed and we have some resources in the Status. continue } diff --git a/pkg/controller/autoscaling/elasticsearch/status/status.go b/pkg/controller/autoscaling/elasticsearch/status/status.go index a082a8fd5d..153fbce2f7 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/status.go +++ b/pkg/controller/autoscaling/elasticsearch/status/status.go @@ -24,7 +24,7 @@ const ( ) type Status struct { - // PolicyStatus is used to expose state messages to user or external system + // AutoscalingPolicyStatuses is used to expose state messages to user or external system AutoscalingPolicyStatuses []AutoscalingPolicyStatus `json:"policies"` } @@ -33,7 +33,7 @@ type AutoscalingPolicyStatus struct { Name string `json:"name"` // NodeSetNodeCount holds the number of nodes for each nodeSet. NodeSetNodeCount resources.NodeSetNodeCountList `json:"nodeSets"` - // ResourcesSpecification holds the resource values common to all the nodeSet managed by a same autoscaling policy. + // ResourcesSpecification holds the resource values common to all the nodeSets managed by a same autoscaling policy. // Only the resources managed by the autoscaling controller are saved in the Status. ResourcesSpecification resources.NodeResources `json:"resources"` // PolicyStates may contain various messages regarding the current state of this autoscaling policy. 
@@ -42,7 +42,7 @@ type AutoscalingPolicyStatus struct { LastModificationTime metav1.Time `json:"lastModificationTime"` } -func (s *Status) GetNamedTierResources(policyName string) (resources.NodeSetsResources, bool) { +func (s *Status) CurrentResourcesForPolicy(policyName string) (resources.NodeSetsResources, bool) { for _, policyStatus := range s.AutoscalingPolicyStatuses { if policyStatus.Name == policyName { return resources.NodeSetsResources{ @@ -55,7 +55,7 @@ func (s *Status) GetNamedTierResources(policyName string) (resources.NodeSetsRes return resources.NodeSetsResources{}, false } -func (s *Status) GetLastModificationTime(policyName string) (metav1.Time, bool) { +func (s *Status) LastModificationTime(policyName string) (metav1.Time, bool) { for _, policyState := range s.AutoscalingPolicyStatuses { if policyState.Name == policyName { return policyState.LastModificationTime, true @@ -108,8 +108,8 @@ func (psb *AutoscalingPolicyStatusBuilder) SetLastModificationTime(lastModificat return psb } -// WithEvent records a new event (type + message) for the tier. -func (psb *AutoscalingPolicyStatusBuilder) WithEvent(stateType PolicyStateType, message string) *AutoscalingPolicyStatusBuilder { +// RecordEvent records a new event (type + message) for the tier. +func (psb *AutoscalingPolicyStatusBuilder) RecordEvent(stateType PolicyStateType, message string) *AutoscalingPolicyStatusBuilder { if policyState, ok := psb.states[stateType]; ok { policyState.Messages = append(policyState.Messages, message) psb.states[stateType] = policyState @@ -130,28 +130,28 @@ type PolicyState struct { } type AutoscalingStatusBuilder struct { - policyStatesBuilder map[string]*AutoscalingPolicyStatusBuilder + policyStatusBuilder map[string]*AutoscalingPolicyStatusBuilder } func NewAutoscalingStatusBuilder() *AutoscalingStatusBuilder { return &AutoscalingStatusBuilder{ - policyStatesBuilder: make(map[string]*AutoscalingPolicyStatusBuilder), + policyStatusBuilder: make(map[string]*AutoscalingPolicyStatusBuilder), } } func (psb *AutoscalingStatusBuilder) ForPolicy(policyName string) *AutoscalingPolicyStatusBuilder { - if value, ok := psb.policyStatesBuilder[policyName]; ok { + if value, ok := psb.policyStatusBuilder[policyName]; ok { return value } policyStatusBuilder := NewAutoscalingPolicyStatusBuilder(policyName) - psb.policyStatesBuilder[policyName] = policyStatusBuilder + psb.policyStatusBuilder[policyName] = policyStatusBuilder return policyStatusBuilder } func (psb *AutoscalingStatusBuilder) Build() Status { - policyStates := make([]AutoscalingPolicyStatus, len(psb.policyStatesBuilder)) + policyStates := make([]AutoscalingPolicyStatus, len(psb.policyStatusBuilder)) i := 0 - for _, policyStateBuilder := range psb.policyStatesBuilder { + for _, policyStateBuilder := range psb.policyStatusBuilder { policyStates[i] = policyStateBuilder.Build() i++ } @@ -161,7 +161,7 @@ func (psb *AutoscalingStatusBuilder) Build() Status { } } -func GetStatus(es esv1.Elasticsearch) (Status, error) { +func From(es esv1.Elasticsearch) (Status, error) { status := Status{} if es.Annotations == nil { return status, nil @@ -178,23 +178,23 @@ func UpdateAutoscalingStatus( es *esv1.Elasticsearch, statusBuilder *AutoscalingStatusBuilder, nextClusterResources resources.ClusterResources, - actualAutoscalingStatus Status, + currentAutoscalingStatus Status, ) error { // Update the timestamp on tiers resources now := metav1.Now() - for _, nextNodeSetResource := range nextClusterResources { + for _, nextNodeSetResources := range 
nextClusterResources { // Save the resources in the status - statusBuilder.ForPolicy(nextNodeSetResource.Name).SetNamedTierResources(nextNodeSetResource) + statusBuilder.ForPolicy(nextNodeSetResources.Name).SetNamedTierResources(nextNodeSetResources) // Restore the previous timestamp - previousTimestamp, ok := actualAutoscalingStatus.GetLastModificationTime(nextNodeSetResource.Name) + previousTimestamp, ok := currentAutoscalingStatus.LastModificationTime(nextNodeSetResources.Name) if ok { - statusBuilder.ForPolicy(nextNodeSetResource.Name).SetLastModificationTime(previousTimestamp) + statusBuilder.ForPolicy(nextNodeSetResources.Name).SetLastModificationTime(previousTimestamp) } - actualNodeSetResource, ok := actualAutoscalingStatus.GetNamedTierResources(nextNodeSetResource.Name) - if !ok || !actualNodeSetResource.SameResources(nextNodeSetResource) { - statusBuilder.ForPolicy(nextNodeSetResource.Name).SetLastModificationTime(now) + currentNodeSetResources, ok := currentAutoscalingStatus.CurrentResourcesForPolicy(nextNodeSetResources.Name) + if !ok || !currentNodeSetResources.SameResources(nextNodeSetResources) { + statusBuilder.ForPolicy(nextNodeSetResources.Name).SetLastModificationTime(now) } } diff --git a/pkg/controller/elasticsearch/driver/autoscaling.go b/pkg/controller/elasticsearch/driver/autoscaling.go index 3a2f813577..9c08ff85ed 100644 --- a/pkg/controller/elasticsearch/driver/autoscaling.go +++ b/pkg/controller/elasticsearch/driver/autoscaling.go @@ -21,7 +21,7 @@ func autoscaledResourcesSynced(es esv1.Elasticsearch) (bool, error) { if err != nil { return false, err } - autoscalingStatus, err := status.GetStatus(es) + autoscalingStatus, err := status.From(es) if err != nil { return false, err } @@ -37,7 +37,7 @@ func autoscaledResourcesSynced(es esv1.Elasticsearch) (bool, error) { continue } - s, ok := autoscalingStatus.GetNamedTierResources(nodeSetAutoscalingSpec.Name) + s, ok := autoscalingStatus.CurrentResourcesForPolicy(nodeSetAutoscalingSpec.Name) if !ok { log.Info("NodeSet managed by the autoscaling controller but not found in status", "nodeset", nodeSet.Name, From 858446a74ec5e8c7febfcc01407924855547aa54 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 4 Feb 2021 14:43:27 +0100 Subject: [PATCH 03/19] Fix compilation error --- pkg/controller/autoscaling/elasticsearch/controller_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/controller_test.go b/pkg/controller/autoscaling/elasticsearch/controller_test.go index 9c90b1dc22..4fd680656d 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller_test.go +++ b/pkg/controller/autoscaling/elasticsearch/controller_test.go @@ -291,8 +291,7 @@ type fakeEsClient struct { t *testing.T esclient.Client - autoscalingPolicies AutoscalingCapacityResult - + autoscalingPolicies esclient.AutoscalingCapacityResult policiesCleaned bool errorOnDeleteAutoscalingAutoscalingPolicies bool updatedPolicies map[string]esv1.AutoscalingPolicy From 7486448b8421747178cb727130fd52ff6c0a0806 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 4 Feb 2021 14:53:47 +0100 Subject: [PATCH 04/19] Elasticsearch is already in the autoscaling spec --- .../elasticsearch/autoscaler/vertical.go | 8 ++--- .../autoscaling/elasticsearch/driver.go | 36 +++++++++---------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go index 
9a4f4ede7f..79eefa836d 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -23,7 +23,6 @@ func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Q // Compute desired memory quantity for the nodes managed by this AutoscalingPolicySpec. if !ctx.RequiredCapacity.Node.Memory.IsEmpty() { memoryRequest := ctx.getResourceValue( - ctx.AutoscalingSpec.Name, "memory", ctx.RequiredCapacity.Node.Memory, ctx.RequiredCapacity.Total.Memory, @@ -37,7 +36,6 @@ func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Q // Compute desired storage quantity for the nodes managed by this AutoscalingPolicySpec. if !ctx.RequiredCapacity.Node.Storage.IsEmpty() { storageRequest := ctx.getResourceValue( - ctx.AutoscalingSpec.Name, "storage", ctx.RequiredCapacity.Node.Storage, ctx.RequiredCapacity.Total.Storage, @@ -73,7 +71,7 @@ func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Q // calculated according to the required value from the Elasticsearch autoscaling API and the resource constraints (limits) // set by the user in the autoscaling specification. func (ctx *Context) getResourceValue( - autoscalingPolicyName, resourceType string, + resourceType string, nodeRequired *client.AutoscalingCapacity, // node required capacity as returned by the Elasticsearch API totalRequired *client.AutoscalingCapacity, // tier required capacity as returned by the Elasticsearch API, considered as optional minNodesCount int64, // the minimum of nodes that will be deployed @@ -91,13 +89,13 @@ func (ctx *Context) getResourceValue( ctx.Log.Error( err, err.Error(), "scope", "node", - "policy", autoscalingPolicyName, + "policy", ctx.AutoscalingSpec.Name, "required_"+resourceType, nodeRequired, "max_allowed_memory", max.Value(), ) // Also update the autoscaling status accordingly ctx.StatusBuilder. - ForPolicy(autoscalingPolicyName). + ForPolicy(ctx.AutoscalingSpec.Name). RecordEvent( status.VerticalScalingLimitReached, fmt.Sprintf("Node required %s %d is greater than max allowed: %d", resourceType, nodeRequired, max.Value()), diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go index f61767a4cd..f38d95b8db 100644 --- a/pkg/controller/autoscaling/elasticsearch/driver.go +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -42,16 +42,16 @@ func (r *ReconcileElasticsearch) reconcileInternal( "error.message", err.Error(), ) } - return r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results) + return r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, results) } // Cluster is expected to be online and reachable, attempt a call to the autoscaling API. // If an error occurs we still attempt an offline reconciliation to enforce limits set by the user. 
- result, err := r.attemptOnlineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results) + result, err := r.attemptOnlineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, results) if err != nil { log.Error(tracing.CaptureError(ctx, err), "autoscaling online reconciliation failed") // Attempt an offline reconciliation - if _, err := r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, es, results); err != nil { + if _, err := r.doOfflineReconciliation(ctx, autoscalingStatus, autoscaledNodeSets, autoscalingSpec, results); err != nil { log.Error(tracing.CaptureError(ctx, err), "autoscaling offline reconciliation failed") } } @@ -76,39 +76,38 @@ func (r *ReconcileElasticsearch) isElasticsearchReachable(ctx context.Context, e return esReachable, nil } -// attemptOnlineReconciliation attempts an online autoscaling reconciliation with a call the Elasticsearch autoscaling API. +// attemptOnlineReconciliation attempts an online autoscaling reconciliation with a call to the Elasticsearch autoscaling API. func (r *ReconcileElasticsearch) attemptOnlineReconciliation( ctx context.Context, currentAutoscalingStatus status.Status, autoscaledNodeSets esv1.AutoscaledNodeSets, - autoscalingSpecs esv1.AutoscalingSpec, - es esv1.Elasticsearch, + autoscalingSpec esv1.AutoscalingSpec, results *reconciler.Results, ) (reconcile.Result, error) { span, _ := apm.StartSpan(ctx, "online_reconciliation", tracing.SpanTypeApp) defer span.End() log := logconf.FromContext(ctx) log.V(1).Info("Starting online autoscaling reconciliation") - esClient, err := r.esClientProvider(ctx, r.Client, r.Dialer, es) + esClient, err := r.esClientProvider(ctx, r.Client, r.Dialer, autoscalingSpec.Elasticsearch) if err != nil { return reconcile.Result{}, err } // Update Machine Learning settings - mlNodes, maxMemory := autoscalingSpecs.GetMLNodesSettings() + mlNodes, maxMemory := autoscalingSpec.GetMLNodesSettings() if err := esClient.UpdateMLNodesSettings(ctx, mlNodes, maxMemory); err != nil { log.Error(err, "Error while updating the ML settings") return reconcile.Result{}, err } // Update autoscaling policies in Elasticsearch - if err := updatePolicies(ctx, log, autoscalingSpecs, esClient); err != nil { + if err := updatePolicies(ctx, log, autoscalingSpec, esClient); err != nil { log.Error(err, "Error while updating the autoscaling policies") return reconcile.Result{}, err } // Get capacity requirements from the Elasticsearch autoscaling capacity API - decisions, err := esClient.GetAutoscalingCapacity(ctx) + requiredCapacity, err := esClient.GetAutoscalingCapacity(ctx) if err != nil { return reconcile.Result{}, err } @@ -120,7 +119,7 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( var nextClusterResources resources.ClusterResources // For each autoscaling policy we compute the resources to be applied to the related nodeSets. 
- for _, autoscalingPolicy := range autoscalingSpecs.AutoscalingPolicySpecs { + for _, autoscalingPolicy := range autoscalingSpec.AutoscalingPolicySpecs { // Get the currentNodeSets nodeSetList, exists := autoscaledNodeSets[autoscalingPolicy.Name] if !exists { @@ -134,7 +133,7 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( // Get the decision from the Elasticsearch API var nodeSetsResources resources.NodeSetsResources - switch capacity, hasCapacity := decisions.Policies[autoscalingPolicy.Name]; hasCapacity && !capacity.RequiredCapacity.IsEmpty() { + switch capacity, hasCapacity := requiredCapacity.Policies[autoscalingPolicy.Name]; hasCapacity && !capacity.RequiredCapacity.IsEmpty() { case false: // We didn't receive a decision for this tier, or the decision is empty. We can only ensure that resources are within the allowed ranges. log.V(1).Info( @@ -171,10 +170,10 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( } // Emit the K8S events - status.EmitEvents(es, r.recorder, statusBuilder.Build()) + status.EmitEvents(autoscalingSpec.Elasticsearch, r.recorder, statusBuilder.Build()) // Update the Elasticsearch resource with the calculated resources. - if err := reconcileElasticsearch(log, &es, statusBuilder, nextClusterResources, currentAutoscalingStatus); err != nil { + if err := reconcileElasticsearch(log, &autoscalingSpec.Elasticsearch, statusBuilder, nextClusterResources, currentAutoscalingStatus); err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } @@ -183,7 +182,7 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( } // Apply the update Elasticsearch manifest - if err := r.Client.Update(context.Background(), &es); err != nil { + if err := r.Client.Update(context.Background(), &autoscalingSpec.Elasticsearch); err != nil { if apierrors.IsConflict(err) { return results.WithResult(reconcile.Result{Requeue: true}).Aggregate() } @@ -217,7 +216,6 @@ func (r *ReconcileElasticsearch) doOfflineReconciliation( currentAutoscalingStatus status.Status, autoscaledNodeSets esv1.AutoscaledNodeSets, autoscalingSpec esv1.AutoscalingSpec, - es esv1.Elasticsearch, results *reconciler.Results, ) (reconcile.Result, error) { defer tracing.Span(&ctx)() @@ -236,15 +234,15 @@ func (r *ReconcileElasticsearch) doOfflineReconciliation( } // Emit the K8S events - status.EmitEvents(es, r.recorder, statusBuilder.Build()) + status.EmitEvents(autoscalingSpec.Elasticsearch, r.recorder, statusBuilder.Build()) // Update the Elasticsearch manifest - if err := reconcileElasticsearch(log, &es, statusBuilder, clusterNodeSetsResources, currentAutoscalingStatus); err != nil { + if err := reconcileElasticsearch(log, &autoscalingSpec.Elasticsearch, statusBuilder, clusterNodeSetsResources, currentAutoscalingStatus); err != nil { return reconcile.Result{}, tracing.CaptureError(ctx, err) } // Apply the updated Elasticsearch manifest - if err := r.Client.Update(context.Background(), &es); err != nil { + if err := r.Client.Update(context.Background(), &autoscalingSpec.Elasticsearch); err != nil { if apierrors.IsConflict(err) { return results.WithResult(reconcile.Result{Requeue: true}).Aggregate() } From 26b628728d005b5baec777b56b854df79c33e465 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 4 Feb 2021 15:12:18 +0100 Subject: [PATCH 05/19] Remove references to named tiers --- .../elasticsearch/autoscaler/offline.go | 42 +++++++++---------- .../elasticsearch/resources/resources.go | 8 ---- .../elasticsearch/resources/resources_test.go | 2 +- 
.../elasticsearch/status/actual.go | 32 +++++++------- .../elasticsearch/status/actual_test.go | 32 +++++++------- .../elasticsearch/status/status.go | 14 +++---- 6 files changed, 61 insertions(+), 69 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go index 20e6ec288f..90e4765283 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go @@ -23,18 +23,18 @@ func GetOfflineNodeSetsResources( autoscalingSpec esv1.AutoscalingPolicySpec, currentAutoscalingStatus status.Status, ) resources.NodeSetsResources { - currentNamedTierResources, hasNamedTierResources := currentAutoscalingStatus.CurrentResourcesForPolicy(autoscalingSpec.Name) + currentNodeSetsResources, hasNodeSetsResources := currentAutoscalingStatus.CurrentResourcesForPolicy(autoscalingSpec.Name) var nodeSetsResources resources.NodeSetsResources var expectedNodeCount int32 - if !hasNamedTierResources { + if !hasNodeSetsResources { // There's no current status for this nodeSet, this happens when the Elasticsearch cluster does not exist. // In that case we create a new one from the minimum values provided by the user. nodeSetsResources = newMinNodeSetResources(autoscalingSpec, nodeSets) } else { // The status contains some resource values for the NodeSets managed by this autoscaling policy, let's reuse them. - nodeSetsResources = nodeSetResourcesFromStatus(currentAutoscalingStatus, currentNamedTierResources, autoscalingSpec, nodeSets) - for _, nodeSet := range currentNamedTierResources.NodeSetNodeCount { + nodeSetsResources = nodeSetResourcesFromStatus(currentAutoscalingStatus, currentNodeSetsResources, autoscalingSpec, nodeSets) + for _, nodeSet := range currentNodeSetsResources.NodeSetNodeCount { expectedNodeCount += nodeSet.NodeCount } } @@ -69,53 +69,53 @@ func GetOfflineNodeSetsResources( // If user removed the limits while offline we are assuming that it wants to take back control on the resources. func nodeSetResourcesFromStatus( currentAutoscalingStatus status.Status, - currentNamedTierResources resources.NodeSetsResources, + currentNodeSetsResources resources.NodeSetsResources, autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets []string, ) resources.NodeSetsResources { - namedTierResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) + nodeSetsResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) // Ensure memory settings are in the allowed limit range. if autoscalingSpec.IsMemoryDefined() { - if currentNamedTierResources.HasRequest(corev1.ResourceMemory) { - namedTierResources.SetRequest( + if currentNodeSetsResources.HasRequest(corev1.ResourceMemory) { + nodeSetsResources.SetRequest( corev1.ResourceMemory, - adjustQuantity(currentNamedTierResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), + adjustQuantity(currentNodeSetsResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), ) } else { - namedTierResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) + nodeSetsResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) } } // Ensure CPU settings are in the allowed limit range. 
if autoscalingSpec.IsCPUDefined() { - if currentNamedTierResources.HasRequest(corev1.ResourceCPU) { - namedTierResources.SetRequest( + if currentNodeSetsResources.HasRequest(corev1.ResourceCPU) { + nodeSetsResources.SetRequest( corev1.ResourceCPU, - adjustQuantity(currentNamedTierResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), + adjustQuantity(currentNodeSetsResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), ) } else { - namedTierResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) + nodeSetsResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) } } // Ensure storage capacity is set - namedTierResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, currentAutoscalingStatus)) - return namedTierResources + nodeSetsResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, currentAutoscalingStatus)) + return nodeSetsResources } // newMinNodeSetResources returns a NodeSetResources with minimums values func newMinNodeSetResources(autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets []string) resources.NodeSetsResources { - namedTierResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) + nodeSetsResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) if autoscalingSpec.IsCPUDefined() { - namedTierResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) + nodeSetsResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) } if autoscalingSpec.IsMemoryDefined() { - namedTierResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) + nodeSetsResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) } if autoscalingSpec.IsStorageDefined() { - namedTierResources.SetRequest(corev1.ResourceStorage, autoscalingSpec.Storage.Min.DeepCopy()) + nodeSetsResources.SetRequest(corev1.ResourceStorage, autoscalingSpec.Storage.Min.DeepCopy()) } - return namedTierResources + return nodeSetsResources } // adjustQuantity ensures that a quantity is comprised between a min and a max. 
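The adjustQuantity helper whose comment closes the hunk above is the heart of the offline clamping: memory and CPU requests restored from the status are forced back into the user-defined limit range. A minimal standalone sketch of that clamp, assuming only k8s.io/apimachinery (clampQuantity and the values in main are illustrative names, not taken from the patch):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// clampQuantity constrains value to the closed interval [min, max]; this is
// the same comparison logic adjustQuantity applies to restored requests.
func clampQuantity(value, min, max resource.Quantity) resource.Quantity {
	if value.Cmp(min) < 0 {
		return min
	}
	if value.Cmp(max) > 0 {
		return max
	}
	return value
}

func main() {
	// A 10Gi request restored from the status, clamped to a 2Gi..8Gi range,
	// yields 8Gi: the user lowered the max while the operator was offline.
	clamped := clampQuantity(resource.MustParse("10Gi"), resource.MustParse("2Gi"), resource.MustParse("8Gi"))
	fmt.Println(clamped.String()) // 8Gi
}

Note that storage deliberately bypasses this clamp: as the later patches in this series make explicit, storage is raised to the user's min if needed but never reduced, even when it exceeds the max.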
diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources.go b/pkg/controller/autoscaling/elasticsearch/resources/resources.go index 4a8c2707f7..ee67df4f68 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources.go @@ -281,11 +281,3 @@ func (cr ClusterResources) ByNodeSet() map[string]NodeSetResources { } return byNodeSet } - -func (cr ClusterResources) ByAutoscalingPolicy() map[string]NodeSetsResources { - byNamedTier := make(map[string]NodeSetsResources) - for _, namedTierResources := range cr { - byNamedTier[namedTierResources.Name] = namedTierResources - } - return byNamedTier -} diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go index 181f38a3f5..964fbe29a9 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go @@ -113,7 +113,7 @@ func TestResourcesSpecification_MaxMerge(t *testing.T) { } } -func TestNamedTierResources_IsUsedBy(t *testing.T) { +func TestNodeSetsResources_IsUsedBy(t *testing.T) { type fields struct { Name string NodeSetNodeCount NodeSetNodeCountList diff --git a/pkg/controller/autoscaling/elasticsearch/status/actual.go b/pkg/controller/autoscaling/elasticsearch/status/actual.go index c2645a87d5..2d8132c3e6 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/actual.go +++ b/pkg/controller/autoscaling/elasticsearch/status/actual.go @@ -19,7 +19,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -// ImportExistingResources attempts to infer the resources to use in a tier if an autoscaling policy is not in the Status. +// ImportExistingResources attempts to infer the resources to allocate to node sets if an autoscaling policy is not in the Status. // It can be the case if: // * The cluster was manually managed and the user wants to manage resources with the autoscaling controller. In that case // we want to be able to set some good default resources even if the autoscaling API is not responding. @@ -28,7 +28,7 @@ func (s *Status) ImportExistingResources( log logr.Logger, c k8s.Client, as esv1.AutoscalingSpec, - namedTiers esv1.AutoscaledNodeSets, + autoscaledNodeSets esv1.AutoscaledNodeSets, ) error { for _, autoscalingPolicy := range as.AutoscalingPolicySpecs { if _, inStatus := s.CurrentResourcesForPolicy(autoscalingPolicy.Name); inStatus { @@ -36,17 +36,17 @@ func (s *Status) ImportExistingResources( continue } // Get the nodeSets - nodeSetList, exists := namedTiers[autoscalingPolicy.Name] + nodeSetList, exists := autoscaledNodeSets[autoscalingPolicy.Name] if !exists { // Not supposed to happen with a proper validation in place, but we still want to report this error return fmt.Errorf("no nodeSet associated to autoscaling policy %s", autoscalingPolicy.Name) } - resources, err := namedTierResourcesFromStatefulSets(c, as.Elasticsearch, autoscalingPolicy, nodeSetList.Names()) + resources, err := nodeSetsResourcesResourcesFromStatefulSets(c, as.Elasticsearch, autoscalingPolicy, nodeSetList.Names()) if err != nil { return err } if resources == nil { - // No StatefulSet, the cluster or the tier might be a new one. + // No StatefulSet, the cluster or the autoscaling policy might be a new one. 
continue } log.Info("Importing resources from existing StatefulSets", @@ -66,14 +66,14 @@ func (s *Status) ImportExistingResources( return nil } -// namedTierResourcesFromStatefulSets creates NodeSetsResources from existing StatefulSets -func namedTierResourcesFromStatefulSets( +// nodeSetsResourcesResourcesFromStatefulSets creates NodeSetsResources from existing StatefulSets +func nodeSetsResourcesResourcesFromStatefulSets( c k8s.Client, es esv1.Elasticsearch, autoscalingPolicySpec esv1.AutoscalingPolicySpec, nodeSets []string, ) (*resources.NodeSetsResources, error) { - namedTierResources := resources.NodeSetsResources{ + nodeSetsResources := resources.NodeSetsResources{ Name: autoscalingPolicySpec.Name, } found := false @@ -97,7 +97,7 @@ func namedTierResourcesFromStatefulSets( } found = true - namedTierResources.NodeSetNodeCount = append(namedTierResources.NodeSetNodeCount, resources.NodeSetNodeCount{ + nodeSetsResources.NodeSetNodeCount = append(nodeSetsResources.NodeSetNodeCount, resources.NodeSetNodeCount{ Name: nodeSetName, NodeCount: getStatefulSetReplicas(statefulSet), }) @@ -108,12 +108,12 @@ func namedTierResourcesFromStatefulSets( return nil, err } if ssetStorageRequest != nil && autoscalingPolicySpec.IsStorageDefined() { - if namedTierResources.HasRequest(corev1.ResourceStorage) { - if ssetStorageRequest.Cmp(namedTierResources.GetRequest(corev1.ResourceStorage)) > 0 { - namedTierResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) + if nodeSetsResources.HasRequest(corev1.ResourceStorage) { + if ssetStorageRequest.Cmp(nodeSetsResources.GetRequest(corev1.ResourceStorage)) > 0 { + nodeSetsResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) } } else { - namedTierResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) + nodeSetsResources.SetRequest(corev1.ResourceStorage, *ssetStorageRequest) } } @@ -123,16 +123,16 @@ func namedTierResourcesFromStatefulSets( continue } if autoscalingPolicySpec.IsMemoryDefined() { - namedTierResources.MaxMerge(container.Resources, corev1.ResourceMemory) + nodeSetsResources.MaxMerge(container.Resources, corev1.ResourceMemory) } if autoscalingPolicySpec.IsCPUDefined() { - namedTierResources.MaxMerge(container.Resources, corev1.ResourceCPU) + nodeSetsResources.MaxMerge(container.Resources, corev1.ResourceCPU) } } if !found { return nil, nil } - return &namedTierResources, nil + return &nodeSetsResources, nil } // getElasticsearchDataVolumeQuantity returns the volume claim quantity for the esv1.ElasticsearchDataVolumeName volume diff --git a/pkg/controller/autoscaling/elasticsearch/status/actual_test.go b/pkg/controller/autoscaling/elasticsearch/status/actual_test.go index b202678e8b..c4855cd84b 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/actual_test.go +++ b/pkg/controller/autoscaling/elasticsearch/status/actual_test.go @@ -19,7 +19,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -func TestNamedTierResourcesFromStatefulSets(t *testing.T) { +func TestNodeSetsResourcesResourcesFromStatefulSets(t *testing.T) { type args struct { statefulSets []runtime.Object es esv1.Elasticsearch @@ -27,10 +27,10 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { nodeSets []string } tests := []struct { - name string - args args - wantNamedTierResources *resources.NodeSetsResources - wantErr bool + name string + args args + wantNodeSetsResources *resources.NodeSetsResources + wantErr bool }{ { name: "No existing StatefulSet", @@ -42,7 +42,7 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { 
AutoscalingResources: esv1.AutoscalingResources{Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}}}, nodeSets: []string{"nodeset-1", "nodeset-2"}, }, - wantNamedTierResources: nil, + wantNodeSetsResources: nil, }, { name: "Has existing resources only with storage", @@ -67,7 +67,7 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { AutoscalingResources: esv1.AutoscalingResources{Storage: &esv1.QuantityRange{Min: resource.MustParse("7Gi"), Max: resource.MustParse("50Gi")}}}, nodeSets: []string{"nodeset-1", "nodeset-2"}, }, - wantNamedTierResources: &resources.NodeSetsResources{ + wantNodeSetsResources: &resources.NodeSetsResources{ Name: "aspec", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, NodeResources: resources.NodeResources{ @@ -108,7 +108,7 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { }, nodeSets: []string{"nodeset-1", "nodeset-2"}, }, - wantNamedTierResources: &resources.NodeSetsResources{ + wantNodeSetsResources: &resources.NodeSetsResources{ Name: "aspec", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, NodeResources: resources.NodeResources{ @@ -150,7 +150,7 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { }, nodeSets: []string{"nodeset-1", "nodeset-2"}, }, - wantNamedTierResources: &resources.NodeSetsResources{ + wantNodeSetsResources: &resources.NodeSetsResources{ Name: "aspec", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, NodeResources: resources.NodeResources{ @@ -181,8 +181,8 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { autoscalingPolicySpec: esv1.AutoscalingPolicySpec{NamedAutoscalingPolicy: esv1.NamedAutoscalingPolicy{Name: "aspec"}}, nodeSets: []string{"nodeset-1", "nodeset-2"}, }, - wantErr: true, - wantNamedTierResources: nil, + wantErr: true, + wantNodeSetsResources: nil, }, { name: "Not the default volume claims", @@ -212,7 +212,7 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { nodeSets: []string{"nodeset-1", "nodeset-2"}, }, wantErr: false, - wantNamedTierResources: &resources.NodeSetsResources{ + wantNodeSetsResources: &resources.NodeSetsResources{ Name: "aspec", NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 3}, {Name: "nodeset-2", NodeCount: 2}}, NodeResources: resources.NodeResources{ @@ -226,13 +226,13 @@ func TestNamedTierResourcesFromStatefulSets(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { c := k8s.NewFakeClient(tt.args.statefulSets...) 
- got, err := namedTierResourcesFromStatefulSets(c, tt.args.es, tt.args.autoscalingPolicySpec, tt.args.nodeSets) + got, err := nodeSetsResourcesResourcesFromStatefulSets(c, tt.args.es, tt.args.autoscalingPolicySpec, tt.args.nodeSets) if (err != nil) != tt.wantErr { - t.Errorf("namedTierResourcesFromStatefulSets() error = %v, wantErr %v", err, tt.wantErr) + t.Errorf("nodeSetsResourcesResourcesFromStatefulSets() error = %v, wantErr %v", err, tt.wantErr) return } - if !reflect.DeepEqual(got, tt.wantNamedTierResources) { - t.Errorf("namedTierResourcesFromStatefulSets() got = %v, want %v", got, tt.wantNamedTierResources) + if !reflect.DeepEqual(got, tt.wantNodeSetsResources) { + t.Errorf("nodeSetsResourcesResourcesFromStatefulSets() got = %v, want %v", got, tt.wantNodeSetsResources) } }) } diff --git a/pkg/controller/autoscaling/elasticsearch/status/status.go b/pkg/controller/autoscaling/elasticsearch/status/status.go index 153fbce2f7..d0e6c6ae5e 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/status.go +++ b/pkg/controller/autoscaling/elasticsearch/status/status.go @@ -66,7 +66,7 @@ func (s *Status) LastModificationTime(policyName string) (metav1.Time, bool) { type AutoscalingPolicyStatusBuilder struct { policyName string - namedTierResources resources.NodeSetsResources + nodeSetsResources resources.NodeSetsResources lastModificationTime metav1.Time states map[PolicyStateType]PolicyState } @@ -90,16 +90,16 @@ func (psb *AutoscalingPolicyStatusBuilder) Build() AutoscalingPolicyStatus { } return AutoscalingPolicyStatus{ Name: psb.policyName, - NodeSetNodeCount: psb.namedTierResources.NodeSetNodeCount, - ResourcesSpecification: psb.namedTierResources.NodeResources, + NodeSetNodeCount: psb.nodeSetsResources.NodeSetNodeCount, + ResourcesSpecification: psb.nodeSetsResources.NodeResources, LastModificationTime: psb.lastModificationTime, PolicyStates: policyStates, } } -// SetNamedTierResources sets the compute resources associated to a tier. -func (psb *AutoscalingPolicyStatusBuilder) SetNamedTierResources(namedTierResources resources.NodeSetsResources) *AutoscalingPolicyStatusBuilder { - psb.namedTierResources = namedTierResources +// SetNodeSetsResources sets the compute resources associated to a tier. 
+func (psb *AutoscalingPolicyStatusBuilder) SetNodeSetsResources(nodeSetsResources resources.NodeSetsResources) *AutoscalingPolicyStatusBuilder { + psb.nodeSetsResources = nodeSetsResources return psb } @@ -184,7 +184,7 @@ func UpdateAutoscalingStatus( now := metav1.Now() for _, nextNodeSetResources := range nextClusterResources { // Save the resources in the status - statusBuilder.ForPolicy(nextNodeSetResources.Name).SetNamedTierResources(nextNodeSetResources) + statusBuilder.ForPolicy(nextNodeSetResources.Name).SetNodeSetsResources(nextNodeSetResources) // Restore the previous timestamp previousTimestamp, ok := currentAutoscalingStatus.LastModificationTime(nextNodeSetResources.Name) From 450dbe564919e885d24c03d4f954988e7a222cd5 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Fri, 5 Feb 2021 10:26:09 +0100 Subject: [PATCH 06/19] Improve naming and comments --- .../autoscaling/elasticsearch/autoscaler/linear_scaler.go | 4 ++-- .../elasticsearch/autoscaler/linear_scaler_test.go | 6 +++--- .../autoscaling/elasticsearch/autoscaler/nodesets.go | 4 ++-- .../autoscaling/elasticsearch/autoscaler/vertical.go | 8 ++++---- .../autoscaling/elasticsearch/resources/resources.go | 4 ++-- .../autoscaling/elasticsearch/resources/resources_test.go | 6 +++--- pkg/controller/elasticsearch/driver/autoscaling.go | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go index 4bf9c94422..fd6fc45a9a 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler.go @@ -66,8 +66,8 @@ func memoryFromStorage(requiredStorageCapacity resource.Quantity, storageRange, requiredMemoryCapacity := memoryRange.Min.Value() + requiredAdditionalMemoryCapacity // Round up memory to the next GB - requiredMemoryCapacity = roundUp(requiredMemoryCapacity, giga) - resourceMemoryAsGiga := resource.MustParse(fmt.Sprintf("%dGi", requiredMemoryCapacity/giga)) + requiredMemoryCapacity = roundUp(requiredMemoryCapacity, gibi) + resourceMemoryAsGiga := resource.MustParse(fmt.Sprintf("%dGi", requiredMemoryCapacity/gibi)) if resourceMemoryAsGiga.Cmp(memoryRange.Max) > 0 { resourceMemoryAsGiga = memoryRange.Max.DeepCopy() diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go index 8c68ebff84..51005d3998 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/linear_scaler_test.go @@ -53,7 +53,7 @@ func Test_memoryFromStorage(t *testing.T) { requiredStorageCapacity: q("2Gi"), autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "1500Mi").WithStorage("1Gi", "2Gi").Build(), }, - wantMemory: qPtr("1500Mi"), // keep the min. value + wantMemory: qPtr("1500Mi"), // keep the max. value }, { name: "Do not allocate more memory than max allowed II", @@ -61,7 +61,7 @@ func Test_memoryFromStorage(t *testing.T) { requiredStorageCapacity: q("1800Mi"), autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "1500Mi").WithStorage("1Gi", "2Gi").Build(), }, - wantMemory: qPtr("1500Mi"), // keep the min. value + wantMemory: qPtr("1500Mi"), // keep the max. 
value }, { name: "Allocate max of memory when it's possible", @@ -69,7 +69,7 @@ func Test_memoryFromStorage(t *testing.T) { requiredStorageCapacity: q("2Gi"), autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithMemory("1Gi", "2256Mi").WithStorage("1Gi", "2Gi").Build(), }, - wantMemory: qPtr("2256Mi"), // keep the min. value + wantMemory: qPtr("2256Mi"), // keep the max. value }, { name: "Half of the storage range should be translated to rounded value of half of the memory range", diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go index a6ace2247e..d8f83157c4 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go @@ -18,7 +18,7 @@ type FairNodesManager struct { nodeSetNodeCountList resources.NodeSetNodeCountList } -// sort sorts node sets by the value of the Count field, giving priority to node sets with less nodes. +// sort sorts node sets by the value of the Count field, giving priority to node sets with fewer nodes. // If several node sets have the same number of nodes they are sorted alphabetically. func (fnm *FairNodesManager) sort() { sort.SliceStable(fnm.nodeSetNodeCountList, func(i, j int) bool { @@ -42,7 +42,7 @@ func NewFairNodesManager(log logr.Logger, nodeSetNodeCount []resources.NodeSetNo // Priority is defined as the nodeSet with the lowest NodeCount value, or the first nodeSet in the alphabetical order if // several node sets have the same NodeCount value. func (fnm *FairNodesManager) AddNode() { - // Peak the first element, this is the one with the less nodes + // Peak the first element, this is the one with fewer nodes fnm.nodeSetNodeCountList[0].NodeCount++ // Ensure the set is sorted fnm.sort() diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go index 79eefa836d..a3a787e26c 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -14,7 +14,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" ) -var giga = int64(1024 * 1024 * 1024) +const gibi = int64(1024 * 1024 * 1024) // nodeResources computes the desired amount of memory and storage for a node managed by a given AutoscalingPolicySpec. func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Quantity) resources.NodeResources { @@ -116,7 +116,7 @@ func (ctx *Context) getResourceValue( } // Try to round up the Gb value - nodeResource = roundUp(nodeResource, giga) + nodeResource = roundUp(nodeResource, gibi) // Always ensure that the calculated resource quantity is at least equal to the min. limit provided by the user. if nodeResource < min.Value() { @@ -135,9 +135,9 @@ func (ctx *Context) getResourceValue( // resourceToQuantity attempts to convert a raw integer value into a human readable quantity. 
func resourceToQuantity(nodeResource int64) resource.Quantity { var nodeQuantity resource.Quantity - if nodeResource >= giga && nodeResource%giga == 0 { + if nodeResource >= gibi && nodeResource%gibi == 0 { // When it's possible we may want to express the memory with a "human readable unit" like the the Gi unit - nodeQuantity = resource.MustParse(fmt.Sprintf("%dGi", nodeResource/giga)) + nodeQuantity = resource.MustParse(fmt.Sprintf("%dGi", nodeResource/gibi)) } else { nodeQuantity = resource.NewQuantity(nodeResource, resource.DecimalSI).DeepCopy() } diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources.go b/pkg/controller/autoscaling/elasticsearch/resources/resources.go index ee67df4f68..5a50dad7ab 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources.go @@ -36,9 +36,9 @@ func NewNodeSetsResources(name string, nodeSetNames []string) NodeSetsResources // ClusterResources models the desired resources (CPU, memory, storage and number of nodes) for all the autoscaling policies in a cluster. type ClusterResources []NodeSetsResources -// IsUsedBy returns true if the resources assigned to a container in a NodeSet matches the one specified in the NodeSetsResources. +// Match returns true if the resources assigned to a container in a NodeSet matches the one specified in the NodeSetsResources. // It returns false if the container is not found in the NodeSet. -func (ntr NodeSetsResources) IsUsedBy(containerName string, nodeSet v1.NodeSet) (bool, error) { +func (ntr NodeSetsResources) Match(containerName string, nodeSet v1.NodeSet) (bool, error) { for _, nodeSetNodeCount := range ntr.NodeSetNodeCount { if nodeSetNodeCount.Name != nodeSet.Name { continue diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go index 964fbe29a9..5369b741b0 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go @@ -269,13 +269,13 @@ func TestNodeSetsResources_IsUsedBy(t *testing.T) { NodeSetNodeCount: tt.fields.NodeSetNodeCount, NodeResources: tt.fields.ResourcesSpecification, } - got, err := ntr.IsUsedBy(esv1.ElasticsearchContainerName, tt.args.nodeSet) + got, err := ntr.Match(esv1.ElasticsearchContainerName, tt.args.nodeSet) if (err != nil) != tt.wantErr { - t.Errorf("NodeSetsResources.IsUsedBy() error = %v, wantErr %v", err, tt.wantErr) + t.Errorf("NodeSetsResources.Match() error = %v, wantErr %v", err, tt.wantErr) return } if got != tt.want { - t.Errorf("NodeSetsResources.IsUsedBy() = %v, want %v", got, tt.want) + t.Errorf("NodeSetsResources.Match() = %v, want %v", got, tt.want) } }) } diff --git a/pkg/controller/elasticsearch/driver/autoscaling.go b/pkg/controller/elasticsearch/driver/autoscaling.go index 9c08ff85ed..f875ba8154 100644 --- a/pkg/controller/elasticsearch/driver/autoscaling.go +++ b/pkg/controller/elasticsearch/driver/autoscaling.go @@ -37,21 +37,21 @@ func autoscaledResourcesSynced(es esv1.Elasticsearch) (bool, error) { continue } - s, ok := autoscalingStatus.CurrentResourcesForPolicy(nodeSetAutoscalingSpec.Name) + expectedNodeSetsResources, ok := autoscalingStatus.CurrentResourcesForPolicy(nodeSetAutoscalingSpec.Name) if !ok { log.Info("NodeSet managed by the autoscaling controller but not found in status", "nodeset", nodeSet.Name, ) return false, nil } - inSync, err := 
s.IsUsedBy(esv1.ElasticsearchContainerName, nodeSet) + inSync, err := expectedNodeSetsResources.Match(esv1.ElasticsearchContainerName, nodeSet) if err != nil { return false, err } if !inSync { log.Info("NodeSet managed by the autoscaling controller but not in sync", "nodeset", nodeSet.Name, - "expected", s.NodeResources, + "expected", expectedNodeSetsResources.NodeResources, ) return false, nil } From 3f462982f3064b976d5be1cfafe25be3fa8f6575 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Fri, 5 Feb 2021 11:52:52 +0100 Subject: [PATCH 07/19] Remove FairNodesManager abstraction --- .../elasticsearch/autoscaler/horizontal.go | 26 +++---- .../elasticsearch/autoscaler/nodesets.go | 47 +++++-------- .../elasticsearch/autoscaler/nodesets_test.go | 70 +++++++++---------- .../elasticsearch/autoscaler/offline.go | 6 +- 4 files changed, 62 insertions(+), 87 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go index 4a10fd47e3..449f653e08 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go @@ -16,24 +16,22 @@ import ( func (ctx *Context) scaleHorizontally( nodeCapacity resources.NodeResources, // resources for each node in the tier/policy, as computed by the vertical autoscaler. ) resources.NodeSetsResources { - minNodes := int(ctx.AutoscalingSpec.NodeCount.Min) - maxNodes := int(ctx.AutoscalingSpec.NodeCount.Max) totalRequiredCapacity := ctx.RequiredCapacity.Total // total required resources, at the tier level. - nodeToAdd := 0 + var nodeToAdd int32 // Scale horizontally to match memory requirements if !totalRequiredCapacity.Memory.IsZero() { nodeMemory := nodeCapacity.GetRequest(corev1.ResourceMemory) - nodeToAdd = ctx.getNodesToAdd(nodeMemory.Value(), totalRequiredCapacity.Memory.Value(), minNodes, maxNodes, string(corev1.ResourceMemory)) + nodeToAdd = ctx.getNodesToAdd(nodeMemory.Value(), totalRequiredCapacity.Memory.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceMemory)) } // Scale horizontally to match storage requirements if !totalRequiredCapacity.Storage.IsZero() { nodeStorage := nodeCapacity.GetRequest(corev1.ResourceStorage) - nodeToAdd = max(nodeToAdd, ctx.getNodesToAdd(nodeStorage.Value(), totalRequiredCapacity.Storage.Value(), minNodes, maxNodes, string(corev1.ResourceStorage))) + nodeToAdd = max(nodeToAdd, ctx.getNodesToAdd(nodeStorage.Value(), totalRequiredCapacity.Storage.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceStorage))) } - totalNodes := nodeToAdd + minNodes + totalNodes := nodeToAdd + ctx.AutoscalingSpec.NodeCount.Min ctx.Log.Info("Horizontal autoscaler", "policy", ctx.AutoscalingSpec.Name, "scope", "tier", "count", totalNodes, @@ -42,11 +40,7 @@ func (ctx *Context) scaleHorizontally( nodeSetsResources := resources.NewNodeSetsResources(ctx.AutoscalingSpec.Name, ctx.NodeSets.Names()) nodeSetsResources.NodeResources = nodeCapacity - fnm := NewFairNodesManager(ctx.Log, nodeSetsResources.NodeSetNodeCount) - for totalNodes > 0 { - fnm.AddNode() - totalNodes-- - } + distributeFairly(nodeSetsResources.NodeSetNodeCount, totalNodes) return nodeSetsResources } @@ -55,9 +49,9 @@ func (ctx *Context) scaleHorizontally( func (ctx *Context) getNodesToAdd( nodeResourceCapacity int64, // resource capacity of a single node, for example the memory of a node in the tier 
totalRequiredCapacity int64, // required capacity at the tier level - minNodes, maxNodes int, // min and max number of nodes in this tier, as specified by the user the autoscaling spec. + minNodes, maxNodes int32, // min and max number of nodes in this tier, as specified by the user in the autoscaling spec. resourceName string, // used for logging and in events -) int { +) int32 { // minResourceQuantity is the resource quantity in the tier before scaling horizontally. minResourceQuantity := int64(minNodes) * nodeResourceCapacity // resourceDelta holds the resource needed to comply with what is requested by Elasticsearch. @@ -93,8 +87,8 @@ func (ctx *Context) getNodesToAdd( // getNodeDelta computes the nodes to be added given a delta (the additional amount of resource needed) // and the individual capacity a single node. -func getNodeDelta(delta, nodeCapacity int64) int { - nodeToAdd := 0 +func getNodeDelta(delta, nodeCapacity int64) int32 { + var nodeToAdd int32 if delta < 0 { return 0 } @@ -107,7 +101,7 @@ func getNodeDelta(delta, nodeCapacity int64) int { return nodeToAdd } -func max(a, b int) int { +func max(a, b int32) int32 { if a > b { return a } diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go index d8f83157c4..c9f8c5c697 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets.go @@ -9,41 +9,30 @@ import ( "strings" "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" - "github.com/go-logr/logr" ) -// FairNodesManager helps to distribute nodes among several node sets whose belong to a same tier. -type FairNodesManager struct { - log logr.Logger - nodeSetNodeCountList resources.NodeSetNodeCountList +func distributeFairly(nodeSets resources.NodeSetNodeCountList, expectedNodeCount int32) { + if len(nodeSets) == 0 { + return + } + // sort the slice once up front + sortNodeSets(nodeSets) + for expectedNodeCount > 0 { + // Pick the first element, the one with the fewest nodes + nodeSets[0].NodeCount++ + // Ensure the set is sorted + sortNodeSets(nodeSets) + expectedNodeCount-- + } } // sort sorts node sets by the value of the Count field, giving priority to node sets with fewer nodes. // If several node sets have the same number of nodes they are sorted alphabetically. -func (fnm *FairNodesManager) sort() { - sort.SliceStable(fnm.nodeSetNodeCountList, func(i, j int) bool { - if fnm.nodeSetNodeCountList[i].NodeCount == fnm.nodeSetNodeCountList[j].NodeCount { - return strings.Compare(fnm.nodeSetNodeCountList[i].Name, fnm.nodeSetNodeCountList[j].Name) < 0 +func sortNodeSets(nodeSetNodeCountList resources.NodeSetNodeCountList) { + sort.SliceStable(nodeSetNodeCountList, func(i, j int) bool { + if nodeSetNodeCountList[i].NodeCount == nodeSetNodeCountList[j].NodeCount { + return strings.Compare(nodeSetNodeCountList[i].Name, nodeSetNodeCountList[j].Name) < 0 } - return fnm.nodeSetNodeCountList[i].NodeCount < fnm.nodeSetNodeCountList[j].NodeCount + return nodeSetNodeCountList[i].NodeCount < nodeSetNodeCountList[j].NodeCount }) } - -func NewFairNodesManager(log logr.Logger, nodeSetNodeCount []resources.NodeSetNodeCount) FairNodesManager { - fnm := FairNodesManager{ - log: log, - nodeSetNodeCountList: nodeSetNodeCount, - } - fnm.sort() - return fnm -} - -// AddNode selects the nodeSet with the highest priority and increases by one the value of its NodeCount field.
-// Priority is defined as the nodeSet with the lowest NodeCount value, or the first nodeSet in the alphabetical order if -// several node sets have the same NodeCount value. -func (fnm *FairNodesManager) AddNode() { - // Peak the first element, this is the one with fewer nodes - fnm.nodeSetNodeCountList[0].NodeCount++ - // Ensure the set is sorted - fnm.sort() -} diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go index cfe57c99ea..b4cc31d823 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/nodesets_test.go @@ -11,57 +11,53 @@ import ( "github.com/stretchr/testify/assert" ) -func TestFairNodesManager_AddNode(t *testing.T) { - type fields struct { - nodeSetNodeCountList []resources.NodeSetNodeCount +func Test_distributeFairly(t *testing.T) { + type args struct { + nodeSets resources.NodeSetNodeCountList + expectedNodeCount int32 } tests := []struct { - name string - fields fields - assertFunc func(t *testing.T, fnm FairNodesManager) + name string + args args + expectedNodeSets resources.NodeSetNodeCountList }{ { - name: "One nodeSet", - fields: fields{ - nodeSetNodeCountList: []resources.NodeSetNodeCount{{Name: "nodeset-1"}}, + name: "nodeSet is nil, no panic", + args: args{ + nodeSets: nil, + expectedNodeCount: 2, }, - assertFunc: func(t *testing.T, fnm FairNodesManager) { - assert.Equal(t, 1, len(fnm.nodeSetNodeCountList)) - assert.Equal(t, int32(0), fnm.nodeSetNodeCountList[0].NodeCount) - fnm.AddNode() - assert.Equal(t, int32(1), fnm.nodeSetNodeCountList[0].NodeCount) - fnm.AddNode() - assert.Equal(t, int32(2), fnm.nodeSetNodeCountList[0].NodeCount) + expectedNodeSets: nil, + }, + { + name: "nodeSet is empty, no panic", + args: args{ + nodeSets: []resources.NodeSetNodeCount{}, + expectedNodeCount: 2, }, + expectedNodeSets: []resources.NodeSetNodeCount{}, }, { - name: "Several NodeSets", - fields: fields{ - nodeSetNodeCountList: []resources.NodeSetNodeCount{{Name: "nodeset-1"}, {Name: "nodeset-2"}}, + name: "One nodeSet", + args: args{ + nodeSets: []resources.NodeSetNodeCount{{Name: "nodeset-1"}}, + expectedNodeCount: 2, }, - assertFunc: func(t *testing.T, fnm FairNodesManager) { - assert.Equal(t, 2, len(fnm.nodeSetNodeCountList)) - assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) - assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) - - fnm.AddNode() - assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) - assert.Equal(t, int32(0), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) - - fnm.AddNode() - assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) - assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) - - fnm.AddNode() - assert.Equal(t, int32(2), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-1"]) - assert.Equal(t, int32(1), fnm.nodeSetNodeCountList.ByNodeSet()["nodeset-2"]) + expectedNodeSets: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 2}}, + }, + { + name: "Two nodeSet", + args: args{ + nodeSets: []resources.NodeSetNodeCount{{Name: "nodeset-1"}, {Name: "nodeset-2"}}, + expectedNodeCount: 3, }, + expectedNodeSets: []resources.NodeSetNodeCount{{Name: "nodeset-1", NodeCount: 2}, {Name: "nodeset-2", NodeCount: 1}}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - fnm := NewFairNodesManager(logTest, tt.fields.nodeSetNodeCountList) - 
tt.assertFunc(t, fnm) + distributeFairly(tt.args.nodeSets, tt.args.expectedNodeCount) + assert.ElementsMatch(t, tt.args.nodeSets, tt.expectedNodeSets) }) } } diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go index 90e4765283..1c5cb92448 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go @@ -48,11 +48,7 @@ func GetOfflineNodeSetsResources( // User may have added or removed some NodeSets while the autoscaling API is not available. // We distribute the nodes to reflect that change. - fnm := NewFairNodesManager(log, nodeSetsResources.NodeSetNodeCount) - for expectedNodeCount > 0 { - fnm.AddNode() - expectedNodeCount-- - } + distributeFairly(nodeSetsResources.NodeSetNodeCount, expectedNodeCount) log.Info( "Offline autoscaling", From 567c49dd102a4c394b9db5f7bc842010a86b24dd Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Fri, 5 Feb 2021 15:25:40 +0100 Subject: [PATCH 08/19] Update nodeSetResourcesFromStatus --- .../elasticsearch/autoscaler/autoscaler.go | 8 ++--- .../elasticsearch/autoscaler/offline.go | 32 +++++++++++++------ .../elasticsearch/autoscaler/offline_test.go | 16 ++++++++++ .../elasticsearch/autoscaler/vertical.go | 6 ++-- .../autoscaling/elasticsearch/controller.go | 6 ++-- .../elasticsearch/controller_test.go | 10 +++--- .../autoscaling/elasticsearch/driver.go | 2 +- .../elasticsearch/resources/resources_test.go | 2 +- 8 files changed, 56 insertions(+), 26 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go index d356831d60..5ad330d94d 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler.go @@ -38,18 +38,18 @@ func (ctx *Context) scaleVertically() resources.NodeResources { // All resources can be computed "from scratch", without knowing the previous values. // This is however not true for storage. Storage can't be scaled down, current storage capacity must be considered // as a hard min. limit. This storage limit must be taken into consideration when computing the desired resources. - currentStorage := getStorage(ctx.AutoscalingSpec, ctx.CurrentAutoscalingStatus) + minStorage := getMinStorageQuantity(ctx.AutoscalingSpec, ctx.CurrentAutoscalingStatus) return ctx.nodeResources( int64(ctx.AutoscalingSpec.NodeCount.Min), - currentStorage, + minStorage, ) } -// getStorage returns the min. storage capacity that should be used by the autoscaling algorithm. +// getMinStorageQuantity returns the min. storage quantity that should be used by the autoscaling algorithm. // The value is the max. value of either: // * the current value in the status // * the min. value set by the user in the autoscaling spec. -func getStorage(autoscalingSpec esv1.AutoscalingPolicySpec, currentAutoscalingStatus status.Status) resource.Quantity { +func getMinStorageQuantity(autoscalingSpec esv1.AutoscalingPolicySpec, currentAutoscalingStatus status.Status) resource.Quantity { // If no storage spec is defined in the autoscaling status we return the default volume size. storage := volume.DefaultPersistentVolumeSize.DeepCopy() // Always adjust to the min value specified by the user in the limits. 
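getMinStorageQuantity, whose hunk ends just above, boils down to a max-of-three rule: start from the default volume size, raise it to the user's storage min, then raise it again to any capacity already recorded in the status, because storage must never be scaled down. A self-contained sketch of that rule (storageFloor, the 1Gi default and the sample quantities are illustrative assumptions, not the patch's code):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// storageFloor returns the lower bound for a tier's storage request: the
// largest of the default volume size, the user-defined minimum, and the
// capacity already granted in the status.
func storageFloor(defaultSize resource.Quantity, userMin, inStatus *resource.Quantity) resource.Quantity {
	storage := defaultSize.DeepCopy()
	if userMin != nil && userMin.Cmp(storage) > 0 {
		storage = userMin.DeepCopy()
	}
	if inStatus != nil && inStatus.Cmp(storage) > 0 {
		storage = inStatus.DeepCopy()
	}
	return storage
}

func main() {
	defaultSize := resource.MustParse("1Gi") // stand-in for volume.DefaultPersistentVolumeSize
	userMin := resource.MustParse("5Gi")     // storage min from the autoscaling policy
	granted := resource.MustParse("8Gi")     // capacity recorded in the status
	fmt.Println(storageFloor(defaultSize, &userMin, &granted).String()) // 8Gi
}

The vertical autoscaler then treats this value as a hard lower bound when sizing nodes, which is what the currentStorage to minStorage parameter rename in vertical.go below conveys.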
diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go index 1c5cb92448..ac48a87dbc 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline.go @@ -14,7 +14,7 @@ import ( ) // GetOfflineNodeSetsResources attempts to create or restore resources.NodeSetsResources without an actual autoscaling -// decision from Elasticsearch. It ensures that even if no decision has been returned by the autoscaling API then +// response from Elasticsearch. It ensures that even if no response has been returned by the autoscaling API then // the NodeSets still respect the min. and max. resource requirements specified by the user. // If resources are within the min. and max. boundaries then they are left untouched. func GetOfflineNodeSetsResources( @@ -33,7 +33,7 @@ func GetOfflineNodeSetsResources( nodeSetsResources = newMinNodeSetResources(autoscalingSpec, nodeSets) } else { // The status contains some resource values for the NodeSets managed by this autoscaling policy, let's reuse them. - nodeSetsResources = nodeSetResourcesFromStatus(currentAutoscalingStatus, currentNodeSetsResources, autoscalingSpec, nodeSets) + nodeSetsResources = nodeSetResourcesFromStatus(currentNodeSetsResources, autoscalingSpec, nodeSets) for _, nodeSet := range currentNodeSetsResources.NodeSetNodeCount { expectedNodeCount += nodeSet.NodeCount } @@ -62,40 +62,54 @@ func GetOfflineNodeSetsResources( } // nodeSetResourcesFromStatus restores NodeSetResources from the status. -// If user removed the limits while offline we are assuming that it wants to take back control on the resources. +// Resources are adjusted to respect min and max in the autoscaling specification, except for storage. +// If a resource is expected but not present in the status then the min. value in the autoscaling specification is used. +// If user removed the limits while offline we are assuming that they want to take back control of the resources. func nodeSetResourcesFromStatus( - currentAutoscalingStatus status.Status, currentNodeSetsResources resources.NodeSetsResources, autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets []string, ) resources.NodeSetsResources { nodeSetsResources := resources.NewNodeSetsResources(autoscalingSpec.Name, nodeSets) - // Ensure memory settings are in the allowed limit range. if autoscalingSpec.IsMemoryDefined() { + // Attempt to get memory value from the status. if currentNodeSetsResources.HasRequest(corev1.ResourceMemory) { nodeSetsResources.SetRequest( corev1.ResourceMemory, adjustQuantity(currentNodeSetsResources.GetRequest(corev1.ResourceMemory), autoscalingSpec.Memory.Min, autoscalingSpec.Memory.Max), ) } else { + // Can't restore memory from status, use the min. from the autoscaling specification. nodeSetsResources.SetRequest(corev1.ResourceMemory, autoscalingSpec.Memory.Min.DeepCopy()) } } - // Ensure CPU settings are in the allowed limit range. if autoscalingSpec.IsCPUDefined() { + // Attempt to get CPU value from the status. if currentNodeSetsResources.HasRequest(corev1.ResourceCPU) { nodeSetsResources.SetRequest( corev1.ResourceCPU, adjustQuantity(currentNodeSetsResources.GetRequest(corev1.ResourceCPU), autoscalingSpec.CPU.Min, autoscalingSpec.CPU.Max), ) } else { + // Can't restore CPU from status, use the min. from the autoscaling specification. 
nodeSetsResources.SetRequest(corev1.ResourceCPU, autoscalingSpec.CPU.Min.DeepCopy()) } } - // Ensure storage capacity is set - nodeSetsResources.SetRequest(corev1.ResourceStorage, getStorage(autoscalingSpec, currentAutoscalingStatus)) + if autoscalingSpec.IsStorageDefined() { + storage := autoscalingSpec.Storage.Min + // Attempt to get storage value from the status. + if currentNodeSetsResources.HasRequest(corev1.ResourceStorage) { + storageInStatus := currentNodeSetsResources.GetRequest(corev1.ResourceStorage) + // Only use storage from the status if it is greater than the min value, to respect the user specification. + // We do not adjust storage with respect to the max value specified by the user since we don't want to decrease the storage capacity. + if storageInStatus.Cmp(storage) > 0 { + storage = storageInStatus + } + } + nodeSetsResources.SetRequest(corev1.ResourceStorage, storage) + } return nodeSetsResources } @@ -114,7 +128,7 @@ func newMinNodeSetResources(autoscalingSpec esv1.AutoscalingPolicySpec, nodeSets return nodeSetsResources } -// adjustQuantity ensures that a quantity is comprised between a min and a max. +// adjustQuantity ensures that the Quantity in value is between min and max. func adjustQuantity(value, min, max resource.Quantity) resource.Quantity { if value.Cmp(min) < 0 { return min diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go index 5ce7069536..d6006bc61d 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/offline_test.go @@ -45,6 +45,22 @@ func TestGetOfflineNodeSetsResources(t *testing.T) { NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("3Gi"), corev1.ResourceStorage: q("35Gi")}}, }, }, + { + name: "Max. value has been decreased by the user, scale down memory", + args: args{ + nodeSets: []string{"region-a", "region-b"}, + autoscalingSpec: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(1, 6).WithMemory("2Gi", "8Gi").WithStorage("10Gi", "20Gi").Build(), + currentAutoscalingStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("10Gi"), corev1.ResourceStorage: q("20Gi")}}}}}, + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "region-a", NodeCount: 3}, {Name: "region-b", NodeCount: 3}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("8Gi"), corev1.ResourceStorage: q("20Gi")}}, + }, + }, { name: "Min. value has been increased by user", args: args{ diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go index a3a787e26c..c6c4d7a585 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -17,7 +17,7 @@ import ( const gibi = int64(1024 * 1024 * 1024) // nodeResources computes the desired amount of memory and storage for a node managed by a given AutoscalingPolicySpec.
-func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Quantity) resources.NodeResources { +func (ctx *Context) nodeResources(minNodesCount int64, minStorage resource.Quantity) resources.NodeResources { nodeResources := resources.NodeResources{} // Compute desired memory quantity for the nodes managed by this AutoscalingPolicySpec. @@ -43,9 +43,9 @@ func (ctx *Context) nodeResources(minNodesCount int64, currentStorage resource.Q ctx.AutoscalingSpec.Storage.Min, ctx.AutoscalingSpec.Storage.Max, ) - if storageRequest.Cmp(currentStorage) < 0 { + if storageRequest.Cmp(minStorage) < 0 { // Do not decrease storage capacity - storageRequest = currentStorage + storageRequest = minStorage } nodeResources.SetRequest(corev1.ResourceStorage, storageRequest) } diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index 48e3dfc2fc..6316ec77a1 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -43,7 +43,7 @@ const ( enterpriseFeaturesDisabledMsg = "Autoscaling is an enterprise feature. Enterprise features are disabled" ) -var defaultReconcile = reconcile.Result{ +var defaultRequeue = reconcile.Result{ Requeue: true, RequeueAfter: 60 * time.Second, } @@ -101,7 +101,7 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil log.Info(enterpriseFeaturesDisabledMsg) r.recorder.Eventf(&es, corev1.EventTypeWarning, license.EventInvalidLicense, enterpriseFeaturesDisabledMsg) // We still schedule a reconciliation in case a valid license is applied later - return defaultReconcile, nil + return defaultRequeue, nil } if common.IsUnmanaged(&es) { @@ -168,7 +168,7 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil return reconcile.Result{}, tracing.CaptureError(ctx, err) } results := &reconciler.Results{} - return results.WithResult(defaultReconcile).WithResult(current).Aggregate() + return results.WithResult(defaultRequeue).WithResult(current).Aggregate() } func newElasticsearchClient( diff --git a/pkg/controller/autoscaling/elasticsearch/controller_test.go b/pkg/controller/autoscaling/elasticsearch/controller_test.go index 4fd680656d..282745721c 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller_test.go +++ b/pkg/controller/autoscaling/elasticsearch/controller_test.go @@ -96,7 +96,7 @@ func TestReconcile(t *testing.T) { esManifest: "ml", isOnline: true, }, - want: defaultReconcile, + want: defaultRequeue, wantErr: false, wantEvents: []string{}, }, @@ -128,7 +128,7 @@ func TestReconcile(t *testing.T) { esManifest: "empty-autoscaling-api-response", isOnline: true, }, - want: defaultReconcile, + want: defaultRequeue, }, { name: "Cluster has just been created, initialize resources", @@ -142,7 +142,7 @@ func TestReconcile(t *testing.T) { esManifest: "cluster-creation", isOnline: false, }, - want: defaultReconcile, + want: defaultRequeue, }, { name: "Cluster is online, data tier has reached max. 
capacity", @@ -156,7 +156,7 @@ func TestReconcile(t *testing.T) { esManifest: "max-storage-reached", isOnline: true, }, - want: defaultReconcile, + want: defaultRequeue, wantEvents: []string{"Warning HorizontalScalingLimitReached Can't provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"}, }, { @@ -171,7 +171,7 @@ func TestReconcile(t *testing.T) { esManifest: "storage-scaled-horizontally", isOnline: true, }, - want: defaultReconcile, + want: defaultRequeue, }, { name: "Cluster does not exit", diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go index f38d95b8db..aad3b10741 100644 --- a/pkg/controller/autoscaling/elasticsearch/driver.go +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -248,5 +248,5 @@ func (r *ReconcileElasticsearch) doOfflineReconciliation( } return results.WithError(err).Aggregate() } - return results.WithResult(defaultReconcile).Aggregate() + return results.WithResult(defaultRequeue).Aggregate() } diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go index 5369b741b0..b755a33155 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources_test.go @@ -113,7 +113,7 @@ func TestResourcesSpecification_MaxMerge(t *testing.T) { } } -func TestNodeSetsResources_IsUsedBy(t *testing.T) { +func TestNodeSetsResources_Match(t *testing.T) { type fields struct { Name string NodeSetNodeCount NodeSetNodeCountList From 4519412eed06fba63ef574aa4601e612b3504d2f Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Mon, 8 Feb 2021 09:07:31 +0100 Subject: [PATCH 09/19] Update comments --- .../autoscaling/elasticsearch/autoscaler/horizontal.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go index 449f653e08..afabd29ede 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go @@ -17,6 +17,10 @@ func (ctx *Context) scaleHorizontally( nodeCapacity resources.NodeResources, // resources for each node in the tier/policy, as computed by the vertical autoscaler. ) resources.NodeSetsResources { totalRequiredCapacity := ctx.RequiredCapacity.Total // total required resources, at the tier level. + + // The vertical autoscaler computed the expected capacity for each node in the autoscaling policy. The minimum number of nodes, specified by the user + // in AutoscalingSpec.NodeCount.Min, can then be used to know what amount of resources we already have (AutoscalingSpec.NodeCount.Min * nodeCapacity). + // nodeToAdd is the number of nodes to be added to that min. amount of resources to match the required capacity. 
var nodeToAdd int32 // Scale horizontally to match memory requirements From b71ce52082963f67d822ccae631bce00711c1d65 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Mon, 8 Feb 2021 10:06:28 +0100 Subject: [PATCH 10/19] Preserve existing volume claim template --- .../autoscaling/elasticsearch/reconcile.go | 27 +++++++++---------- .../elasticsearch-expected.yml | 1 + .../elasticsearch.yml | 1 + .../validation/autoscaling_validation.go | 12 ++++----- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/reconcile.go b/pkg/controller/autoscaling/elasticsearch/reconcile.go index 849c4c4214..d563a1d7a6 100644 --- a/pkg/controller/autoscaling/elasticsearch/reconcile.go +++ b/pkg/controller/autoscaling/elasticsearch/reconcile.go @@ -94,30 +94,29 @@ func reconcileElasticsearch( } func newVolumeClaimTemplate(storageQuantity resource.Quantity, nodeSet esv1.NodeSet) ([]corev1.PersistentVolumeClaim, error) { - onlyOneVolumeClaimTemplate, volumeClaimTemplateName := validation.HasAtMostOnePersistentVolumeClaim(nodeSet) + onlyOneVolumeClaimTemplate, volumeClaimTemplate := validation.HasAtMostOnePersistentVolumeClaim(nodeSet) if !onlyOneVolumeClaimTemplate { return nil, fmt.Errorf(validation.UnexpectedVolumeClaimError) } - if volumeClaimTemplateName == "" { - volumeClaimTemplateName = volume.ElasticsearchDataVolumeName - } - return []corev1.PersistentVolumeClaim{ - { + if volumeClaimTemplate == nil { + // Init a new volume claim template + volumeClaimTemplate = &corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ - Name: volumeClaimTemplateName, + Name: volume.ElasticsearchDataVolumeName, }, Spec: corev1.PersistentVolumeClaimSpec{ AccessModes: []corev1.PersistentVolumeAccessMode{ corev1.ReadWriteOnce, }, - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceStorage: storageQuantity, - }, - }, }, - }, - }, nil + } + } + // Adjust the size + if volumeClaimTemplate.Spec.Resources.Requests == nil { + volumeClaimTemplate.Spec.Resources.Requests = make(corev1.ResourceList) + } + volumeClaimTemplate.Spec.Resources.Requests[corev1.ResourceStorage] = storageQuantity + return []corev1.PersistentVolumeClaim{*volumeClaimTemplate}, nil } func (r *ReconcileElasticsearch) fetchElasticsearch( diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml index 044601b2f8..0093d3e978 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml @@ -37,6 +37,7 @@ spec: - metadata: name: elasticsearch-data spec: + storageClassName: fast accessModes: - ReadWriteOnce resources: diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml index 51336f0035..5aa3025222 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml @@ -37,6 +37,7 @@ spec: - metadata: name: elasticsearch-data spec: + storageClassName: fast accessModes: - ReadWriteOnce resources: diff --git 
a/pkg/controller/elasticsearch/validation/autoscaling_validation.go b/pkg/controller/elasticsearch/validation/autoscaling_validation.go index c6fa77739f..a1ee059589 100644 --- a/pkg/controller/elasticsearch/validation/autoscaling_validation.go +++ b/pkg/controller/elasticsearch/validation/autoscaling_validation.go @@ -13,6 +13,7 @@ import ( "github.com/elastic/cloud-on-k8s/pkg/controller/common/version" "github.com/elastic/cloud-on-k8s/pkg/utils/set" "github.com/elastic/cloud-on-k8s/pkg/utils/stringsutil" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/validation/field" ) @@ -276,14 +277,13 @@ func containsStringSlice(slices [][]string, slice []string) bool { } // HasAtMostOnePersistentVolumeClaim returns true if the NodeSet has at most one volume claim template. It also returns -// the name of the volume claim template in that case. -func HasAtMostOnePersistentVolumeClaim(nodeSet esv1.NodeSet) (bool, string) { -	//volumeClaimTemplates := len(nodeSet.VolumeClaimTemplates) +// a copy of the volume claim template in that case. +func HasAtMostOnePersistentVolumeClaim(nodeSet esv1.NodeSet) (bool, *corev1.PersistentVolumeClaim) { switch len(nodeSet.VolumeClaimTemplates) { case 0: - return true, "" + return true, nil case 1: - return true, nodeSet.VolumeClaimTemplates[0].Name + return true, nodeSet.VolumeClaimTemplates[0].DeepCopy() } - return false, "" + return false, nil } From 2dbd006ad7e90c46bfa293f87825f675c8cca243 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 08:17:06 +0100 Subject: [PATCH 11/19] Rename PolicyStateType to AutoscalingEventType --- .../elasticsearch/status/status.go | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/status/status.go b/pkg/controller/autoscaling/elasticsearch/status/status.go index d0e6c6ae5e..aa21c00baa 100644 --- a/pkg/controller/autoscaling/elasticsearch/status/status.go +++ b/pkg/controller/autoscaling/elasticsearch/status/status.go @@ -15,12 +15,12 @@ import ( const ( ElasticsearchAutoscalingStatusAnnotationName = "elasticsearch.alpha.elastic.co/autoscaling-status" - VerticalScalingLimitReached PolicyStateType = "VerticalScalingLimitReached" - HorizontalScalingLimitReached PolicyStateType = "HorizontalScalingLimitReached" - MemoryRequired PolicyStateType = "MemoryRequired" - EmptyResponse PolicyStateType = "EmptyResponse" - StorageRequired PolicyStateType = "StorageRequired" - NoNodeSet PolicyStateType = "NoNodeSet" + VerticalScalingLimitReached AutoscalingEventType = "VerticalScalingLimitReached" + HorizontalScalingLimitReached AutoscalingEventType = "HorizontalScalingLimitReached" + MemoryRequired AutoscalingEventType = "MemoryRequired" + EmptyResponse AutoscalingEventType = "EmptyResponse" + StorageRequired AutoscalingEventType = "StorageRequired" + NoNodeSet AutoscalingEventType = "NoNodeSet" ) type Status struct { @@ -68,13 +68,13 @@ type AutoscalingPolicyStatusBuilder struct { policyName string nodeSetsResources resources.NodeSetsResources lastModificationTime metav1.Time - states map[PolicyStateType]PolicyState + states map[AutoscalingEventType]PolicyState } func NewAutoscalingPolicyStatusBuilder(name string) *AutoscalingPolicyStatusBuilder { return &AutoscalingPolicyStatusBuilder{ policyName: name, - states: make(map[PolicyStateType]PolicyState), + states: make(map[AutoscalingEventType]PolicyState), } } @@ -109,7 +109,7 @@ func (psb *AutoscalingPolicyStatusBuilder)
SetLastModificationTime(lastModificat } // RecordEvent records a new event (type + message) for the tier. -func (psb *AutoscalingPolicyStatusBuilder) RecordEvent(stateType PolicyStateType, message string) *AutoscalingPolicyStatusBuilder { +func (psb *AutoscalingPolicyStatusBuilder) RecordEvent(stateType AutoscalingEventType, message string) *AutoscalingPolicyStatusBuilder { if policyState, ok := psb.states[stateType]; ok { policyState.Messages = append(policyState.Messages, message) psb.states[stateType] = policyState @@ -122,11 +122,11 @@ func (psb *AutoscalingPolicyStatusBuilder) RecordEvent(stateType PolicyStateType return psb } -type PolicyStateType string +type AutoscalingEventType string type PolicyState struct { - Type PolicyStateType `json:"type"` - Messages []string `json:"messages"` + Type AutoscalingEventType `json:"type"` + Messages []string `json:"messages"` } type AutoscalingStatusBuilder struct { From d192a8f9c2f8a2208bed28b0bf67290a99b04d63 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 08:19:18 +0100 Subject: [PATCH 12/19] Use defer tracing.Span(&ctx)() --- pkg/controller/autoscaling/elasticsearch/driver.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go index aad3b10741..73c5e10b7b 100644 --- a/pkg/controller/autoscaling/elasticsearch/driver.go +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -60,8 +60,7 @@ func (r *ReconcileElasticsearch) reconcileInternal( // Check if the Service is available. func (r *ReconcileElasticsearch) isElasticsearchReachable(ctx context.Context, es esv1.Elasticsearch) (bool, error) { - span, _ := apm.StartSpan(ctx, "is_es_reachable", tracing.SpanTypeApp) - defer span.End() + defer tracing.Span(&ctx)() externalService, err := services.GetExternalService(r.Client, es) if apierrors.IsNotFound(err) { return false, nil From 8bd278ea2b0e8d57c147a6dc20fb927c16bdeaa3 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 08:28:06 +0100 Subject: [PATCH 13/19] Minor refactoring and comments update --- .../autoscaler/autoscaler_test.go | 2 +- .../autoscaling/elasticsearch/controller.go | 3 ++- .../autoscaling/elasticsearch/driver.go | 26 +++++++++---------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go index 9b9468ba3a..a1b8afad9d 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go @@ -16,7 +16,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" ) -func Test_applyScaleDecision(t *testing.T) { +func Test_GetResources(t *testing.T) { defaultNodeSets := esv1.NodeSetList{{ Name: "default", }} diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index 6316ec77a1..920dda45be 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -48,7 +48,8 @@ var defaultRequeue = reconcile.Result{ RequeueAfter: 60 * time.Second, } -// ReconcileElasticsearch reconciles autoscaling policies and Elasticsearch resources specifications based on autoscaling decisions. 
+// ReconcileElasticsearch reconciles autoscaling policies and Elasticsearch resources specifications based on +// Elasticsearch autoscaling API response. type ReconcileElasticsearch struct { k8s.Client operator.Parameters diff --git a/pkg/controller/autoscaling/elasticsearch/driver.go b/pkg/controller/autoscaling/elasticsearch/driver.go index 73c5e10b7b..5af3ed4580 100644 --- a/pkg/controller/autoscaling/elasticsearch/driver.go +++ b/pkg/controller/autoscaling/elasticsearch/driver.go @@ -130,19 +130,11 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( continue } - // Get the decision from the Elasticsearch API + // Get the required capacity for this autoscaling policy from the Elasticsearch API var nodeSetsResources resources.NodeSetsResources - switch capacity, hasCapacity := requiredCapacity.Policies[autoscalingPolicy.Name]; hasCapacity && !capacity.RequiredCapacity.IsEmpty() { - case false: - // We didn't receive a decision for this tier, or the decision is empty. We can only ensure that resources are within the allowed ranges. - log.V(1).Info( - "No decision received from Elasticsearch, ensure resources limits are respected", - "policy", autoscalingPolicy.Name, - ) - statusBuilder.ForPolicy(autoscalingPolicy.Name).RecordEvent(status.EmptyResponse, "No required capacity from Elasticsearch") - nodeSetsResources = autoscaler.GetOfflineNodeSetsResources(log, nodeSetList.Names(), autoscalingPolicy, currentAutoscalingStatus) - case true: - // We received a capacity decision from Elasticsearch for this policy. + capacity, hasCapacity := requiredCapacity.Policies[autoscalingPolicy.Name] + if hasCapacity && !capacity.RequiredCapacity.IsEmpty() { + // We received a required capacity from Elasticsearch for this policy. log.Info( "Required capacity for policy", "policy", autoscalingPolicy.Name, @@ -163,6 +155,14 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( StatusBuilder: statusBuilder, } nodeSetsResources = ctx.GetResources() + } else { + // We didn't receive a required capacity for this tier, or the response is empty. We can only ensure that resources are within the allowed ranges. + log.V(1).Info( + "No required capacity received from Elasticsearch, ensure resource limits are respected", + "policy", autoscalingPolicy.Name, + ) + statusBuilder.ForPolicy(autoscalingPolicy.Name).RecordEvent(status.EmptyResponse, "No required capacity from Elasticsearch") + nodeSetsResources = autoscaler.GetOfflineNodeSetsResources(log, nodeSetList.Names(), autoscalingPolicy, currentAutoscalingStatus) } // Add the result to the list of the next resources nextClusterResources = append(nextClusterResources, nodeSetsResources) @@ -190,7 +190,7 @@ func (r *ReconcileElasticsearch) attemptOnlineReconciliation( return reconcile.Result{}, nil } -// canDecide ensures that the user has provided resource ranges to apply Elasticsearch autoscaling decision. +// canDecide ensures that the user has provided resource ranges to process the Elasticsearch API autoscaling response. // Expected ranges are not consistent across all deciders. For example, ml may only require memory limits, while processing // the data deciders' response may require storage limits. // Only memory and storage are supported since CPU is not part of the autoscaling API specification.
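As background for the defer tracing.Span(&ctx)() pattern adopted in PATCH 12/19 above, here is a hedged sketch of what such a helper can look like; the real helper in the ECK tracing package may differ in naming, span typing, and caller detection:

package tracing

import (
	"context"
	"runtime"

	"go.elastic.co/apm"
)

// Span starts an APM span named after the calling function and returns the
// function that ends it, so call sites reduce to: defer tracing.Span(&ctx)()
func Span(ctx *context.Context) func() {
	name := "unknown"
	if pc, _, _, ok := runtime.Caller(1); ok {
		name = runtime.FuncForPC(pc).Name()
	}
	span, spanCtx := apm.StartSpan(*ctx, name, "app")
	*ctx = spanCtx // the caller keeps working with the span-enriched context
	return func() { span.End() }
}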
From a681a737e68af90928bf3353e697b4c77ef4c3bb Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 08:36:53 +0100 Subject: [PATCH 14/19] nodeToAdd -> nodesToAdd --- .../autoscaling/elasticsearch/autoscaler/horizontal.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go index afabd29ede..fb55d23f97 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/horizontal.go @@ -20,22 +20,22 @@ func (ctx *Context) scaleHorizontally( // The vertical autoscaler computed the expected capacity for each node in the autoscaling policy. The minimum number of nodes, specified by the user // in AutoscalingSpec.NodeCount.Min, can then be used to know what amount of resources we already have (AutoscalingSpec.NodeCount.Min * nodeCapacity). - // nodeToAdd is the number of nodes to be added to that min. amount of resources to match the required capacity. - var nodeToAdd int32 + // nodesToAdd is the number of nodes to be added to that min. amount of resources to match the required capacity. + var nodesToAdd int32 // Scale horizontally to match memory requirements if !totalRequiredCapacity.Memory.IsZero() { nodeMemory := nodeCapacity.GetRequest(corev1.ResourceMemory) - nodeToAdd = ctx.getNodesToAdd(nodeMemory.Value(), totalRequiredCapacity.Memory.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceMemory)) + nodesToAdd = ctx.getNodesToAdd(nodeMemory.Value(), totalRequiredCapacity.Memory.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceMemory)) } // Scale horizontally to match storage requirements if !totalRequiredCapacity.Storage.IsZero() { nodeStorage := nodeCapacity.GetRequest(corev1.ResourceStorage) - nodeToAdd = max(nodeToAdd, ctx.getNodesToAdd(nodeStorage.Value(), totalRequiredCapacity.Storage.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceStorage))) + nodesToAdd = max(nodesToAdd, ctx.getNodesToAdd(nodeStorage.Value(), totalRequiredCapacity.Storage.Value(), ctx.AutoscalingSpec.NodeCount.Min, ctx.AutoscalingSpec.NodeCount.Max, string(corev1.ResourceStorage))) } - totalNodes := nodeToAdd + ctx.AutoscalingSpec.NodeCount.Min + totalNodes := nodesToAdd + ctx.AutoscalingSpec.NodeCount.Min ctx.Log.Info("Horizontal autoscaler", "policy", ctx.AutoscalingSpec.Name, "scope", "tier", "count", totalNodes, From e96a25e3eb591f5e4deee999c3f6b2dac89c70e3 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 09:49:37 +0100 Subject: [PATCH 15/19] Fix vertical limit message and add unit tests --- .../autoscaler/autoscaler_test.go | 79 ++++++++++++++++++- .../elasticsearch/autoscaler/vertical.go | 2 +- 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go index a1b8afad9d..624c8f1f7c 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/autoscaler_test.go @@ -11,6 +11,7 @@ import ( "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" 
"github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/resource" @@ -27,10 +28,11 @@ func Test_GetResources(t *testing.T) { policy esv1.AutoscalingPolicySpec } tests := []struct { - name string - args args - want resources.NodeSetsResources - wantErr bool + name string + args args + want resources.NodeSetsResources + wantPolicyState []status.PolicyState + wantErr bool }{ { name: "Scale both vertically and horizontally to fulfil storage capacity request", @@ -139,6 +141,64 @@ func Test_GetResources(t *testing.T) { NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("8G")}}, }, }, + { + name: "Do not exceed node count specified by the user", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("4G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("6G"). + tierMemory("48G"). // would require 6 nodes, user set a node count limit to 5 + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 5).WithMemory("5G", "8G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 5}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("8G")}}, + }, + wantPolicyState: []status.PolicyState{ + { + Type: "HorizontalScalingLimitReached", + Messages: []string{"Can't provide total required memory 48000000000, max number of nodes is 5, requires 6 nodes"}, + }, + }, + }, + { + name: "Do not exceed horizontal and vertical limits specified by the user", + args: args{ + currentNodeSets: defaultNodeSets, + nodeSetsStatus: status.Status{AutoscalingPolicyStatuses: []status.AutoscalingPolicyStatus{{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 3}}, + ResourcesSpecification: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("4G"), corev1.ResourceStorage: q("1Gi")}}}}, + }, + requiredCapacity: newRequiredCapacityBuilder(). + nodeMemory("8G"). // user set a limit to 5G / node + tierMemory("48G"). 
// would require 7 nodes of 7G, user set a node count limit to 6 + build(), + policy: NewAutoscalingSpecBuilder("my-autoscaling-policy").WithNodeCounts(3, 6).WithMemory("5G", "7G").Build(), + }, + want: resources.NodeSetsResources{ + Name: "my-autoscaling-policy", + NodeSetNodeCount: []resources.NodeSetNodeCount{{Name: "default", NodeCount: 6}}, + NodeResources: resources.NodeResources{Requests: map[corev1.ResourceName]resource.Quantity{corev1.ResourceMemory: q("7G")}}, + }, + wantPolicyState: []status.PolicyState{ + { + Type: "VerticalScalingLimitReached", + Messages: []string{"Node required memory 8000000000 is greater than max allowed: 7000000000"}, + }, + { + Type: "HorizontalScalingLimitReached", + Messages: []string{"Can't provide total required memory 48000000000, max number of nodes is 6, requires 7 nodes"}, + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -153,10 +213,21 @@ if got := ctx.GetResources(); !equality.Semantic.DeepEqual(got, tt.want) { t.Errorf("autoscaler.GetResources() = %v, want %v", got, tt.want) } + gotStatus := ctx.StatusBuilder.Build() + assert.ElementsMatch(t, getPolicyStates(gotStatus, "my-autoscaling-policy"), tt.wantPolicyState) }) } } +func getPolicyStates(status status.Status, policyName string) []status.PolicyState { + for _, state := range status.AutoscalingPolicyStatuses { + if state.Name == policyName { + return state.PolicyStates + } + } + return nil +} + // - AutoscalingSpec builder type AutoscalingSpecBuilder struct { diff --git a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go index c6c4d7a585..d3bd755b31 100644 --- a/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go +++ b/pkg/controller/autoscaling/elasticsearch/autoscaler/vertical.go @@ -98,7 +98,7 @@ func (ctx *Context) getResourceValue( ForPolicy(ctx.AutoscalingSpec.Name).
RecordEvent( status.VerticalScalingLimitReached, - fmt.Sprintf("Node required %s %d is greater than max allowed: %d", resourceType, nodeRequired, max.Value()), + fmt.Sprintf("Node required %s %d is greater than max allowed: %d", resourceType, nodeRequired.Value(), max.Value()), ) } From a4baa6b393e0b78b7700066458511b57bdac89b0 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Tue, 9 Feb 2021 12:11:43 +0100 Subject: [PATCH 16/19] Fix compiler error --- pkg/controller/autoscaling/elasticsearch/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index 920dda45be..6b5a4d25be 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -222,7 +222,7 @@ func newElasticsearchClient( Name: user.ControllerUserName, Password: string(password), }, - *v, + v, caCerts, esclient.Timeout(es), ), nil From 0dd482d3ea76253565e8881d620ec3abee7a4d0f Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 11 Feb 2021 09:41:31 +0100 Subject: [PATCH 17/19] Use CheckCompatibility --- .../autoscaling/elasticsearch/controller.go | 4 +--- .../elasticsearch/controller_test.go | 21 +++++++++---------- .../elasticsearch-expected.yml | 1 + .../cluster-creation/elasticsearch.yml | 1 + .../elasticsearch-expected.yml | 1 + .../elasticsearch.yml | 1 + .../elasticsearch-expected.yml | 1 + .../max-storage-reached/elasticsearch.yml | 1 + .../elasticsearch-expected.yml | 1 + .../elasticsearch.yml | 1 + .../testdata/ml/elasticsearch-expected.yml | 1 + .../testdata/ml/elasticsearch.yml | 1 + .../elasticsearch-expected.yml | 1 + .../elasticsearch.yml | 1 + 14 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index 6b5a4d25be..ab4f2ad722 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -21,7 +21,6 @@ import ( "github.com/elastic/cloud-on-k8s/pkg/controller/common/tracing" "github.com/elastic/cloud-on-k8s/pkg/controller/common/version" esclient "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/client" - "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/label" "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/services" "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/user" "github.com/elastic/cloud-on-k8s/pkg/controller/elasticsearch/validation" @@ -110,8 +109,7 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil return reconcile.Result{}, nil } - selector := map[string]string{label.ClusterNameLabelName: es.Name} - compat, err := annotation.ReconcileCompatibility(ctx, r.Client, &es, selector, r.OperatorInfo.BuildInfo.Version) + compat, err := annotation.CheckCompatibility(&es, r.OperatorInfo.BuildInfo.Version) if err != nil { k8s.EmitErrorEvent(r.recorder, err, &es, events.EventCompatCheckError, "Error during compatibility check: %v", err) return reconcile.Result{}, tracing.CaptureError(ctx, err) diff --git a/pkg/controller/autoscaling/elasticsearch/controller_test.go b/pkg/controller/autoscaling/elasticsearch/controller_test.go index 282745721c..8b52f1d691 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller_test.go +++ b/pkg/controller/autoscaling/elasticsearch/controller_test.go @@ -13,6 +13,7 @@ import ( "reflect" "testing" 
+ "github.com/elastic/cloud-on-k8s/pkg/about" esv1 "github.com/elastic/cloud-on-k8s/pkg/apis/elasticsearch/v1" "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/resources" "github.com/elastic/cloud-on-k8s/pkg/controller/autoscaling/elasticsearch/status" @@ -68,7 +69,6 @@ var ( func TestReconcile(t *testing.T) { type fields struct { EsClient *fakeEsClient - Parameters operator.Parameters recorder *record.FakeRecorder licenseChecker license.Checker } @@ -88,7 +88,6 @@ func TestReconcile(t *testing.T) { name: "ML case where tier total memory was lower than node memory", fields: fields{ EsClient: newFakeEsClient(t).withCapacity("ml"), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -104,7 +103,6 @@ func TestReconcile(t *testing.T) { name: "Simulate an error while updating the autoscaling policies, we still want to respect min nodes count set by user", fields: fields{ EsClient: newFakeEsClient(t).withErrorOnDeleteAutoscalingAutoscalingPolicies(), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -120,7 +118,6 @@ func TestReconcile(t *testing.T) { name: "Cluster is online, but answer from the API is empty, do not touch anything", fields: fields{ EsClient: newFakeEsClient(t).withCapacity("empty-autoscaling-api-response"), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -134,7 +131,6 @@ func TestReconcile(t *testing.T) { name: "Cluster has just been created, initialize resources", fields: fields{ EsClient: newFakeEsClient(t), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -148,7 +144,6 @@ func TestReconcile(t *testing.T) { name: "Cluster is online, data tier has reached max. 
capacity", fields: fields{ EsClient: newFakeEsClient(t).withCapacity("max-storage-reached"), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -163,7 +158,6 @@ func TestReconcile(t *testing.T) { name: "Cluster is online, data tier needs to be scaled up from 8 to 9 nodes", fields: fields{ EsClient: newFakeEsClient(t).withCapacity("storage-scaled-horizontally"), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -177,7 +171,6 @@ func TestReconcile(t *testing.T) { name: "Cluster does not exit", fields: fields{ EsClient: newFakeEsClient(t), - Parameters: operator.Parameters{}, recorder: record.NewFakeRecorder(1000), licenseChecker: &fakeLicenceChecker{}, }, @@ -213,9 +206,15 @@ func TestReconcile(t *testing.T) { r := &ReconcileElasticsearch{ Client: k8sClient, esClientProvider: tt.fields.EsClient.newFakeElasticsearchClient, - Parameters: tt.fields.Parameters, - recorder: tt.fields.recorder, - licenseChecker: tt.fields.licenseChecker, + Parameters: operator.Parameters{ + OperatorInfo: about.OperatorInfo{ + BuildInfo: about.BuildInfo{ + Version: "1.5.0", + }, + }, + }, + recorder: tt.fields.recorder, + licenseChecker: tt.fields.licenseChecker, } got, err := r.Reconcile( context.Background(), diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml index e580443ced..d16b392596 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml index f09fda0ac6..f7731a7d0b 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/cluster-creation/elasticsearch.yml @@ -5,6 +5,7 @@ metadata: name: testes namespace: testns annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml index ad10c8ae1f..bd32790022 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, 
{ "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml index 51336f0035..6045b4776a 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/empty-autoscaling-api-response/elasticsearch.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml index d597fe9e2b..498d075454 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml index d597fe9e2b..498d075454 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml +++ 
b/pkg/controller/autoscaling/elasticsearch/testdata/max-storage-reached/elasticsearch.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml index 69421fd15a..b805c2c7b7 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 9, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":9}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":3}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml index 6875d0a2f7..096a9607be 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/min-nodes-increased-by-user/elasticsearch.yml @@ -5,6 +5,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 9, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 
nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml index fc7e3f97b8..cef378f0a3 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "ml_only", "roles": ["ml"], "deciders": { "ml": {} }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 1, "max": 3 }, "memory": { "min": "2Gi", "max": "7Gi" }, "storage": { "min": "5Gi", "max": "20Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"ml_only","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"4Gi"}},"state":[],"lastModificationTime":"2021-01-19T14:20:58Z"}]}' name: testes diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml index 9acd824901..cbf8f03dbd 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/ml/elasticsearch.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "ml_only", "roles": ["ml"], "deciders": { "ml": {} }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 1, "max": 3 }, "memory": { "min": "2Gi", "max": "7Gi" }, "storage": { "min": "5Gi", "max": "20Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"ml_only","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"3520439718"}},"state":[],"lastModificationTime":"2021-01-19T14:20:58Z"}]}' name: testes diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml index 0093d3e978..2a12bc709d 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch-expected.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", 
"max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":9}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg diff --git a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml index 5aa3025222..e414fc08ff 100644 --- a/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml +++ b/pkg/controller/autoscaling/elasticsearch/testdata/storage-scaled-horizontally/elasticsearch.yml @@ -2,6 +2,7 @@ apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: annotations: + common.k8s.elastic.co/controller-version: 1.4.0 elasticsearch.alpha.elastic.co/autoscaling-spec: '{ "policies": [{ "name": "di", "roles": ["data", "ingest"], "resources": { "nodeCount": { "min": 3, "max": 9 }, "cpu": { "min": 2, "max": 6 }, "memory": { "min": "2Gi", "max": "8Gi" }, "storage": { "min": "1Gi", "max": "4Gi" } } }, { "name": "ml", "roles": ["ml"], "deciders": { "ml": { "down_scale_delay": "5m" } }, "resources": { "nodeCount": { "min": 1, "max": 9 }, "cpu": { "min": 2, "max": 2 }, "memory": { "min": "2Gi", "max": "6Gi" }, "storage": { "min": "1Gi", "max": "2Gi" } } }] }' elasticsearch.alpha.elastic.co/autoscaling-status: '{"policies":[{"name":"di","nodeSets":[{"name":"di","nodeCount":8}],"resources":{"requests":{"cpu":"6","memory":"8Gi","storage":"4Gi"}},"state":[{"type":"HorizontalScalingLimitReached","messages":["Can''t provide total required storage 37106614256, max number of nodes is 8, requires 9 nodes"]}],"lastModificationTime":"2021-01-17T05:59:22Z"},{"name":"ml","nodeSets":[{"name":"ml","nodeCount":1}],"resources":{"requests":{"cpu":"2","memory":"2Gi"}},"state":[],"lastModificationTime":"2021-01-17T13:25:18Z"}]}' elasticsearch.k8s.elastic.co/cluster-uuid: FghvC9XFS16wDXdAusm9yg From eb61c933d21405a1551f176b5e380e9718d7c948 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 11 Feb 2021 11:38:36 +0100 Subject: [PATCH 18/19] Logging: only print the names of the nodeSets in debug mode --- pkg/apis/elasticsearch/v1/autoscaling.go | 9 +++++++++ pkg/controller/autoscaling/elasticsearch/controller.go | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/apis/elasticsearch/v1/autoscaling.go b/pkg/apis/elasticsearch/v1/autoscaling.go index ea99841a2f..b955a9897e 100644 --- a/pkg/apis/elasticsearch/v1/autoscaling.go +++ b/pkg/apis/elasticsearch/v1/autoscaling.go @@ -157,6 +157,15 @@ func rolesMatch(roles1, roles2 []string) bool { // +kubebuilder:object:generate=false type AutoscaledNodeSets map[string]NodeSetList +// Names returns the names of the node sets indexed by the autoscaling policy name. +func (n AutoscaledNodeSets) Names() map[string][]string { + autoscalingPolicies := make(map[string][]string) + for policy, nodeSetList := range n { + autoscalingPolicies[policy] = nodeSetList.Names() + } + return autoscalingPolicies +} + // AutoscalingPolicies returns the list of autoscaling policies names from the named tiers. 
func (n AutoscaledNodeSets) AutoscalingPolicies() set.StringSet { autoscalingPolicies := set.Make() diff --git a/pkg/controller/autoscaling/elasticsearch/controller.go b/pkg/controller/autoscaling/elasticsearch/controller.go index ab4f2ad722..457af7bd22 100644 --- a/pkg/controller/autoscaling/elasticsearch/controller.go +++ b/pkg/controller/autoscaling/elasticsearch/controller.go @@ -153,7 +153,7 @@ func (r *ReconcileElasticsearch) Reconcile(ctx context.Context, request reconcil if nodeSetErr != nil { return reconcile.Result{}, tracing.CaptureError(ctx, nodeSetErr) } - log.V(1).Info("Autoscaling policies and node sets", "policies", autoscaledNodeSets) + log.V(1).Info("Autoscaling policies and node sets", "policies", autoscaledNodeSets.Names()) From 4d8dfbc18c1d9a72e78185a429c5a94a6b546ae7 Mon Sep 17 00:00:00 2001 From: Michael Morello Date: Thu, 11 Feb 2021 11:39:47 +0100 Subject: [PATCH 19/19] Improve comment --- pkg/controller/autoscaling/elasticsearch/resources/resources.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/autoscaling/elasticsearch/resources/resources.go b/pkg/controller/autoscaling/elasticsearch/resources/resources.go index 5a50dad7ab..9f6a50ca08 100644 --- a/pkg/controller/autoscaling/elasticsearch/resources/resources.go +++ b/pkg/controller/autoscaling/elasticsearch/resources/resources.go @@ -37,7 +37,7 @@ func NewNodeSetsResources(name string, nodeSetNames []string) NodeSetsResources type ClusterResources []NodeSetsResources // Match returns true if the resources assigned to a container in a NodeSet match the ones specified in the NodeSetsResources. -// It returns false if the container is not found in the NodeSet. +// It also returns false if the container is not found in the NodeSet. func (ntr NodeSetsResources) Match(containerName string, nodeSet v1.NodeSet) (bool, error) { for _, nodeSetNodeCount := range ntr.NodeSetNodeCount { if nodeSetNodeCount.Name != nodeSet.Name {