From a540c1d74cc6e5eab36a704bba8987a67d047a83 Mon Sep 17 00:00:00 2001 From: qmhu Date: Wed, 7 Dec 2022 11:10:21 +0800 Subject: [PATCH] hpa recommendation docs --- pkg/recommendation/manager.go | 24 ++- .../recommender/hpa/recommend.go | 16 +- .../recommender/replicas/recommend.go | 4 +- .../en/docs/Getting started/introduction.md | 2 +- .../Recommendation/hpa-recommendation.md | 138 ++++++++++++++++++ .../recommendation-framework.md | 2 +- .../Recommendation/replicas-recommendation.md | 4 +- .../zh/docs/Getting started/introduction.md | 2 +- .../Recommendation/hpa-recommendation.md | 138 ++++++++++++++++++ .../recommendation-framework.md | 1 + 10 files changed, 303 insertions(+), 28 deletions(-) create mode 100644 site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md create mode 100644 site/content/zh/docs/Tutorials/Recommendation/hpa-recommendation.md diff --git a/pkg/recommendation/manager.go b/pkg/recommendation/manager.go index e356f456f..246894552 100644 --- a/pkg/recommendation/manager.go +++ b/pkg/recommendation/manager.go @@ -137,66 +137,64 @@ func (m *manager) loadConfigFile() error { } func Run(ctx *framework.RecommendationContext, recommender recommender.Recommender) error { - //// If context is canceled, directly return. - //if ctx.Canceled() { - // klog.Infof("Recommender %q has been cancelled...", recommender.Name()) - // return nil - //} + klog.Infof("%s: start to run recommender %q.", ctx.String(), recommender.Name()) // 1. Filter phase err := recommender.Filter(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at filter phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at filter phase: %v", ctx.String(), recommender.Name(), err) return err } // 2. 
PrePrepare phase err = recommender.CheckDataProviders(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at prepare check data provider phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at prepare check data provider phase: %v", ctx.String(), recommender.Name(), err) return err } // 3. Prepare phase err = recommender.CollectData(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at prepare collect data phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at prepare collect data phase: %v", ctx.String(), recommender.Name(), err) return err } // 4. PostPrepare phase err = recommender.PostProcessing(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at prepare data post processing phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at prepare data post processing phase: %v", ctx.String(), recommender.Name(), err) return err } // 5. PreRecommend phase err = recommender.PreRecommend(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at pre commend phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at pre commend phase: %v", ctx.String(), recommender.Name(), err) return err } // 6. Recommend phase err = recommender.Recommend(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at recommend phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at recommend phase: %v", ctx.String(), recommender.Name(), err) return err } // 7. PostRecommend phase, add policy err = recommender.Policy(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at recommend policy phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at recommend policy phase: %v", ctx.String(), recommender.Name(), err) return err } // 8. 
Observe phase err = recommender.Observe(ctx) if err != nil { - klog.Errorf("%s: recommender %q failed at observe phase!", ctx.String(), recommender.Name()) + klog.Errorf("%s: recommender %q failed at observe phase: %v", ctx.String(), recommender.Name(), err) return err } + + klog.Infof("%s: finish to run recommender %q.", ctx.String(), recommender.Name()) return nil } diff --git a/pkg/recommendation/recommender/hpa/recommend.go b/pkg/recommendation/recommender/hpa/recommend.go index 52aac8409..bc91540c8 100644 --- a/pkg/recommendation/recommender/hpa/recommend.go +++ b/pkg/recommendation/recommender/hpa/recommend.go @@ -40,12 +40,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error { predictable := true if len(ctx.ResultValues) != 1 { - klog.Warningf("ReplicasAdvisor prediction metrics data is unexpected, List length is %d ", len(ctx.ResultValues)) + klog.Warningf("%s: prediction metrics data is unexpected, List length is %d ", ctx.String(), len(ctx.ResultValues)) predictable = false } if rr.PredictableEnabled && !predictable { - return fmt.Errorf("ReplicasAdvisor cannot predict target") + return fmt.Errorf("cannot predict target") } minReplicas, cpuMax, percentileCpu, err := rr.GetMinReplicas(ctx) @@ -55,12 +55,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error { err = rr.checkMinCpuUsageThreshold(cpuMax) if err != nil { - return fmt.Errorf("%s checkMinCpuUsageThreshold failed: %v", rr.Name(), err) + return fmt.Errorf("checkMinCpuUsageThreshold failed: %v", err) } medianMin, medianMax, err := rr.minMaxMedians(ctx.InputValues) if err != nil { - return fmt.Errorf("%s minMaxMedians failed: %v", rr.Name(), err) + return fmt.Errorf("minMaxMedians failed: %v", err) } err = rr.checkFluctuation(medianMin, medianMax) @@ -70,12 +70,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error { targetUtilization, _, err := rr.proposeTargetUtilization(ctx) if err != nil { - return 
fmt.Errorf("ReplicasAdvisor proposeTargetUtilization failed: %v", err) + return fmt.Errorf("proposeTargetUtilization failed: %v", err) } maxReplicas, err := rr.proposeMaxReplicas(&ctx.PodTemplate, percentileCpu, targetUtilization, minReplicas) if err != nil { - return fmt.Errorf("ReplicasAdvisor proposeMaxReplicas failed: %v", err) + return fmt.Errorf("proposeMaxReplicas failed: %v", err) } defaultPredictionWindow := int32(3600) @@ -304,7 +304,7 @@ func (rr *HPARecommender) proposeTargetUtilization(ctx *framework.Recommendation return 0, 0, err } - klog.V(4).Infof("ReplicasAdvisor propose targetUtilization, cpuUsage %f requestsPod %d", cpuUsage, requestTotal) + klog.V(4).Infof("propose targetUtilization, cpuUsage %f requestsPod %d", cpuUsage, requestTotal) targetUtilization := int32(math.Ceil((cpuUsage * 1000 / float64(requestTotal)) * 100)) // capping @@ -327,7 +327,7 @@ func (rr *HPARecommender) proposeMaxReplicas(podTemplate *corev1.PodTemplateSpec return 0, err } - klog.V(4).Infof("ReplicasAdvisor proposeMaxReplicas, percentileCpu %f requestsPod %d targetUtilization %d", percentileCpu, requestsPod, targetUtilization) + klog.V(4).Infof("proposeMaxReplicas, percentileCpu %f requestsPod %d targetUtilization %d", percentileCpu, requestsPod, targetUtilization) // request * targetUtilization is the target average cpu usage, use total p95thCpu to divide, we can get the expect max replicas. 
calcMaxReplicas := (percentileCpu * 100 * 1000 * rr.MaxReplicasFactor) / float64(int32(requestsPod)*targetUtilization) diff --git a/pkg/recommendation/recommender/replicas/recommend.go b/pkg/recommendation/recommender/replicas/recommend.go index 1deebd93d..acead1e41 100644 --- a/pkg/recommendation/recommender/replicas/recommend.go +++ b/pkg/recommendation/recommender/replicas/recommend.go @@ -54,11 +54,11 @@ func (rr *ReplicasRecommender) Recommend(ctx *framework.RecommendationContext) e timeNow.Add(time.Hour*24*7)) if err != nil { - klog.Warningf("%s query predicted time series failed: %v ", rr.Name(), err) + klog.Warningf("%s: query predicted time series failed: %v ", ctx.String(), err) } if len(tsListPrediction) != 1 { - klog.Warningf("%s prediction metrics data is unexpected, List length is %d ", rr.Name(), len(tsListPrediction)) + klog.Warningf("%s: prediction metrics data is unexpected, List length is %d ", ctx.String(), len(tsListPrediction)) } ctx.ResultValues = tsListPrediction diff --git a/site/content/en/docs/Getting started/introduction.md b/site/content/en/docs/Getting started/introduction.md index 22e02f530..ba1548505 100644 --- a/site/content/en/docs/Getting started/introduction.md +++ b/site/content/en/docs/Getting started/introduction.md @@ -33,7 +33,7 @@ scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="tru **Recommendation Framework** -Provide a pluggable framework for analytics and give recommendation for cloud resources, support out-of-box recommenders: Workload Resources/Replicas, Idle Resources. [learn more](/docs/tutorials/recommendation). +Provide a pluggable framework for analytics and give recommendation for cloud resources, support out-of-box recommenders: Workload Resources/Replicas/HPA, Idle Resources. [learn more](/docs/tutorials/recommendation). 
**Prediction-driven Horizontal Autoscaling** diff --git a/site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md new file mode 100644 index 000000000..d7bcaf624 --- /dev/null +++ b/site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md @@ -0,0 +1,138 @@ +--- +title: "HPA Recommendation(Alpha)" +description: "Introduction to HPA Recommendation" +weight: 16 +--- + +Kubernetes users want to use HPA to optimize resource utilization. But often we don't know which applications are suitable for HPA or how to configure the parameters of HPA. With HPA Recommendation you can analyze the actual application usage and get recommended configurations. You can use it to improve application resource utilization. + +HPA recommendation is still in the Alpha phase; comments are welcome. + +## Motivation + +In Kubernetes, the HPA (HorizontalPodAutoscaler) automatically updates the workload replicas (such as Deployment or StatefulSet) to meet the target utilization. However, in the real world, we observe the following problems: + +- Some applications should improve resource utilization through HPA, but HPA is not configured +- Some HPA configurations are not reasonable: they cannot perform autoscaling effectively and therefore cannot improve resource utilization. + +Based on the historical metrics data and algorithm analysis, HPA Recommendation provides the following suggestions: which applications are suitable for HPA and how to configure it. 
+ +## Sample + +An HPA recommendation sample yaml looks like below: + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + labels: + analysis.crane.io/recommendation-rule-name: workload-hpa + analysis.crane.io/recommendation-rule-recommender: HPA + analysis.crane.io/recommendation-rule-uid: 0214c84b-8b39-499b-a7c6-559ac460695d + analysis.crane.io/recommendation-target-kind: Rollout + analysis.crane.io/recommendation-target-name: eshop + analysis.crane.io/recommendation-target-version: v1alpha1 + name: workload-hpa-hpa-blr4r + namespace: zytms + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: workload-hpa + uid: 0214c84b-8b39-499b-a7c6-559ac460695d +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: argoproj.io/v1alpha1 + kind: Rollout + name: eshop + namespace: eshop + type: HPA +status: + action: Create + lastUpdateTime: "2022-12-05T06:12:54Z" + recommendedInfo: '{"kind":"EffectiveHorizontalPodAutoscaler","apiVersion":"autoscaling.crane.io/v1alpha1","metadata":{"name":"eshop","namespace":"eshop","creationTimestamp":null},"spec":{"scaleTargetRef":{"kind":"Rollout","name":"eshop","apiVersion":"argoproj.io/v1alpha1"},"minReplicas":1,"maxReplicas":1,"scaleStrategy":"Preview","metrics":[{"type":"Resource","resource":{"name":"cpu","target":{"type":"Utilization","averageUtilization":58}}},{"type":"Pods","pods":{"metric":{"name":"k8s_pod_cpu_core_used"},"target":{"type":"AverageValue","averageValue":"500m"}}}]},"status":{}}' + recommendedValue: | + effectiveHPA: + maxReplicas: 1 + metrics: + - resource: + name: cpu + target: + averageUtilization: 58 + type: Utilization + type: Resource + - pods: + metric: + name: k8s_pod_cpu_core_used + target: + averageValue: 500m + type: AverageValue + type: Pods + minReplicas: 1 +``` + +In this sample: + +- Recommendation TargetRef point to a 
ArgoRollout in eshop namespace: eshop +- Recommendation type is HPA +- adoptionType is StatusAndAnnotation, which means the recommendation result is written to both recommendation.status and the Workload's annotations +- recommendedInfo shows the recommended HPA configuration (recommendedValue is deprecated) +- action is Create; if an EHPA already exists in the k8s cluster, the action will be Patch + +## Implement + +The process for one HPA recommendation: + +1. Query the historical CPU and Memory usage of the Workload for the past week from the monitoring system. +2. Use the DSP algorithm to predict the CPU usage in the future. +3. Calculate the replicas for both CPU and memory, then choose the larger one as minReplicas. +4. Calculate the historical CPU usage fluctuation and minimum usage, and select the Workloads that are suitable for HPA +5. Calculate the targetUtilization based on the peak CPU utilization of the pod +6. Calculate the recommended maxReplicas based on the recommended targetUtilization +7. Assemble the targetUtilization, maxReplicas, and minReplicas into a complete EHPA object as the recommended result + +### How to filter the workload suitable for HPA + +It should meet the following conditions: + +1. The Workload is healthy. For example, most of the Pods are running +2. There are peaks and troughs in CPU usage. A fixed number of replicas is recommended if the usage is largely smooth and steady or completely random +3. The Workload has a certain amount of CPU usage. If the CPU usage is very low for a long time, HPA is not needed even if there is some fluctuation + +The following is a typical workload with peaks and troughs which is suitable for HPA. + +![](/images/algorithm/dsp/input0.png) + +### Algorithm for recommending MinReplicas + +The method is consistent with replica recommendation. 
Please refer to: [**Replicas Recommendation**](/docs/tutorials/recommendation/replicas-recommendation) + +## Accepted resources + +StatefulSet and Deployment are supported by default, and any workload that supports `Scale SubResource` is supported as well. + +## Configuration + +| Configuration items | Default | Description | +|-------------|---------|-----------------------------------------------------------------------------------| +| workload-min-replicas | 1 | Recommendation is aborted for Workloads whose replicas are less than this value | +| pod-min-ready-seconds | 30 | Defines the min seconds to identify Pod is ready | +| pod-available-ratio | 0.5 | Recommendation is aborted for Workloads whose ready Pod ratio is less than this value | +| default-min-replicas | 1 | default minReplicas | +| cpu-percentile | 0.95 | Percentile for historical cpu usage | +| mem-percentile | 0.95 | Percentile for historical memory usage | +| cpu-target-utilization | 0.5 | Target of CPU peak historical usage | +| mem-target-utilization | 0.5 | Target of Memory peak historical usage | +| predictable | false | When set to true, it will not recommend for HPA if CPU usage is not predictable | +| reference-hpa | true | The recommended result will inherit custom/external metrics from the existing EHPA | +| min-cpu-usage-threshold | 1 | Recommendation is aborted for Workloads whose CPU peak usage is less than this value | +| fluctuation-threshold | 1.5 | Recommendation is aborted for Workloads whose CPU usage fluctuation is less than this value | +| min-cpu-target-utilization | 30 | minimum CPU TargetUtilization | +| max-cpu-target-utilization | 75 | maximum CPU TargetUtilization | +| max-replicas-factor | 3 | the factor used when calculating maxReplicas | + +For how to update the recommendation configuration, please refer to: [**Recommendation Framework**](/docs/tutorials/recommendation/recommendation-framework) diff --git a/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md 
b/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md index 31e0bc76f..312ecfe1c 100644 --- a/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md +++ b/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md @@ -126,9 +126,9 @@ Currently, Crane support these Recommenders: - [**Resource Recommendation**](/docs/tutorials/recommendation/resource-recommendation): Use the VPA algorithm to analyze the actual usage of applications and recommend more appropriate resource configurations. - [**Replicas Recommendation**](/docs/tutorials/recommendation/replicas-recommendation): Use the HPA algorithm to analyze the actual usage of applications and recommend more appropriate replicas configurations. +- [**HPA Recommendation**](/docs/tutorials/recommendation/hpa-recommendation): Scan the Workloads in a cluster and recommend HPA configurations for Workloads that are suitable for horizontal autoscaling - [**IdleNode Recommendation**](/docs/tutorials/recommendation/idlenode-recommendation): Find the idle nodes in cluster - ### Recommender Framework Recommender framework defines a set of workflow, The workflow execution sequence according to the process, the process is divided into four stages: Filter, Prepare, Recommend, Observe. Recommender performs recommends functions by implementing these four stages. 
diff --git a/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md index c4361dff0..7752aa571 100644 --- a/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md +++ b/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md @@ -129,9 +129,9 @@ Support StatefulSet and Deployment by default,but all workloads that support ` | Configuration items | Default | Description | |------------------------|---------|------------------------------------------------------------------------| -| workload-min-replicas | 1 | Workload replicas than less than this value are not recommended | +| workload-min-replicas | 1 | Workload replicas that less than this value will abort recommendation | | pod-min-ready-seconds | 30 | Defines the min seconds to identify Pod is ready | -| pod-available-ratio | 0.5 | Workload ready Pod ratio that less than this value are not recommended | +| pod-available-ratio | 0.5 | Workload ready Pod ratio that less than this value will abort recommendation | | default-min-replicas | 1 | default minReplicas | | cpu-percentile | 0.95 | Percentile for historical cpu usage | | mem-percentile | 0.95 | Percentile for historical memory usage | diff --git a/site/content/zh/docs/Getting started/introduction.md b/site/content/zh/docs/Getting started/introduction.md index 844ce818e..c9116f0d0 100644 --- a/site/content/zh/docs/Getting started/introduction.md +++ b/site/content/zh/docs/Getting started/introduction.md @@ -33,7 +33,7 @@ Crane Dashboard **在线 Demo**: http://dashboard.gocrane.io/ **推荐框架** -提供了一个可扩展的推荐框架以支持多种云资源的分析,内置了多种推荐器:资源推荐,副本推荐,闲置资源推荐。[了解更多](/zh-cn/docs/tutorials/recommendation)。 +提供了一个可扩展的推荐框架以支持多种云资源的分析,内置了多种推荐器:资源推荐,副本推荐,HPA 推荐,闲置资源推荐。[了解更多](/zh-cn/docs/tutorials/recommendation)。 **基于预测的水平弹性器** diff --git a/site/content/zh/docs/Tutorials/Recommendation/hpa-recommendation.md 
b/site/content/zh/docs/Tutorials/Recommendation/hpa-recommendation.md new file mode 100644 index 000000000..68fde5ef1 --- /dev/null +++ b/site/content/zh/docs/Tutorials/Recommendation/hpa-recommendation.md @@ -0,0 +1,138 @@ +--- +title: "HPA 推荐(Alpha)" +description: "HPA 推荐介绍" +weight: 16 +--- + +Kubernetes 用户希望使用 HPA 来实现按需使用,提升资源利用率。但是往往不知道哪些应用适合弹性也不知道如何配置 HPA 的参数。通过 HPA 推荐的算法分析应用的真实用量推荐合适的水平弹性的配置,您可以参考并采纳它提升应用资源利用率。 + +HPA 推荐还处于 Alpha 阶段,欢迎对功能提供意见。 + +## 动机 + +在 Kubernetes 中,HPA(HorizontalPodAutoscaler) 自动更新工作负载资源 (例如 Deployment 或者 StatefulSet), 目的是自动扩缩工作负载以满足需求。但是在实际使用过程中我们观察到以下使用问题: + +- 有些应用可以通过 HPA 提升资源利用率,但是没有配置 HPA +- 有些 HPA 配置并不合理,无法有效地进行弹性伸缩,也就达不到提升利用率的效果 + +HPA 推荐通过应用的历史数据结合算法分析给出建议:哪些应用适合配置 HPA 以及 HPA 的配置。 + +## 推荐示例 + +一个简单的弹性推荐 yaml 文件如下: + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + labels: + analysis.crane.io/recommendation-rule-name: workload-hpa + analysis.crane.io/recommendation-rule-recommender: HPA + analysis.crane.io/recommendation-rule-uid: 0214c84b-8b39-499b-a7c6-559ac460695d + analysis.crane.io/recommendation-target-kind: Rollout + analysis.crane.io/recommendation-target-name: eshop + analysis.crane.io/recommendation-target-version: v1alpha1 + name: workload-hpa-hpa-blr4r + namespace: zytms + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: workload-hpa + uid: 0214c84b-8b39-499b-a7c6-559ac460695d +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: argoproj.io/v1alpha1 + kind: Rollout + name: eshop + namespace: eshop + type: HPA +status: + action: Create + lastUpdateTime: "2022-12-05T06:12:54Z" + recommendedInfo: 
'{"kind":"EffectiveHorizontalPodAutoscaler","apiVersion":"autoscaling.crane.io/v1alpha1","metadata":{"name":"eshop","namespace":"eshop","creationTimestamp":null},"spec":{"scaleTargetRef":{"kind":"Rollout","name":"eshop","apiVersion":"argoproj.io/v1alpha1"},"minReplicas":1,"maxReplicas":1,"scaleStrategy":"Preview","metrics":[{"type":"Resource","resource":{"name":"cpu","target":{"type":"Utilization","averageUtilization":58}}},{"type":"Pods","pods":{"metric":{"name":"k8s_pod_cpu_core_used"},"target":{"type":"AverageValue","averageValue":"500m"}}}]},"status":{}}' + recommendedValue: | + effectiveHPA: + maxReplicas: 1 + metrics: + - resource: + name: cpu + target: + averageUtilization: 58 + type: Utilization + type: Resource + - pods: + metric: + name: k8s_pod_cpu_core_used + target: + averageValue: 500m + type: AverageValue + type: Pods + minReplicas: 1 +``` + +在该示例中: + +- 推荐的 TargetRef 指向 eshop 的 Rollout:eshop +- 推荐类型为 HPA 推荐 +- adoptionType 是 StatusAndAnnotation,表示将推荐结果展示在 recommendation.status 和 Deployment 的 Annotation +- recommendedInfo 显示了推荐的 EHPA 配置(recommendedValue 已经 deprecated) +- action 是 Create,如果集群中已经有 EHPA 存在,则 action 是 Patch + +## 实现原理 + +HPA 推荐按以下步骤完成一次推荐过程: + +1. 通过监控数据,获取 Workload 过去一周的 CPU 和 Memory 历史用量。 +2. 用 DSP 算法预测未来一周 CPU 用量 +3. 分别计算 CPU 和 内存分别对应的副本数,取较大值作为 minReplicas +4. 计算历史 CPU 用量的波动率和最小用量,筛选出适合使用 HPA 的 Workload +5. 根据 pod 的 CPU 峰值利用率计算 targetUtilization +6. 根据推荐的 targetUtilization 计算推荐的 maxReplicas +7. 将 targetUtilization,maxReplicas,minReplicas 组装成完整的 EHPA 对象作为推荐结果 + +### 如何筛选适合使用 HPA 的 workload + +适合使用 HPA 的 Workload 需要满足以下条件: + +1. Workload 运行基本正常,比如绝大多数 Pod 都处于运行中 +2. CPU 的使用量存在波峰波谷的波动。如果基本没有波动或者完全随机的用量适合通过副本推荐配置固定的副本数 +3. 
有一定资源用量的 Workload,如果资源用量长期非常低,那么即使有一定的波动量,也是没有使用 HPA 的价值的 + +以下是一个典型的存在波峰波谷规律的 Workload 的历史资源用量 + +![](/images/algorithm/dsp/input0.png) + +### 计算最小副本算法 + +方法和副本推荐中计算副本算法一致,请参考:[**副本推荐**](/zh-cn/docs/tutorials/recommendation/replicas-recommendation) + +## 支持的资源类型 + +默认支持 StatefulSet 和 Deployment,但是支持所有实现了 Scale SubResource 的 Workload。 + +## 参数配置 + +| 配置项 | 默认值 | 描述 | +|-------------|-------|--------------------------------------| +| workload-min-replicas | 1 | 小于该值的工作负载不做弹性推荐 | +| pod-min-ready-seconds | 30 | 定义了 Pod 是否 Ready 的秒数 | +| pod-available-ratio | 0.5 | Ready Pod 比例小于该值的工作负载不做弹性推荐 | +| default-min-replicas | 1 | 最小 minReplicas | +| cpu-percentile | 0.95 | 历史 CPU 用量的 Percentile | +| mem-percentile | 0.95 | 历史内存用量的 Percentile | +| cpu-target-utilization | 0.5 | CPU 目标峰值利用率 | +| mem-target-utilization | 0.5 | 内存目标峰值利用率 | +| predictable | false | 当设置成 true 时,如果 CPU 历史用量无法预测,则不进行推荐 | +| reference-hpa | true | 推荐配置会参考现有 HPA 的配置,继承比如自定义指标等信息到 EHPA | +| min-cpu-usage-threshold | 1 | Workload CPU 最小用量,如果历史用量小于该配置,则不进行推荐 | +| fluctuation-threshold | 1.5 | Workload CPU 的波动率,小于该配置,则不进行推荐 | +| min-cpu-target-utilization | 30 | CPU 的 TargetUtilization 最小值 | +| max-cpu-target-utilization | 75 | CPU 的 TargetUtilization 最大值 | +| max-replicas-factor | 3 | 在计算 maxReplicas 时的放大系数 | + +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md b/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md index c8481c1dc..bb07a6f0d 100644 --- a/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md +++ b/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md @@ -126,6 +126,7 @@ patchData=`kubectl get recommend workloads-rule-replicas-rckvb -n default -o jso - [**资源推荐**](/zh-cn/docs/tutorials/recommendation/resource-recommendation): 通过 VPA 算法分析应用的真实用量推荐更合适的资源配置 - 
[**副本数推荐**](/zh-cn/docs/tutorials/recommendation/replicas-recommendation): 通过 HPA 算法分析应用的真实用量推荐更合适的副本数量 +- [**HPA 推荐**](/zh-cn/docs/tutorials/recommendation/hpa-recommendation): 扫描集群中的 Workload,针对适合水平弹性的 Workload 推荐 HPA 配置 - [**闲置节点推荐**](/zh-cn/docs/tutorials/recommendation/idlenode-recommendation): 扫描集群中的闲置节点 ### Recommender 框架