gocrane · qmhu · Dec 7, 2022 · Dec 7, 2022
diff --git a/pkg/recommendation/manager.go b/pkg/recommendation/manager.go
@@ -137,66 +137,64 @@ func (m *manager) loadConfigFile() error {
 }
 
 func Run(ctx *framework.RecommendationContext, recommender recommender.Recommender) error {
-	//// If context is canceled, directly return.
-	//if ctx.Canceled() {
-	//	klog.Infof("Recommender %q has been cancelled...", recommender.Name())
-	//	return nil
-	//}
+	klog.Infof("%s: start to run recommender %q.", ctx.String(), recommender.Name())
 
 	// 1. Filter phase
 	err := recommender.Filter(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at filter phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at filter phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 2. PrePrepare phase
 	err = recommender.CheckDataProviders(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at prepare check data provider phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at prepare check data provider phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 3. Prepare phase
 	err = recommender.CollectData(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at prepare collect data phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at prepare collect data phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 4. PostPrepare phase
 	err = recommender.PostProcessing(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at prepare data post processing phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at prepare data post processing phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 5. PreRecommend phase
 	err = recommender.PreRecommend(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at pre commend phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at pre commend phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 6. Recommend phase
 	err = recommender.Recommend(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at recommend phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at recommend phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 7. PostRecommend phase, add policy
 	err = recommender.Policy(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at recommend policy phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at recommend policy phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
 
 	// 8. Observe phase
 	err = recommender.Observe(ctx)
 	if err != nil {
-		klog.Errorf("%s: recommender %q failed at observe phase!", ctx.String(), recommender.Name())
+		klog.Errorf("%s: recommender %q failed at observe phase: %v", ctx.String(), recommender.Name(), err)
 		return err
 	}
+
+	klog.Infof("%s: finish to run recommender %q.", ctx.String(), recommender.Name())
 	return nil
 }
diff --git a/pkg/recommendation/recommender/hpa/recommend.go b/pkg/recommendation/recommender/hpa/recommend.go
@@ -40,12 +40,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error {
 	predictable := true
 
 	if len(ctx.ResultValues) != 1 {
-		klog.Warningf("ReplicasAdvisor prediction metrics data is unexpected, List length is %d ", len(ctx.ResultValues))
+		klog.Warningf("%s: prediction metrics data is unexpected, List length is %d ", ctx.String(), len(ctx.ResultValues))
 		predictable = false
 	}
 
 	if rr.PredictableEnabled && !predictable {
-		return fmt.Errorf("ReplicasAdvisor cannot predict target")
+		return fmt.Errorf("cannot predict target")
 	}
 
 	minReplicas, cpuMax, percentileCpu, err := rr.GetMinReplicas(ctx)
@@ -55,12 +55,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error {
 
 	err = rr.checkMinCpuUsageThreshold(cpuMax)
 	if err != nil {
-		return fmt.Errorf("%s checkMinCpuUsageThreshold failed: %v", rr.Name(), err)
+		return fmt.Errorf("checkMinCpuUsageThreshold failed: %v", err)
 	}
 
 	medianMin, medianMax, err := rr.minMaxMedians(ctx.InputValues)
 	if err != nil {
-		return fmt.Errorf("%s minMaxMedians failed: %v", rr.Name(), err)
+		return fmt.Errorf("minMaxMedians failed: %v", err)
 	}
 
 	err = rr.checkFluctuation(medianMin, medianMax)
@@ -70,12 +70,12 @@ func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error {
 
 	targetUtilization, _, err := rr.proposeTargetUtilization(ctx)
 	if err != nil {
-		return fmt.Errorf("ReplicasAdvisor proposeTargetUtilization failed: %v", err)
+		return fmt.Errorf("proposeTargetUtilization failed: %v", err)
 	}
 
 	maxReplicas, err := rr.proposeMaxReplicas(&ctx.PodTemplate, percentileCpu, targetUtilization, minReplicas)
 	if err != nil {
-		return fmt.Errorf("ReplicasAdvisor proposeMaxReplicas failed: %v", err)
+		return fmt.Errorf("proposeMaxReplicas failed: %v", err)
 	}
 
 	defaultPredictionWindow := int32(3600)
@@ -304,7 +304,7 @@ func (rr *HPARecommender) proposeTargetUtilization(ctx *framework.Recommendation
 		return 0, 0, err
 	}
 
-	klog.V(4).Infof("ReplicasAdvisor propose targetUtilization, cpuUsage %f requestsPod %d", cpuUsage, requestTotal)
+	klog.V(4).Infof("propose targetUtilization, cpuUsage %f requestsPod %d", cpuUsage, requestTotal)
 	targetUtilization := int32(math.Ceil((cpuUsage * 1000 / float64(requestTotal)) * 100))
 
 	// capping
@@ -327,7 +327,7 @@ func (rr *HPARecommender) proposeMaxReplicas(podTemplate *corev1.PodTemplateSpec
 		return 0, err
 	}
 
-	klog.V(4).Infof("ReplicasAdvisor proposeMaxReplicas, percentileCpu %f requestsPod %d targetUtilization %d", percentileCpu, requestsPod, targetUtilization)
+	klog.V(4).Infof("proposeMaxReplicas, percentileCpu %f requestsPod %d targetUtilization %d", percentileCpu, requestsPod, targetUtilization)
 
 	// request * targetUtilization is the target average cpu usage, use total p95thCpu to divide, we can get the expect max replicas.
 	calcMaxReplicas := (percentileCpu * 100 * 1000 * rr.MaxReplicasFactor) / float64(int32(requestsPod)*targetUtilization)

diff --git a/pkg/recommendation/recommender/replicas/recommend.go b/pkg/recommendation/recommender/replicas/recommend.go
@@ -54,11 +54,11 @@ func (rr *ReplicasRecommender) Recommend(ctx *framework.RecommendationContext) e
 		timeNow.Add(time.Hour*24*7))
 
 	if err != nil {
-		klog.Warningf("%s query predicted time series failed: %v ", rr.Name(), err)
+		klog.Warningf("%s: query predicted time series failed: %v ", ctx.String(), err)
 	}
 
 	if len(tsListPrediction) != 1 {
-		klog.Warningf("%s prediction metrics data is unexpected, List length is %d ", rr.Name(), len(tsListPrediction))
+		klog.Warningf("%s: prediction metrics data is unexpected, List length is %d ", ctx.String(), len(tsListPrediction))
 	}
 
 	ctx.ResultValues = tsListPrediction

diff --git a/site/content/en/docs/Getting started/introduction.md b/site/content/en/docs/Getting started/introduction.md
@@ -33,7 +33,7 @@ scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="tru
 
 **Recommendation Framework**
 
-Provide a pluggable framework for analytics and give recommendation for cloud resources, support out-of-box recommenders: Workload Resources/Replicas, Idle Resources. [learn more](/docs/tutorials/recommendation).
+Provides a pluggable framework for analytics and recommendations on cloud resources, with out-of-the-box recommenders: Workload Resources/Replicas/HPA, Idle Resources. [learn more](/docs/tutorials/recommendation).
 
 **Prediction-driven Horizontal Autoscaling**
 

diff --git a/site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/hpa-recommendation.md
@@ -0,0 +1,138 @@
+---
+title: "HPA Recommendation (Alpha)"
+description: "Introduction to HPA Recommendation"
+weight: 16
+---
+
+Kubernetes users want to use HPA to optimize resource utilization, but it is often unclear which applications are suitable for HPA or how to configure the HPA parameters. HPA Recommendation analyzes actual application usage and produces recommended configurations, which you can use to improve application resource utilization.
+
+HPA Recommendation is still in the Alpha phase; comments are welcome.
+
+## Motivation
+
+In Kubernetes, the HPA (HorizontalPodAutoscaler) automatically updates the replicas of a workload (such as a Deployment or StatefulSet) to meet the target utilization. However, in practice we observe the following problems:
+
+- Some applications could improve resource utilization through HPA, but no HPA is configured for them.
+- Some HPA configurations are unreasonable: they can neither perform autoscaling effectively nor improve resource utilization.
+
+Based on historical metrics data and algorithmic analysis, HPA Recommendation provides the following suggestions: which applications are suitable for HPA and how to configure it.
+
+## Sample
+
+A sample HPA recommendation YAML looks like this:
+
+```yaml
+apiVersion: analysis.crane.io/v1alpha1
+kind: Recommendation
+metadata:
+  labels:
+    analysis.crane.io/recommendation-rule-name: workload-hpa
+    analysis.crane.io/recommendation-rule-recommender: HPA
+    analysis.crane.io/recommendation-rule-uid: 0214c84b-8b39-499b-a7c6-559ac460695d
+    analysis.crane.io/recommendation-target-kind: Rollout
+    analysis.crane.io/recommendation-target-name: eshop
+    analysis.crane.io/recommendation-target-version: v1alpha1
+  name: workload-hpa-hpa-blr4r
+  namespace: zytms
+  ownerReferences:
+    - apiVersion: analysis.crane.io/v1alpha1
+      blockOwnerDeletion: false
+      controller: false
+      kind: RecommendationRule
+      name: workload-hpa
+      uid: 0214c84b-8b39-499b-a7c6-559ac460695d
+spec:
+  adoptionType: StatusAndAnnotation
+  completionStrategy:
+    completionStrategyType: Once
+  targetRef:
+    apiVersion: argoproj.io/v1alpha1
+    kind: Rollout
+    name: eshop
+    namespace: eshop
+  type: HPA
+status:
+  action: Create
+  lastUpdateTime: "2022-12-05T06:12:54Z"
+  recommendedInfo: '{"kind":"EffectiveHorizontalPodAutoscaler","apiVersion":"autoscaling.crane.io/v1alpha1","metadata":{"name":"eshop","namespace":"eshop","creationTimestamp":null},"spec":{"scaleTargetRef":{"kind":"Rollout","name":"eshop","apiVersion":"argoproj.io/v1alpha1"},"minReplicas":1,"maxReplicas":1,"scaleStrategy":"Preview","metrics":[{"type":"Resource","resource":{"name":"cpu","target":{"type":"Utilization","averageUtilization":58}}},{"type":"Pods","pods":{"metric":{"name":"k8s_pod_cpu_core_used"},"target":{"type":"AverageValue","averageValue":"500m"}}}]},"status":{}}'
+  recommendedValue: |
+    effectiveHPA:
+      maxReplicas: 1
+      metrics:
+      - resource:
+          name: cpu
+          target:
+            averageUtilization: 58
+            type: Utilization
+        type: Resource
+      - pods:
+          metric:
+            name: k8s_pod_cpu_core_used
+          target:
+            averageValue: 500m
+            type: AverageValue
+        type: Pods
+      minReplicas: 1
+```
+
+In this sample:
+
+- The Recommendation's targetRef points to an Argo Rollout named eshop in the eshop namespace
+- Recommendation type is HPA
+- adoptionType is StatusAndAnnotation, which means the recommendation result is written to recommendation.status and to the Workload's annotations
+- recommendedInfo shows the recommended HPA configuration (recommendedValue is deprecated)
+- action is Create; if an EHPA already exists in the cluster, the action will be Patch
+
+## Implementation
+
+The process for one HPA recommendation:
+
+1. Query the Workload's historical CPU and memory usage over the past week from the monitoring system.
+2. Use the DSP algorithm to predict future CPU usage.
+3. Calculate the replicas for both CPU and memory, then choose the larger one.
+4. Calculate the historical CPU usage fluctuation and minimum usage, and use them to select the Workloads that are suitable for HPA.
+5. Calculate the targetUtilization based on the peak CPU utilization of the pod.
+6. Calculate the recommended maxReplicas based on the recommended targetUtilization.
+7. Assemble the targetUtilization, maxReplicas, and minReplicas into a complete EHPA object as the recommended result.
+
+### How to filter workloads suitable for HPA
+
+It should meet the following conditions:
+
+1. The Workload is healthy. For example, most of the Pods are running
+2. There are peaks and troughs in CPU usage. A fixed number of replicas is recommended if the usage is largely smooth and steady or completely random
+3. The Workload has a certain amount of CPU usage. If the CPU usage stays very low for a long time, HPA is unnecessary even if there is some fluctuation
+
+The following is a typical workload with peaks and troughs which is suitable for HPA.
+
+![](/images/algorithm/dsp/input0.png)
+
+### Algorithm for recommending MinReplicas
+
+The method is consistent with replicas recommendation. Please refer to: [**Replicas Recommendation**](/docs/tutorials/recommendation/replicas-recommendation)
+
+## Accepted resources
+
+StatefulSet and Deployment are supported by default, and any workload that supports `Scale SubResource` is also supported.
+
+## Configuration
+
+| Configuration items   | Default | Description                                                                       |
+|-------------|---------|-----------------------------------------------------------------------------------|
+| workload-min-replicas | 1       | Recommendation is aborted when Workload replicas are less than this value         |
+| pod-min-ready-seconds | 30      | Defines the min seconds to identify Pod is ready                                  |
+| pod-available-ratio | 0.5     | Recommendation is aborted when the ratio of ready Pods is less than this value    |
+| default-min-replicas | 1       | default minReplicas                                                               |
+| cpu-percentile | 0.95    | Percentile for historical cpu usage                                               |
+| mem-percentile | 0.95    | Percentile for historical memory usage                                            |
+| cpu-target-utilization | 0.5     | Target of CPU peak historical usage                                               |
+| mem-target-utilization | 0.5     | Target of Memory peak historical usage                                            |
+| predictable | false   | When set to true, it will not recommend for HPA if CPU usage is not predictable   |
+| reference-hpa | true    | The recommended result inherits custom/external metrics from the existing EHPA    |
+| min-cpu-usage-threshold | 1       | Recommendation is aborted when Workload peak CPU usage is less than this value    |
+| fluctuation-threshold | 1.5     | Recommendation is aborted when Workload CPU usage fluctuation is less than this value |
+| min-cpu-target-utilization | 30      | minimum CPU TargetUtilization                                                     |
+| max-cpu-target-utilization | 75      | maximum CPU TargetUtilization                                                     |
+| max-replicas-factor | 3       | Factor applied when calculating maxReplicas                                       |
+
+To learn how to update the recommendation configuration, please refer to: [**Recommendation Framework**](/docs/tutorials/recommendation/recommendation-framework)
diff --git a/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md b/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md
@@ -126,9 +126,9 @@ Currently, Crane support these Recommenders:
 
 - [**Resource Recommendation**](/docs/tutorials/recommendation/resource-recommendation): Use the VPA algorithm to analyze the actual usage of applications and recommend more appropriate resource configurations.
 - [**Replicas Recommendation**](/docs/tutorials/recommendation/replicas-recommendation): Use the HPA algorithm to analyze the actual usage of applications and recommend more appropriate replicas configurations.
+- [**HPA Recommendation**](/docs/tutorials/recommendation/hpa-recommendation): Scan the Workloads in a cluster and recommend HPA configurations for Workloads that are suitable for horizontal autoscaling.
 - [**IdleNode Recommendation**](/docs/tutorials/recommendation/idlenode-recommendation): Find the idle nodes in cluster
 
-
 ### Recommender Framework 
 
 Recommender framework defines a set of workflow, The workflow execution sequence according to the process, the process is divided into four stages: Filter, Prepare, Recommend, Observe. Recommender performs recommends functions by implementing these four stages.

diff --git a/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/replicas-recommendation.md
@@ -129,9 +129,9 @@ Support StatefulSet and Deployment by default，but all workloads that support `
 
 | Configuration items    | Default | Description                                                            |
 |------------------------|---------|------------------------------------------------------------------------|
-| workload-min-replicas  | 1       | Workload replicas than less than this value are not recommended        |
+| workload-min-replicas  | 1       | Recommendation is aborted when Workload replicas are less than this value |
 | pod-min-ready-seconds  | 30      | Defines the min seconds to identify Pod is ready                       |
-| pod-available-ratio    | 0.5     | Workload ready Pod ratio that less than this value are not recommended |
+| pod-available-ratio    | 0.5     | Recommendation is aborted when the ratio of ready Pods is less than this value |
 | default-min-replicas   | 1       | default minReplicas                                                    |
 | cpu-percentile         | 0.95 | Percentile for historical cpu usage                                    |
 | mem-percentile         | 0.95 | Percentile for historical memory usage                                 |

diff --git a/site/content/zh/docs/Getting started/introduction.md b/site/content/zh/docs/Getting started/introduction.md
@@ -33,7 +33,7 @@ Crane Dashboard **在线 Demo**: http://dashboard.gocrane.io/
 
 **推荐框架**
 
-提供了一个可扩展的推荐框架以支持多种云资源的分析，内置了多种推荐器：资源推荐，副本推荐，闲置资源推荐。[了解更多](/zh-cn/docs/tutorials/recommendation)。
+提供了一个可扩展的推荐框架以支持多种云资源的分析，内置了多种推荐器：资源推荐，副本推荐，HPA 推荐，闲置资源推荐。[了解更多](/zh-cn/docs/tutorials/recommendation)。
 
 **基于预测的水平弹性器**