diff --git a/deploy/craned/deployment.yaml b/deploy/craned/deployment.yaml index 603f68cc6..c086543dd 100644 --- a/deploy/craned/deployment.yaml +++ b/deploy/craned/deployment.yaml @@ -119,7 +119,7 @@ data: acceptedResources: - kind: Node apiVersion: v1 - - name: Volumes + - name: Volume acceptedResources: - kind: PersistentVolume apiVersion: v1 diff --git a/examples/analytics/preinstall-rule.yaml b/examples/analytics/preinstall-rule.yaml index 80628483a..a9d02520d 100644 --- a/examples/analytics/preinstall-rule.yaml +++ b/examples/analytics/preinstall-rule.yaml @@ -52,3 +52,21 @@ spec: any: true # 扫描所有namespace recommenders: - name: Service + +--- + +apiVersion: analysis.crane.io/v1alpha1 +kind: RecommendationRule +metadata: + name: persistentvolumes-rule + labels: + analysis.crane.io/recommendation-rule-preinstall: "true" +spec: + runInterval: 24h # 每24h运行一次 + resourceSelectors: # 资源的信息 + - kind: PersistentVolume + apiVersion: v1 + namespaceSelector: + any: true # 扫描所有namespace + recommenders: + - name: Volume \ No newline at end of file diff --git a/examples/analytics/recommendation-configuration.yaml b/examples/analytics/recommendation-configuration.yaml index f4cc9a464..6f331f353 100644 --- a/examples/analytics/recommendation-configuration.yaml +++ b/examples/analytics/recommendation-configuration.yaml @@ -23,7 +23,7 @@ recommenders: apiVersion: apps/v1 - kind: StatefulSet apiVersion: apps/v1 - - name: Volumes + - name: Volume acceptedResources: - kind: PersistentVolume apiVersion: v1 diff --git a/pkg/recommendation/manager.go b/pkg/recommendation/manager.go index 2bcf01107..a2bb3e7c4 100644 --- a/pkg/recommendation/manager.go +++ b/pkg/recommendation/manager.go @@ -17,6 +17,7 @@ import ( _ "github.com/gocrane/crane/pkg/recommendation/recommender/replicas" _ "github.com/gocrane/crane/pkg/recommendation/recommender/resource" _ "github.com/gocrane/crane/pkg/recommendation/recommender/service" + _ "github.com/gocrane/crane/pkg/recommendation/recommender/volume" ) 
type RecommenderManager interface { diff --git a/pkg/recommendation/recommender/const.go b/pkg/recommendation/recommender/const.go index 98c17ad4f..169588edd 100644 --- a/pkg/recommendation/recommender/const.go +++ b/pkg/recommendation/recommender/const.go @@ -13,8 +13,8 @@ const ( // IdleNodeRecommender name IdleNodeRecommender string = "IdleNode" - // VolumesRecommender name - VolumesRecommender string = "Volumes" + // VolumeRecommender name + VolumeRecommender string = "Volume" // ServiceRecommender name ServiceRecommender string = "Service" diff --git a/pkg/recommendation/recommender/volumes/filter.go b/pkg/recommendation/recommender/volume/filter.go similarity index 86% rename from pkg/recommendation/recommender/volumes/filter.go rename to pkg/recommendation/recommender/volume/filter.go index 739a9e0e9..7eebbbfa8 100644 --- a/pkg/recommendation/recommender/volumes/filter.go +++ b/pkg/recommendation/recommender/volume/filter.go @@ -1,4 +1,4 @@ -package volumes +package volume import ( corev1 "k8s.io/api/core/v1" @@ -8,7 +8,7 @@ import ( ) // Filter out k8s resources that are not supported by the recommender. -func (vr *VolumesRecommender) Filter(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) Filter(ctx *framework.RecommendationContext) error { var err error // filter resource that not match objectIdentity diff --git a/pkg/recommendation/recommender/volumes/observe.go b/pkg/recommendation/recommender/volume/observe.go similarity index 54% rename from pkg/recommendation/recommender/volumes/observe.go rename to pkg/recommendation/recommender/volume/observe.go index 9de0777ac..c2cb3e513 100644 --- a/pkg/recommendation/recommender/volumes/observe.go +++ b/pkg/recommendation/recommender/volume/observe.go @@ -1,10 +1,10 @@ -package volumes +package volume import ( "github.com/gocrane/crane/pkg/recommendation/framework" ) // Observe enhance the observability. 
-func (vr *VolumesRecommender) Observe(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) Observe(ctx *framework.RecommendationContext) error { return nil } diff --git a/pkg/recommendation/recommender/volumes/prepare.go b/pkg/recommendation/recommender/volume/prepare.go similarity index 51% rename from pkg/recommendation/recommender/volumes/prepare.go rename to pkg/recommendation/recommender/volume/prepare.go index b434c7af4..67172ff93 100644 --- a/pkg/recommendation/recommender/volumes/prepare.go +++ b/pkg/recommendation/recommender/volume/prepare.go @@ -1,11 +1,11 @@ -package volumes +package volume import ( "github.com/gocrane/crane/pkg/recommendation/framework" ) // CheckDataProviders in PrePrepare phase, will create data source provider via your recommendation config. -func (vr *VolumesRecommender) CheckDataProviders(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) CheckDataProviders(ctx *framework.RecommendationContext) error { if err := vr.BaseRecommender.CheckDataProviders(ctx); err != nil { return err } @@ -13,10 +13,10 @@ func (vr *VolumesRecommender) CheckDataProviders(ctx *framework.RecommendationCo return nil } -func (vr *VolumesRecommender) CollectData(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) CollectData(ctx *framework.RecommendationContext) error { return nil } -func (vr *VolumesRecommender) PostProcessing(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) PostProcessing(ctx *framework.RecommendationContext) error { return nil } diff --git a/pkg/recommendation/recommender/volumes/recommend.go b/pkg/recommendation/recommender/volume/recommend.go similarity index 76% rename from pkg/recommendation/recommender/volumes/recommend.go rename to pkg/recommendation/recommender/volume/recommend.go index 7ed5fc0e0..d9003b655 100644 --- a/pkg/recommendation/recommender/volumes/recommend.go +++ b/pkg/recommendation/recommender/volume/recommend.go @@ 
-1,4 +1,4 @@ -package volumes +package volume import ( "fmt" @@ -8,11 +8,11 @@ import ( "github.com/gocrane/crane/pkg/recommendation/framework" ) -func (vr *VolumesRecommender) PreRecommend(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) PreRecommend(ctx *framework.RecommendationContext) error { return nil } -func (vr *VolumesRecommender) Recommend(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) Recommend(ctx *framework.RecommendationContext) error { // Check if each volume is being used by any pods isOrphanVolume := true var pv corev1.PersistentVolume @@ -38,6 +38,6 @@ func (vr *VolumesRecommender) Recommend(ctx *framework.RecommendationContext) er } // Policy add some logic for result of recommend phase. -func (vr *VolumesRecommender) Policy(ctx *framework.RecommendationContext) error { +func (vr *VolumeRecommender) Policy(ctx *framework.RecommendationContext) error { return nil } diff --git a/pkg/recommendation/recommender/volume/registry.go b/pkg/recommendation/recommender/volume/registry.go new file mode 100644 index 000000000..af317a20e --- /dev/null +++ b/pkg/recommendation/recommender/volume/registry.go @@ -0,0 +1,32 @@ +package volume + +import ( + analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" + + "github.com/gocrane/crane/pkg/recommendation/config" + "github.com/gocrane/crane/pkg/recommendation/recommender" + "github.com/gocrane/crane/pkg/recommendation/recommender/apis" + "github.com/gocrane/crane/pkg/recommendation/recommender/base" +) + +var _ recommender.Recommender = &VolumeRecommender{} + +type VolumeRecommender struct { + base.BaseRecommender +} + +func init() { + recommender.RegisterRecommenderProvider(recommender.VolumeRecommender, NewVolumeRecommender) +} + +func (vr *VolumeRecommender) Name() string { + return recommender.VolumeRecommender +} + +// NewVolumeRecommender creates a new Volume recommender. 
+func NewVolumeRecommender(recommender apis.Recommender, recommendationRule analysisv1alph1.RecommendationRule) (recommender.Recommender, error) { + recommender = config.MergeRecommenderConfigFromRule(recommender, recommendationRule) + return &VolumeRecommender{ + *base.NewBaseRecommender(recommender), + }, nil +} diff --git a/pkg/recommendation/recommender/volumes/registry.go b/pkg/recommendation/recommender/volumes/registry.go deleted file mode 100644 index eceefd7a4..000000000 --- a/pkg/recommendation/recommender/volumes/registry.go +++ /dev/null @@ -1,31 +0,0 @@ -package volumes - -import ( - analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" - "github.com/gocrane/crane/pkg/recommendation/config" - "github.com/gocrane/crane/pkg/recommendation/recommender" - "github.com/gocrane/crane/pkg/recommendation/recommender/apis" - "github.com/gocrane/crane/pkg/recommendation/recommender/base" -) - -var _ recommender.Recommender = &VolumesRecommender{} - -type VolumesRecommender struct { - base.BaseRecommender -} - -func init() { - recommender.RegisterRecommenderProvider(recommender.VolumesRecommender, NewVolumesRecommender) -} - -func (vr *VolumesRecommender) Name() string { - return recommender.VolumesRecommender -} - -// NewVolumesRecommender create a new Volumes recommender. 
-func NewVolumesRecommender(recommender apis.Recommender, recommendationRule analysisv1alph1.RecommendationRule) (recommender.Recommender, error) { - recommender = config.MergeRecommenderConfigFromRule(recommender, recommendationRule) - return &VolumesRecommender{ - *base.NewBaseRecommender(recommender), - }, nil -} diff --git a/pkg/server/handler/clusters/cluster.go b/pkg/server/handler/clusters/cluster.go index 2de7ae9a3..bec2fe002 100644 --- a/pkg/server/handler/clusters/cluster.go +++ b/pkg/server/handler/clusters/cluster.go @@ -81,6 +81,25 @@ spec: - name: Service ` +const RecommendationRulePVsName = "persistentvolumes-rule" +const RecommendationRulePVsYAML = ` +apiVersion: analysis.crane.io/v1alpha1 +kind: RecommendationRule +metadata: + name: persistentvolumes-rule + labels: + analysis.crane.io/recommendation-rule-preinstall: "true" +spec: + runInterval: 24h # 每24h运行一次 + resourceSelectors: # 资源的信息 + - kind: PersistentVolume + apiVersion: v1 + namespaceSelector: + any: true # 扫描所有namespace + recommenders: + - name: Volume +` + type AddClustersRequest struct { Clusters []*store.Cluster `json:"clusters"` } @@ -195,6 +214,12 @@ func (ch *ClusterHandler) AddClusters(c *gin.Context) { ginwrapper.WriteResponse(c, err, nil) return } + + err = ch.upsertRecommendationRule(RecommendationRulePVsName, RecommendationRulePVsYAML) + if err != nil { + ginwrapper.WriteResponse(c, err, nil) + return + } } else if err != nil { ginwrapper.WriteResponse(c, err, nil) return diff --git a/site/.DS_Store b/site/.DS_Store new file mode 100644 index 000000000..272808f68 Binary files /dev/null and b/site/.DS_Store differ diff --git a/site/content/en/docs/Tutorials/Recommendation/pv-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/pv-recommendation.md new file mode 100644 index 000000000..6d6f3f64b --- /dev/null +++ b/site/content/en/docs/Tutorials/Recommendation/pv-recommendation.md @@ -0,0 +1,73 @@ +--- +title: "PV Recommendation" +description: "Introduce for PV 
Recommendation" +weight: 17 +--- + +PV 推荐通过扫描集群中 PV 的运行状况,帮助用户找到闲置的 Kubernetes PV。 + +## 动机 + +通常在 Kubernetes 中我们会使用 PV + Workload 来自动创建和管理存储卷并将存储卷挂载到应用上,在日常的运营中难免会出现空闲或者空跑的存储卷,浪费了大量成本, PV 推荐尝试帮助用户找到这部分 PV 来实现成本优化。 + +## 推荐示例 + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + annotations: + analysis.crane.io/last-start-time: "2023-06-14 08:55:25" + analysis.crane.io/message: Success + analysis.crane.io/run-number: "653" + labels: + analysis.crane.io/recommendation-rule-name: persistentvolumes-rule + analysis.crane.io/recommendation-rule-recommender: Volume + analysis.crane.io/recommendation-rule-uid: 39d30abe-4c7f-4e65-b961-b00ec7776b45 + analysis.crane.io/recommendation-target-kind: PersistentVolume + analysis.crane.io/recommendation-target-name: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 + analysis.crane.io/recommendation-target-namespace: "" + analysis.crane.io/recommendation-target-version: v1 + name: persistentvolumes-rule-volume-5r9zn + namespace: crane-system + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: persistentvolumes-rule + uid: 39d30abe-4c7f-4e65-b961-b00ec7776b45 +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: v1 + kind: PersistentVolume + name: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 + type: Volume +status: + action: Delete + description: It is an Orphan Volumes + lastUpdateTime: "2023-06-14T08:55:25Z" +``` + +在该示例中: + +- 推荐的 TargetRef 指向了 PV: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 +- 推荐类型为 PV 推荐 +- action 是 Delete,这里只是给出建议 + +## 实现原理 + +PV 推荐按以下步骤完成一次推荐过程: + +1. 扫描集群中所有 PV,找到 PV 对应的 Pod 列表 +2. 如果 PV 没有对应的 PVC,则判断为闲置 PV +3. 
如果没有 Pod 关联这个 PV 和 PVC,则判断为闲置 PV + +## 参数配置 + +目前 PV 推荐没有参数配置。 + +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md b/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md index eb5f80dcc..ed6203bd2 100644 --- a/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md +++ b/site/content/en/docs/Tutorials/Recommendation/recommendation-framework.md @@ -128,6 +128,8 @@ Currently, Crane support these Recommenders: - [**Replicas Recommendation**](/docs/tutorials/recommendation/replicas-recommendation): Use the HPA algorithm to analyze the actual usage of applications and recommend more appropriate replicas configurations. - [**HPA Recommendation**](/docs/tutorials/recommendation/hpa-recommendation): Scan the Workload in a cluster and recommend HPA configurations for Workload that are suitable for horizontal autoscaling - [**IdleNode Recommendation**](/docs/tutorials/recommendation/idlenode-recommendation): Find the idle nodes in cluster +- [**Service Recommendation**](/docs/tutorials/recommendation/service-recommendation): Find the idle load balancer services in cluster +- [**PV Recommendation**](/docs/tutorials/recommendation/pv-recommendation): Find the idle persistent volumes in cluster ### Recommender Framework diff --git a/site/content/en/docs/Tutorials/Recommendation/service-recommendation.md b/site/content/en/docs/Tutorials/Recommendation/service-recommendation.md new file mode 100644 index 000000000..4c266e138 --- /dev/null +++ b/site/content/en/docs/Tutorials/Recommendation/service-recommendation.md @@ -0,0 +1,97 @@ +--- +title: "Service Recommendation" +description: "Introduce for Service Recommendation" +weight: 16 +--- + +Service 推荐通过扫描集群中 Service 的运行状况,帮助用户找到闲置的 Kubernetes Service。 + +## 动机 + +通常在 Kubernetes 中我们会使用 Service + Workload 
来自动创建和管理负载均衡并将负载均衡挂载到应用上,在日常的运营中难免会出现空闲和低利用率的负载均衡,浪费了大量成本,Service 推荐尝试帮助用户找到这部分 Service 来实现成本优化。 + +## 推荐示例 + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + annotations: + analysis.crane.io/last-start-time: "2023-06-12 11:52:23" + analysis.crane.io/message: Success + analysis.crane.io/run-number: "7823" + creationTimestamp: "2023-06-12T09:44:23Z" + labels: + analysis.crane.io/recommendation-rule-name: service-rule + analysis.crane.io/recommendation-rule-recommender: Service + analysis.crane.io/recommendation-rule-uid: 67807cd9-b4c9-4d63-8493-d330ccace364 + analysis.crane.io/recommendation-target-kind: Service + analysis.crane.io/recommendation-target-name: nginx + analysis.crane.io/recommendation-target-namespace: crane-system + analysis.crane.io/recommendation-target-version: v1 + name: service-rule-service-cnwt5 + namespace: crane-system + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: service-rule + uid: 67807cd9-b4c9-4d63-8493-d330ccace364 +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: v1 + kind: Service + name: nginx + namespace: crane-system + type: Service +status: + action: Delete + description: It is a Orphan Service, Pod count is 0 + lastUpdateTime: "2023-06-12T11:52:23Z" +``` + +在该示例中: + +- 推荐的 TargetRef 指向了 Service:nginx +- 推荐类型为 Service 推荐 +- action 是 Delete,这里只是给出建议 + +## 实现原理 + +Service 推荐按以下步骤完成一次推荐过程: + +1. 扫描集群中所有 LoadBalancer 类型的 Service +2. 如果 Service 对应的 endpoints 中有 Address 或者 NotReadyAddresses,则不是闲置的 Service +3. 
依据 Service 推荐中流量相关 metric 检测 Service 是否小于阈值水位,如果小于水位则判定为闲置 Service + +## 如何验证推荐结果的准确性 + +以下是判断 Service 流量阈值水位的 Prom query,验证时把 namespace 和 pod 替换成实际的命名空间和 Pod 名称 + +```go +// Container network cumulative count of bytes received +queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` +// Container network cumulative count of bytes transmitted +queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` +``` + +## 支持的资源类型 + +只支持 Service 类型,目前只会对 LoadBalancer 类型的 Service 进行分析。 + +## 参数配置 + +| 配置项 | 默认值 | 描述 | +|----------|-----|---------------------------------| +| net-receive-bytes | 0 | Service 对应 Pods 接收到的网络请求 bytes,默认不检查 | +| net-receive-percentile | 0.99 | 计算接收到的网络请求时的 Percentile | +| net-transfer-bytes | 0 | Service 对应 Pods 传输的网络请求 bytes,默认不检查 | +| net-transfer-percentile | 0.99 | 计算传输的网络请求时的 Percentile | + +注意,当 pod 配置了 liveness/readiness probe 后,kubelet 的探测会带来一定的容器流量,因此流量的阈值需要设置的稍微大一些,可结合具体监控数据配置。 + +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/content/zh/docs/Tutorials/Recommendation/idlenode-recommendation.md b/site/content/zh/docs/Tutorials/Recommendation/idlenode-recommendation.md index c057b7568..144885b89 100644 --- a/site/content/zh/docs/Tutorials/Recommendation/idlenode-recommendation.md +++ b/site/content/zh/docs/Tutorials/Recommendation/idlenode-recommendation.md @@ -13,39 +13,46 @@ weight: 15 ## 推荐示例 ```yaml -kind: Recommendation apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation metadata: - name: idlenodes-rule-idlenode-5jxn9 - namespace: crane-system + annotations: + analysis.crane.io/last-start-time: "2023-06-09 09:46:33" + analysis.crane.io/message: Success + analysis.crane.io/run-number: "111" + creationTimestamp: "2023-05-31T11:06:10Z" + generateName: idlenodes-rule-idlenode- + generation: 111 labels: analysis.crane.io/recommendation-rule-name: 
idlenodes-rule analysis.crane.io/recommendation-rule-recommender: IdleNode - analysis.crane.io/recommendation-rule-uid: 8921a198-7082-11ed-8b7b-246e960a8d8c + analysis.crane.io/recommendation-rule-uid: 25bf5a49-e78f-4f42-8e67-36c0b1b9bb5b analysis.crane.io/recommendation-target-kind: Node analysis.crane.io/recommendation-target-name: worker-node-1 + analysis.crane.io/recommendation-target-namespace: "" analysis.crane.io/recommendation-target-version: v1 - beta.kubernetes.io/arch: amd64 - beta.kubernetes.io/instance-type: bareMetal - beta.kubernetes.io/os: linux + name: idlenodes-rule-idlenode-px2ck + namespace: crane-system ownerReferences: - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false kind: RecommendationRule name: idlenodes-rule - uid: 8921a198-7082-11ed-8b7b-246e960a8d8c - controller: false - blockOwnerDeletion: false + uid: 25bf5a49-e78f-4f42-8e67-36c0b1b9bb5b spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once targetRef: + apiVersion: v1 kind: Node name: worker-node-1 - apiVersion: v1 type: IdleNode - completionStrategy: {} status: - targetRef: {} action: Delete - lastUpdateTime: '2022-11-30T07:46:57Z' + description: Node is owned by DaemonSet + lastUpdateTime: "2023-06-09T09:46:33Z" ``` 在该示例中: @@ -60,4 +67,36 @@ status: 1. 扫描集群中所有节点和节点上的 Pod 2. 如果节点上所有 Pod 都属于 DaemonSet,则判定为闲置节点 +3. 
依据 IdleNode 的其他配置检测节点是否小于阈值水位,如果小于水位则判定为闲置节点 + +## 如何验证推荐结果的准确性 + +以下是判断节点资源阈值水位的 Prom query,验证时把 node 替换成实际的节点名 + +```go + // NodeCpuRequestUtilizationExprTemplate is used to query node cpu request utilization by promql, param is node name, node name which prometheus scrape +NodeCpuRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` +// NodeMemRequestUtilizationExprTemplate is used to query node memory request utilization by promql, param is node name, node name which prometheus scrape +NodeMemRequestUtilizationExprTemplate = `sum(kube_pod_container_resource_requests{node="%s", resource="memory", unit="byte", namespace!=""} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` +// NodeCpuUsageUtilizationExprTemplate is used to query node memory usage utilization by promql, param is node name, node name which prometheus scrape +NodeCpuUsageUtilizationExprTemplate = `sum(label_replace(irate(container_cpu_usage_seconds_total{instance="%s", container!="POD", container!="",image!=""}[1h]), "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", 
label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="cpu", unit="core"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` +// NodeMemUsageUtilizationExprTemplate is used to query node memory usage utilization by promql, param is node name, node name which prometheus scrape +NodeMemUsageUtilizationExprTemplate = `sum(label_replace(container_memory_usage_bytes{instance="%s", namespace!="",container!="POD", container!="",image!=""}, "node", "$1", "instance", "(^[^:]+)") * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) / sum(kube_node_status_capacity{node="%s", resource="memory", unit="byte"} * on (node) group_left() max(kube_node_labels{label_beta_kubernetes_io_instance_type!~"eklet", label_node_kubernetes_io_instance_type!~"eklet"}) by (node)) by (node) ` +``` + +## 支持的资源类型 + +只支持 Node,由于 Node 是 Cluster Scope 资源,因此 IdleNode 类型的 Recommendation 均在 crane-system namespace。 + +## 参数配置 + +| 配置项 | 默认值 | 描述 | +|----------|------|------------------------------------------| +| cpu-request-utilization | 0 | 高于该值利用率的节点不是闲置节点,0.5代表50%,默认不检查 | +| cpu-usage-utilization | 0 | 高于该值 request 使用率的节点不是闲置节点,0.5代表50%,默认不检查 | +| cpu-percentile | 0.99 | 计算 cpu 负载时的 Percentile | +| memory-request-utilization | 0 | 高于该值利用率的节点不是闲置节点,0.5代表50%,默认不检查 | +| memory-usage-utilization | 0 | 高于该值 request 使用率的节点不是闲置节点,0.5代表50%,默认不检查 | +| memory-percentile | 0.99 | 计算 memory 负载时的 Percentile | +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/content/zh/docs/Tutorials/Recommendation/pv-recommendation.md b/site/content/zh/docs/Tutorials/Recommendation/pv-recommendation.md new file mode 100644 index 000000000..f090994ae --- 
/dev/null +++ b/site/content/zh/docs/Tutorials/Recommendation/pv-recommendation.md @@ -0,0 +1,73 @@ +--- +title: "PV 推荐" +description: "PV 推荐功能介绍" +weight: 17 +--- + +PV 推荐通过扫描集群中 PV 的运行状况,帮助用户找到闲置的 Kubernetes PV。 + +## 动机 + +通常在 Kubernetes 中我们会使用 PV + Workload 来自动创建和管理存储卷并将存储卷挂载到应用上,在日常的运营中难免会出现空闲或者空跑的存储卷,浪费了大量成本, PV 推荐尝试帮助用户找到这部分 PV 来实现成本优化。 + +## 推荐示例 + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + annotations: + analysis.crane.io/last-start-time: "2023-06-14 08:55:25" + analysis.crane.io/message: Success + analysis.crane.io/run-number: "653" + labels: + analysis.crane.io/recommendation-rule-name: persistentvolumes-rule + analysis.crane.io/recommendation-rule-recommender: Volume + analysis.crane.io/recommendation-rule-uid: 39d30abe-4c7f-4e65-b961-b00ec7776b45 + analysis.crane.io/recommendation-target-kind: PersistentVolume + analysis.crane.io/recommendation-target-name: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 + analysis.crane.io/recommendation-target-namespace: "" + analysis.crane.io/recommendation-target-version: v1 + name: persistentvolumes-rule-volume-5r9zn + namespace: crane-system + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: persistentvolumes-rule + uid: 39d30abe-4c7f-4e65-b961-b00ec7776b45 +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: v1 + kind: PersistentVolume + name: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 + type: Volume +status: + action: Delete + description: It is an Orphan Volumes + lastUpdateTime: "2023-06-14T08:55:25Z" +``` + +在该示例中: + +- 推荐的 TargetRef 指向了 PV: pvc-6ce24277-24e9-4fcf-8e8a-f9bdb5694134 +- 推荐类型为 PV 推荐 +- action 是 Delete,这里只是给出建议 + +## 实现原理 + +PV 推荐按以下步骤完成一次推荐过程: + +1. 扫描集群中所有 PV,找到 PV 对应的 Pod 列表 +2. 如果 PV 没有对应的 PVC,则判断为闲置 PV +3. 
如果没有 Pod 关联这个 PV 和 PVC,则判断为闲置 PV + +## 参数配置 + +目前 PV 推荐没有参数配置。 + +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md b/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md index c1f257e89..312f8f671 100644 --- a/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md +++ b/site/content/zh/docs/Tutorials/Recommendation/recommendation-framework.md @@ -128,6 +128,8 @@ patchData=`kubectl get recommend workloads-rule-replicas-rckvb -n default -o jso - [**副本数推荐**](/zh-cn/docs/tutorials/recommendation/replicas-recommendation): 通过 HPA 算法分析应用的真实用量推荐更合适的副本数量 - [**HPA 推荐**](/zh-cn/docs/tutorials/recommendation/hpa-recommendation): 扫描集群中的 Workload,针对适合适合水平弹性的 Workload 推荐 HPA 配置 - [**闲置节点推荐**](/zh-cn/docs/tutorials/recommendation/idlenode-recommendation): 扫描集群中的闲置节点 +- [**Service 推荐**](/zh-cn/docs/tutorials/recommendation/service-recommendation): 扫描集群中的闲置 Service +- [**PV 推荐**](/zh-cn/docs/tutorials/recommendation/pv-recommendation): 扫描集群中的闲置 PV ### Recommender 框架 diff --git a/site/content/zh/docs/Tutorials/Recommendation/service-recommendation.md b/site/content/zh/docs/Tutorials/Recommendation/service-recommendation.md new file mode 100644 index 000000000..99c66c4bd --- /dev/null +++ b/site/content/zh/docs/Tutorials/Recommendation/service-recommendation.md @@ -0,0 +1,97 @@ +--- +title: "Service 推荐" +description: "Service 推荐功能介绍" +weight: 16 +--- + +Service 推荐通过扫描集群中 Service 的运行状况,帮助用户找到闲置的 Kubernetes Service。 + +## 动机 + +通常在 Kubernetes 中我们会使用 Service + Workload 来自动创建和管理负载均衡并将负载均衡挂载到应用上,在日常的运营中难免会出现空闲和低利用率的负载均衡,浪费了大量成本,Service 推荐尝试帮助用户找到这部分 Service 来实现成本优化。 + +## 推荐示例 + +```yaml +apiVersion: analysis.crane.io/v1alpha1 +kind: Recommendation +metadata: + annotations: + analysis.crane.io/last-start-time: "2023-06-12 11:52:23" + analysis.crane.io/message: Success + analysis.crane.io/run-number: 
"7823" + creationTimestamp: "2023-06-12T09:44:23Z" + labels: + analysis.crane.io/recommendation-rule-name: service-rule + analysis.crane.io/recommendation-rule-recommender: Service + analysis.crane.io/recommendation-rule-uid: 67807cd9-b4c9-4d63-8493-d330ccace364 + analysis.crane.io/recommendation-target-kind: Service + analysis.crane.io/recommendation-target-name: nginx + analysis.crane.io/recommendation-target-namespace: crane-system + analysis.crane.io/recommendation-target-version: v1 + name: service-rule-service-cnwt5 + namespace: crane-system + ownerReferences: + - apiVersion: analysis.crane.io/v1alpha1 + blockOwnerDeletion: false + controller: false + kind: RecommendationRule + name: service-rule + uid: 67807cd9-b4c9-4d63-8493-d330ccace364 +spec: + adoptionType: StatusAndAnnotation + completionStrategy: + completionStrategyType: Once + targetRef: + apiVersion: v1 + kind: Service + name: nginx + namespace: crane-system + type: Service +status: + action: Delete + description: It is a Orphan Service, Pod count is 0 + lastUpdateTime: "2023-06-12T11:52:23Z" +``` + +在该示例中: + +- 推荐的 TargetRef 指向了 Service:nginx +- 推荐类型为 Service 推荐 +- action 是 Delete,这里只是给出建议 + +## 实现原理 + +Service 推荐按以下步骤完成一次推荐过程: + +1. 扫描集群中所有 LoadBalancer 类型的 Service +2. 如果 Service 对应的 endpoints 中有 Address 或者 NotReadyAddresses,则不是闲置的 Service +3. 
依据 Service 推荐中流量相关 metric 检测 Service 是否小于阈值水位,如果小于水位则判定为闲置 Service + +## 如何验证推荐结果的准确性 + +以下是判断 Service 流量阈值水位的 Prom query,验证时把 namespace 和 pod 替换成实际的命名空间和 Pod 名称 + +```go +// Container network cumulative count of bytes received +queryFmtNetReceiveBytes = `sum(rate(container_network_receive_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` +// Container network cumulative count of bytes transmitted +queryFmtNetTransferBytes = `sum(rate(container_network_transmit_bytes_total{namespace="%s",pod=~"%s",container!=""}[3m]))` +``` + +## 支持的资源类型 + +只支持 Service 类型,目前只会对 LoadBalancer 类型的 Service 进行分析。 + +## 参数配置 + +| 配置项 | 默认值 | 描述 | +|----------|-----|---------------------------------| +| net-receive-bytes | 0 | Service 对应 Pods 接收到的网络请求 bytes,默认不检查 | +| net-receive-percentile | 0.99 | 计算接收到的网络请求时的 Percentile | +| net-transfer-bytes | 0 | Service 对应 Pods 传输的网络请求 bytes,默认不检查 | +| net-transfer-percentile | 0.99 | 计算传输的网络请求时的 Percentile | + +注意,当 pod 配置了 liveness/readiness probe 后,kubelet 的探测会带来一定的容器流量,因此流量的阈值需要设置的稍微大一些,可结合具体监控数据配置。 + +如何更新推荐的配置请参考:[**推荐框架**](/zh-cn/docs/tutorials/recommendation/recommendation-framework) \ No newline at end of file diff --git a/site/training.zip b/site/training.zip new file mode 100644 index 000000000..0628917cc Binary files /dev/null and b/site/training.zip differ