Skip to content

Commit

Permalink
changes to hypershift metrics (#493)
Browse files Browse the repository at this point in the history
* fine-tuning some metrics

* enabled Prometheus container usage metrics
  • Loading branch information
mukrishn authored Oct 12, 2022
1 parent 60422fd commit 0367931
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions workloads/kube-burner/metrics-profiles/hypershift-metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
metricName: podMemoryReq
instant: true

- query: (sum(irate(container_cpu_usage_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",name!="",container!="POD",namespace=~"openshift-(etcd|.*apiserver|ovn-kubernetes|sdn|ingress|.*controller-manager|.*scheduler|image-registry)"}[2m]) * 100) by (container, pod, namespace, node, openshift_cluster_name)) > 0
- query: (sum(irate(container_cpu_usage_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",name!="",container!="POD",namespace=~"openshift-(etcd|.*apiserver|ovn-kubernetes|sdn|ingress|.*controller-manager|.*scheduler|image-registry|monitoring|user-workload-monitoring)"}[2m]) * 100) by (container, pod, namespace, node, openshift_cluster_name)) > 0
metricName: mgmt-containerCPU

- query: sum(container_memory_rss{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",name!="",container!="POD",namespace=~"openshift-(etcd|.*apiserver|ovn-kubernetes|sdn|ingress|.*controller-manager|.*scheduler|image-registry)"}) by (container, pod, namespace, node, openshift_cluster_name)
- query: sum(container_memory_rss{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",name!="",container!="POD",namespace=~"openshift-(etcd|.*apiserver|ovn-kubernetes|sdn|ingress|.*controller-manager|.*scheduler|image-registry|monitoring|user-workload-monitoring)"}) by (container, pod, namespace, node, openshift_cluster_name)
metricName: mgmt-containerMemory

# Containers & pod metrics
Expand Down Expand Up @@ -90,17 +90,17 @@

# Node metrics: CPU & Memory

- query: (sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}",mode=~"idle"}[2m])[24h:1m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)")) > 0
- query: (sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}",mode=~"idle"}[2m])) by (mode,instance)[5m:2m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)")) > 0
metricName: nodeCPU-Workers

- query: node_memory_MemAvailable_bytes{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"} and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")
metricName: nodeMemoryAvailable-Workers

# Management Node metrics: CPU & Memory
- query: (max((sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(irate(node_cpu_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",mode=~"idle",instance!~"${MGMT_WORKER_ONLY_NODES}"}[2m])[24h:1m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)"))) by (mode, instance)) > 0
- query: (max((sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",mode=~"idle",instance!~"${MGMT_WORKER_ONLY_NODES}"}[2m])) by (mode,instance)[5m:2m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)"))) by (mode, instance)) > 0
metricName: mgmtNodeCPU-AggregatedWorkers

- query: bottomk(1,min_over_time(node_memory_MemAvailable_bytes{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",instance!~"${MGMT_WORKER_ONLY_NODES}"}[24h:1m] @ ${Q_TIME}))
- query: bottomk(1,min_over_time(node_memory_MemAvailable_bytes{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",instance!~"${MGMT_WORKER_ONLY_NODES}"}[5m:2m] @ ${Q_TIME}))
metricName: mgmtNodeMemoryAvailable-AggregatedWorkers

- query: (avg(node_memory_MemTotal_bytes{openshift_cluster_name=~"${MGMT_CLUSTER_NAME}",instance!~"${MGMT_WORKER_ONLY_NODES}"}) by (instance))
Expand All @@ -125,10 +125,10 @@
- query: node_memory_MemTotal_bytes{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"} and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")
metricName: nodeMemoryTotal-Workers

- query: (max(sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}",mode=~"idle"}[2m])[24h:1m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)")) by (mode)) > 0
- query: (max(sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[2m])) by (mode,instance) and on (instance) label_replace(bottomk(1, min_over_time(sum(irate(node_cpu_seconds_total{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}",mode=~"idle"}[2m])) by (mode,instance)[5m:2m] @ ${Q_TIME})), "instance", "$1", "instance", "(.+)")) by (mode)) > 0
metricName: nodeCPU-AggregatedWorkers

- query: bottomk(1,min_over_time(node_memory_MemAvailable_bytes{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[24h:1m] @ ${Q_TIME}))
- query: bottomk(1,min_over_time(node_memory_MemAvailable_bytes{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"}[5m:2m] @ ${Q_TIME}))
metricName: nodeMemoryAvailable-AggregatedWorkers

- query: avg(node_memory_MemTotal_bytes{openshift_cluster_name=~"${HOSTED_CLUSTER_NAME}"} and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)"))
Expand Down

0 comments on commit 0367931

Please sign in to comment.