From ed13e1eeab1c55869a8490155a7208e57a7130b8 Mon Sep 17 00:00:00 2001
From: Jose Armesto
Date: Mon, 9 Dec 2024 12:22:55 +0100
Subject: [PATCH] Add cluster_id label

---
Reviewer notes (text between the "---" separator and the diff is not part of
the commit message):
- Hunk 1 adds cluster_id, installation, pipeline and provider to BOTH
  `sum by (...)` groupings, so the two sides of the subtraction keep the same
  label set and `$labels.cluster_id` is available to the description.
- Hunks 2 and 3 change only the description text; their `expr` lines are
  context and are left untouched by this patch.
- Context-line indentation below was restored from a whitespace-collapsed copy;
  NOTE(review): confirm it against the target file before applying.

 .../kaas/phoenix/alerting-rules/karpenter.rules.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml
index 8a6ac41c..75e9a4cd 100644
--- a/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml
+++ b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml
@@ -13,9 +13,9 @@ spec:
     - alert: KarpenterCanNotRegisterNewNodes
       annotations:
         description: |
-          Karpenter provisioner {{`{{ $labels.provisioner }}`}} launched new nodes, but some of nodes did not registered in the cluster
+          Karpenter provisioner {{`{{ $labels.provisioner }}`}} on cluster {{`{{ $labels.cluster_id }}`}} launched new nodes, but some of nodes did not registered in the cluster
         opsrecipe: karpenter/
-      expr: sum by (provisioner) (karpenter_machines_launched) - sum by (provisioner)(karpenter_machines_registered) != 0
+      expr: sum by (provisioner, cluster_id, installation, pipeline, provider) (karpenter_machines_launched) - sum by (provisioner, cluster_id, installation, pipeline, provider)(karpenter_machines_registered) != 0
       for: 1h
       labels:
         area: kaas
@@ -27,7 +27,7 @@ spec:
     - alert: KarpenterProvisionerAlmostFull
       annotations:
         description: |
-          Provisioner {{`{{ $labels.provisioner }}`}} is almost full.
+          Provisioner {{`{{ $labels.provisioner }}`}} on cluster {{`{{ $labels.cluster_id }}`}} is almost full.
         opsrecipe: karpenter/
       expr: karpenter_provisioner_usage_pct > 90
       for: 72h
@@ -41,7 +41,7 @@ spec:
     - alert: KarpenterCloudproviderErrors
       annotations:
         description: |
-          Karpenter is getting errors during API calls to the cloud provider.
+          Karpenter on cluster {{`{{ $labels.cluster_id }}`}} is getting errors during API calls to the cloud provider.
         opsrecipe: karpenter/
       expr: rate(karpenter_cloudprovider_errors_total{}[5m]) > 0.1
       for: 10m