Skip to content

Commit

Permalink
Execute the jsonnet
Browse files: browse the repository at this point in the history
Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
  • Loading branch information
codesome committed Dec 19, 2024
1 parent 256c308 commit c777bbf
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 7 deletions.
24 changes: 21 additions & 3 deletions operations/mimir-mixin-compiled-baremetal/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1155,18 +1155,36 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuildernocycleprocessing
expr: |
max by(cluster, namespace, instance) (histogram_count(increase(cortex_blockbuilder_consume_cycle_duration_seconds[60m]))) == 0
for: 5m
for: 10m
labels:
severity: warning
- alert: MimirBlockBuilderNoCycleProcessing
annotations:
message: Mimir {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not processed cycles in the past hour.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuildernocycleprocessing
expr: |
max by(cluster, namespace, instance) (histogram_count(increase(cortex_blockbuilder_consume_cycle_duration_seconds[60m]))) == 0
for: 20m
labels:
severity: critical
- alert: MimirBlockBuilderLagging
annotations:
message: Mimir {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}%.
message: Mimir {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuilderlagging
expr: |
max by(cluster, namespace, instance) (max_over_time(cortex_blockbuilder_consumer_lag_records[10m])) > 4e6
for: 75m
labels:
severity: warning
- alert: MimirBlockBuilderLagging
annotations:
message: Mimir {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuilderlagging
expr: |
max by(cluster, namespace, instance) (max_over_time(cortex_blockbuilder_consumer_lag_records[10m])) > 4e6
for: 140m
labels:
severity: critical
- alert: MimirBlockBuilderCompactAndUploadFailed
annotations:
message: Mimir {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} fails to compact and upload blocks.
Expand All @@ -1175,7 +1193,7 @@ groups:
sum by (cluster, namespace, instance) (rate(cortex_blockbuilder_tsdb_compact_and_upload_failed_total[1m])) > 0
for: 5m
labels:
severity: warning
severity: critical
- name: mimir_continuous_test
rules:
- alert: MimirContinuousTestNotRunningOnWrites
Expand Down
24 changes: 21 additions & 3 deletions operations/mimir-mixin-compiled/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1169,18 +1169,36 @@ groups:
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuildernocycleprocessing
expr: |
max by(cluster, namespace, pod) (histogram_count(increase(cortex_blockbuilder_consume_cycle_duration_seconds[60m]))) == 0
for: 5m
for: 10m
labels:
severity: warning
- alert: MimirBlockBuilderNoCycleProcessing
annotations:
message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} has not processed cycles in the past hour.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuildernocycleprocessing
expr: |
max by(cluster, namespace, pod) (histogram_count(increase(cortex_blockbuilder_consume_cycle_duration_seconds[60m]))) == 0
for: 20m
labels:
severity: critical
- alert: MimirBlockBuilderLagging
annotations:
message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}%.
message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuilderlagging
expr: |
max by(cluster, namespace, pod) (max_over_time(cortex_blockbuilder_consumer_lag_records[10m])) > 4e6
for: 75m
labels:
severity: warning
- alert: MimirBlockBuilderLagging
annotations:
message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} reports partition lag of {{ printf "%.2f" $value }}.
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimirblockbuilderlagging
expr: |
max by(cluster, namespace, pod) (max_over_time(cortex_blockbuilder_consumer_lag_records[10m])) > 4e6
for: 140m
labels:
severity: critical
- alert: MimirBlockBuilderCompactAndUploadFailed
annotations:
message: Mimir {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} fails to compact and upload blocks.
Expand All @@ -1189,7 +1207,7 @@ groups:
sum by (cluster, namespace, pod) (rate(cortex_blockbuilder_tsdb_compact_and_upload_failed_total[1m])) > 0
for: 5m
labels:
severity: warning
severity: critical
- name: mimir_continuous_test
rules:
- alert: MimirContinuousTestNotRunningOnWrites
Expand Down
2 changes: 1 addition & 1 deletion operations/mimir-mixin/alerts/ingest-storage.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@
},
{
alert: $.alertName('BlockBuilderLagging'),
'for': '140m', // 2h20m. Indicating the lag did not come down for ~2 consumption cycles.
'for': '140m', // 2h20m. Indicating the lag did not come down for ~2 consumption cycles.
expr: |||
max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(cortex_blockbuilder_consumer_lag_records[10m])) > 4e6
||| % $._config,
Expand Down

0 comments on commit c777bbf

Please sign in to comment.