Skip to content

Commit

Permalink
Add dashboard and alerts for Velero backups (#681)
Browse files Browse the repository at this point in the history
  • Loading branch information
mkjpryor authored Oct 25, 2024
1 parent f0a7bab commit 9eda302
Show file tree
Hide file tree
Showing 3 changed files with 1,198 additions and 0 deletions.
28 changes: 28 additions & 0 deletions roles/velero/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ velero_release_namespace: velero
velero_release_name: velero
velero_wait_timeout: 10m
velero_release_defaults:
# Monitoring settings for the Velero release: metrics are switched on, together
# with a serviceMonitor and a prometheusRule whose spec lists the two backup
# alerts defined below.
# NOTE(review): indentation is not visible in this view, so the nesting described
# here is inferred from key order - confirm against the checked-in file.
metrics:
enabled: true
serviceMonitor:
enabled: true
prometheusRule:
enabled: true
spec:
# Warning-severity alert for partially failed backups. The expression sums, per
# schedule, the increase of velero_backup_partial_failure_total over the last 24h
# (series with an empty schedule label are excluded) and matches when it is > 0.
# `for: 15m` means the condition must hold for 15 minutes before the alert fires.
- alert: VeleroBackupPartialFailures
annotations:
# Use the unsafe tag to prevent Ansible trying to render it as a template;
# the {{ $labels.schedule }} placeholder has to be passed through unrendered
message: !unsafe >-
Velero schedule '{{ $labels.schedule }}' has partially failed backups in the last 24 hours.
expr: |-
sum(increase(velero_backup_partial_failure_total{schedule!=""}[24h])) by(schedule) > 0
for: 15m
labels:
severity: warning

# Warning-severity alert for fully failed backups. Same shape as the alert above,
# but driven by velero_backup_failure_total: any per-schedule increase over the
# last 24h that persists for 15 minutes triggers it.
- alert: VeleroBackupFailures
annotations:
# Use the unsafe tag to prevent Ansible trying to render it as a template;
# the {{ $labels.schedule }} placeholder has to be passed through unrendered
message: !unsafe >-
Velero schedule '{{ $labels.schedule }}' has failed backups in the last 24 hours.
expr: |-
sum(increase(velero_backup_failure_total{schedule!=""}[24h])) by(schedule) > 0
for: 15m
labels:
severity: warning
configuration:
features: EnableCSI
backupStorageLocation:
Expand Down
Loading

0 comments on commit 9eda302

Please sign in to comment.