Skip to content

Commit

Permalink
Merge branch 'master' into dasomeone/fix_ceph_bright_mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Dasomeone authored Dec 4, 2024
2 parents d05f0fc + deb2c73 commit aa4ee47
Show file tree
Hide file tree
Showing 9 changed files with 721 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1653,7 +1653,7 @@ local getMatcher(cfg) = '%(solrSelector)s, solr_cluster=~"$solr_cluster"' % cfg;
refresh=2,
includeAll=true,
multi=true,
allValues='.+',
allValues='.*',
sort=1
),
template.new(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1578,7 +1578,7 @@ local getMatcher(cfg) = '%(solrSelector)s, solr_cluster=~"$solr_cluster", base_u
refresh=2,
includeAll=true,
multi=true,
allValues='.+',
allValues='.*',
sort=1
),
template.new(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ local dispatchesPanel(matcher) = {
},
};

local getMatcher(cfg) = '%(solrSelector)s, solr_cluster="$solr_cluster", base_url=~"$base_url"' % cfg;
local getMatcher(cfg) = '%(solrSelector)s, solr_cluster=~"$solr_cluster", base_url=~"$base_url"' % cfg;

{
grafanaDashboards+:: {
Expand Down
130 changes: 128 additions & 2 deletions csp-mixin/alerts/azure-alerts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ groups:
service: 'Azure Virtual Machines'
namespace: cloud-provider-azure
annotations:
summary: 'CPU utilization is too high.'
summary: 'VM CPU utilization is too high.'
description: 'The VM {{ $labels.resourceName }} is under heavy load and may become unresponsive.'
dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'

Expand All @@ -22,9 +22,135 @@ groups:
keep_firing_for: 10m
labels:
severity: critical
service: 'Azure Virtual Machines.'
service: 'Azure Virtual Machines'
namespace: cloud-provider-azure
annotations:
summary: 'VM unavailable.'
description: 'The VM {{ $labels.resourceName }} is not functioning or crashed, which may require immediate action.'
dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'

- alert: AzureDatabaseHighDtuConsumption
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_dtu_consumption_percent_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 90
for: 10m
keep_firing_for: 10m
labels:
severity: critical
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database DTU consumption.'
description: 'Check active queries and optimize indexes or consider scaling up DTUs to handle load in {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighStorageUsage
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_storage_percent_maximum_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 95
for: 15m
keep_firing_for: 10m
labels:
severity: critical
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database Storage usage.'
description: 'Archive or delete old data, or scale up storage capacity in {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighDeadlockCount
expr: |
sum by (job,resourceGroup,subscriptionName,resourceName) (rate(azure_microsoft_sql_servers_databases_deadlock_total_count{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}[5m])) > 5
for: 10m
keep_firing_for: 10m
labels:
severity: info
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database Deadlock count.'
description: 'Check {{ $labels.resourceName }} database logs for deadlock details and optimize affected queries.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighUserCpuUsage
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_cpu_percent_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 90
for: 10m
keep_firing_for: 10m
labels:
severity: warning
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database User CPU usage.'
description: 'Identify high CPU queries on {{ $labels.resourceName }} database and optimize them.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighSystemFailedConnections
expr: |
sum by (job,resourceGroup,subscriptionName,resourceName) (rate(azure_microsoft_sql_servers_databases_connection_failed_total_count{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}[5m])) > 10
for: 5m
keep_firing_for: 10m
labels:
severity: warning
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High number of database System Failed connections.'
description: 'Check network problems, firewall restrictions or high resource consumption affecting application access to the {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighUserFailedConnections
expr: |
sum by (job,resourceGroup,subscriptionName,resourceName) (rate(azure_microsoft_sql_servers_databases_connection_failed_user_error_total_count{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}[5m])) > 10
for: 15m
keep_firing_for: 10m
labels:
severity: warning
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High number of database User Failed connections.'
description: 'Check for authentication problems, network configuration errors, firewall issues, or resource constraints, affecting database accessibility for users on database {{ $labels.resourceName }}.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighWorkerUsage
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_workers_percent_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 60
for: 5m
keep_firing_for: 10m
labels:
severity: critical
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database worker usage.'
description: 'Look for long execution queries, review the number of concurrent queries and requests being sent to the database or check if there are any blocking sessions or deadlocks into the {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseHighDataIoUsage
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_physical_data_read_percent_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 90
for: 15m
keep_firing_for: 10m
labels:
severity: info
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'High database data IO usage.'
description: 'Review queries with high read or write activity, check if there are missing indexes or inefficient indexes that result in full table scans and assess the volume of transactions into the {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'

- alert: AzureDatabaseLowTempdbLogSpace
expr: |
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_sql_servers_databases_tempdb_log_used_percent_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 60
for: 5m
keep_firing_for: 10m
labels:
severity: critical
service: 'Azure SQL database'
namespace: cloud-provider-azure
annotations:
summary: 'Low database tempdb log space.'
description: 'Look for active sessions that might be using TempDB intensively, identify stored procedures or queries that create temporary tables or objects, and also look for long-running or memory-intensive queries that rely heavily on TempDB into the {{ $labels.resourceName }} database.'
dashboard_uid: '82c5b6cf30db5b601c5cc3f5d8d4284d'
2 changes: 1 addition & 1 deletion logs-lib/logs/variables.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(
)
+ var.query.selectionOptions.withIncludeAll(
value=true,
customAllValue='.*'
customAllValue='.+'
)
+ var.query.selectionOptions.withMulti()
+ var.query.refresh.onTime()
Expand Down
4 changes: 2 additions & 2 deletions mixin-utils/utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,10 @@ local g = import 'grafana-builder/grafana.libsonnet';

// showClassicHistogramQuery wraps a query defined as map {classic: q, native: q}, and compares the classic query
// to a dashboard variable, which should take -1 or +1 as its value in order to hide or show the classic query.
showClassicHistogramQuery(query, dashboard_variable='latency_metrics'):: '%s < ($%s * +Inf)' % [query.classic, dashboard_variable],
showClassicHistogramQuery(query, dashboard_variable='latency_metrics'):: '(%s) and on() (vector($%s) == 1)' % [query.classic, dashboard_variable],
// showNativeHistogramQuery wraps a query defined as map {classic: q, native: q}, and compares the native query
// to a dashboard variable, which should take -1 or +1 as its value in order to show or hide the native query.
showNativeHistogramQuery(query, dashboard_variable='latency_metrics'):: '%s < ($%s * -Inf)' % [query.native, dashboard_variable],
showNativeHistogramQuery(query, dashboard_variable='latency_metrics'):: '(%s) and on() (vector($%s) == -1)' % [query.native, dashboard_variable],

histogramRules(metric, labels, interval='1m', record_native=false)::
local vars = {
Expand Down
19 changes: 19 additions & 0 deletions static-exporter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,22 @@ local static_exporter = import 'github.com/grafana/jsonnet-libs/static-exporter/m
]),
}
```

## Updating httpd.conf

There is a default httpd.conf that was added to this library.
It was generated by running the following:

```
docker run --rm httpd:2.4 cat /usr/local/apache2/conf/httpd.conf > httpd.conf
```

If there is a downstream change that requires updating this config file, run the above command and then add the following snippet to the `<Directory "/usr/local/apache2/htdocs">` block:

```
<IfModule mod_headers.c>
Header set Content-Type: "text/plain; version=0.0.4"
</IfModule>
```

This change adds a `Content-Type` header to the responses, which enables Prometheus 3.x to scrape the static exporter.
Loading

0 comments on commit aa4ee47

Please sign in to comment.