From 1ca7d5041ee121cfaad0854e3faddfd87b1599f8 Mon Sep 17 00:00:00 2001 From: "Grot (@grafanabot)" <43478413+grafanabot@users.noreply.github.com> Date: Tue, 29 Nov 2022 21:33:32 +0200 Subject: [PATCH] [release-2.5] Improve networking panels to work with read write deployment mode (#3574) * Improve networking panels to work with read write deployment mode (#3519) * Improve networking dashboards to work with read-write deployment mode Signed-off-by: Marco Pracucci * Added CHANGELOG entry Signed-off-by: Marco Pracucci * Fixed panel title Signed-off-by: Marco Pracucci Signed-off-by: Marco Pracucci (cherry picked from commit 5064c6462c6132431c7c059fdd6fc35cf683d26a) * add an empty space Signed-off-by: Mauro Stettler * Revert "add an empty space" This reverts commit 8136c9797849142bb15bb137bfc76102d3927401. Signed-off-by: Mauro Stettler Co-authored-by: Marco Pracucci Co-authored-by: Mauro Stettler --- CHANGELOG.md | 2 +- .../dashboards/mimir-overview-networking.json | 30 +- .../dashboards/mimir-reads-networking.json | 418 ++++++++++++++++-- .../dashboards/mimir-writes-networking.json | 370 +++++++++++++++- .../dashboards/mimir-overview-networking.json | 30 +- .../dashboards/mimir-reads-networking.json | 418 ++++++++++++++++-- .../dashboards/mimir-writes-networking.json | 370 +++++++++++++++- operations/mimir-mixin/config.libsonnet | 16 +- .../dashboards/dashboard-utils.libsonnet | 34 +- .../dashboards/overview-networking.libsonnet | 8 +- .../dashboards/reads-networking.libsonnet | 13 +- .../dashboards/writes-networking.libsonnet | 7 +- 12 files changed, 1538 insertions(+), 178 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8747fc94502..5ab3478f6f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,7 +54,7 @@ * [ENHANCEMENT] Dashboards: Add read path insights row to the "Mimir / Tenants" dashboard. #3326 * [ENHANCEMENT] Alerts: Add runbook urls for alerts. #3452 * [ENHANCEMENT] Configuration: Make it possible to configure namespace label, job label, and job prefix. #3482 -* [ENHANCEMENT] Dashboards: improved "Mimir / Writes resources" and "Mimir / Reads resources" dashboards to work with read-write deployment mode too. #3497 #3504 +* [ENHANCEMENT] Dashboards: improved resources and networking dashboards to work with read-write deployment mode too. #3497 #3504 #3519 * [BUGFIX] Dashboards: Fix legend showing `persistentvolumeclaim` when using `deployment_type=baremetal` for `Disk space utilization` panels. #3173 * [BUGFIX] Alerts: Fixed `MimirGossipMembersMismatch` alert when Mimir is deployed in read-write mode. #3489 * [BUGFIX] Dashboards: Remove "Inflight requests" from object store panels because the panel is not tracking the inflight requests to object storage. #3521 diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview-networking.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview-networking.json index 3067da0f6a6..ee7bab3e5b7 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview-networking.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview-networking.json @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +650,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +658,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -898,7 +898,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -906,7 +906,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -982,7 +982,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -990,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -998,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads-networking.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads-networking.json index 8f72b05400d..3e3c5ad1343 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads-networking.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads-networking.json @@ -66,7 +66,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -142,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -367,7 +367,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-frontend", + "title": "Summary", "titleSize": "h6" }, { @@ -406,7 +406,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -482,7 +482,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +650,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +658,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -707,7 +707,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-scheduler", + "title": "Query-frontend", "titleSize": "h6" }, { @@ -746,7 +746,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -822,7 +822,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -898,7 +898,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -906,7 +906,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -982,7 +982,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -990,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -998,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -1047,7 +1047,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Querier", + "title": "Query-scheduler", "titleSize": "h6" }, { @@ -1086,7 +1086,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1162,7 +1162,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1238,7 +1238,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1246,7 +1246,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1322,7 +1322,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1330,7 +1330,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1338,7 +1338,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -1387,7 +1387,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Store-gateway", + "title": "Querier", "titleSize": "h6" }, { @@ -1426,7 +1426,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1502,7 +1502,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -1578,7 +1578,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1586,7 +1586,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1662,7 +1662,347 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*store-gateway.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "TCP connections (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Receive bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Inflight requests (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1670,7 +2010,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1678,7 +2018,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ruler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes-networking.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes-networking.json index 8fdbc79f35c..02a20817efc 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes-networking.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes-networking.json @@ -66,7 +66,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -142,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -367,7 +367,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Distributor", + "title": "Summary", "titleSize": "h6" }, { @@ -406,7 +406,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -482,7 +482,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}[$__rate_interval]))", + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,347 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*distributor.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "TCP connections (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Receive bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(instance) (rate(node_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Inflight requests (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", + "expr": "max(sum by(instance) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",instance=~\".*ingester.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-overview-networking.json b/operations/mimir-mixin-compiled/dashboards/mimir-overview-networking.json index 55b7caed677..8b2b8148d54 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-overview-networking.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-overview-networking.json @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|ingester.*|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +650,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +658,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|querier.*|ruler-query-frontend.*|ruler-querier.*|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -898,7 +898,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -906,7 +906,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -982,7 +982,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -990,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -998,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|query-scheduler.*|ruler-query-scheduler.*|store-gateway.*|compactor.*|alertmanager|overrides-exporter|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-reads-networking.json b/operations/mimir-mixin-compiled/dashboards/mimir-reads-networking.json index 52d9b05e079..6ed90fb7be2 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-reads-networking.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-reads-networking.json @@ -66,7 +66,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -142,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -367,7 +367,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-frontend", + "title": "Summary", "titleSize": "h6" }, { @@ -406,7 +406,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -482,7 +482,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +650,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +658,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-frontend.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -707,7 +707,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Query-scheduler", + "title": "Query-frontend", "titleSize": "h6" }, { @@ -746,7 +746,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -822,7 +822,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -898,7 +898,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -906,7 +906,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -982,7 +982,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -990,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -998,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?query-scheduler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -1047,7 +1047,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Querier", + "title": "Query-scheduler", "titleSize": "h6" }, { @@ -1086,7 +1086,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -1162,7 +1162,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -1238,7 +1238,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1246,7 +1246,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1322,7 +1322,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1330,7 +1330,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1338,7 +1338,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?querier.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -1387,7 +1387,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Store-gateway", + "title": "Querier", "titleSize": "h6" }, { @@ -1426,7 +1426,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -1502,7 +1502,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -1578,7 +1578,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1586,7 +1586,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1662,7 +1662,347 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?store-gateway.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "TCP connections (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Receive bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Inflight requests (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -1670,7 +2010,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -1678,7 +2018,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ruler.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-writes-networking.json b/operations/mimir-mixin-compiled/dashboards/mimir-writes-networking.json index 5adb0746119..75ad93978c7 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-writes-networking.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-writes-networking.json @@ -66,7 +66,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -142,7 +142,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -218,7 +218,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -226,7 +226,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -302,7 +302,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -310,7 +310,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -318,7 +318,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", @@ -367,7 +367,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Distributor", + "title": "Summary", "titleSize": "h6" }, { @@ -406,7 +406,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -482,7 +482,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", @@ -558,7 +558,7 @@ "steppedLine": false, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -566,7 +566,7 @@ "step": 10 }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -642,7 +642,347 @@ "steppedLine": false, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"}))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?distributor.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "limit", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "TCP connections (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Receive bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Transmit bandwidth", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "legendLink": null, + "step": 10 + }, + { + "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "highest", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Inflight requests (per pod)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 3, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "avg", @@ -650,7 +990,7 @@ "step": 10 }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"}))", "format": "time_series", "intervalFactor": 2, "legendFormat": "highest", @@ -658,7 +998,7 @@ "step": 10 }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*-mimir-)?ingester.*\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "limit", diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index 4962501365a..2031b70bb01 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -201,16 +201,6 @@ // System mount point where mimir stores its data, used for baremetal // deployment only. instance_data_mountpoint: '/', - resources_panel_series: { - kubernetes: { - network_receive_bytes_metrics: 'container_network_receive_bytes_total', - network_transmit_bytes_metrics: 'container_network_transmit_bytes_total', - }, - baremetal: { - network_receive_bytes_metrics: 'node_network_receive_bytes_total', - network_transmit_bytes_metrics: 'node_network_transmit_bytes_total', - }, - }, resources_panel_queries: { kubernetes: { cpu_usage: 'sum by(%(instanceLabel)s) (rate(container_cpu_usage_seconds_total{%(namespace)s,container=~"%(containerName)s"}[$__rate_interval]))', @@ -227,7 +217,8 @@ memory_rss_limit: 'min(container_spec_memory_limit_bytes{%(namespace)s,container=~"%(containerName)s"} > 0)', memory_rss_request: 'min(kube_pod_container_resource_requests{%(namespace)s,container=~"%(containerName)s",resource="memory"})', memory_go_heap_usage: 'sum by(%(instanceLabel)s) (go_memstats_heap_inuse_bytes{%(namespace)s,container=~"%(containerName)s"})', - network: 'sum by(%(instanceLabel)s) (rate(%(metric)s{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', + network_receive_bytes: 'sum by(%(instanceLabel)s) (rate(container_network_receive_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', + network_transmit_bytes: 'sum by(%(instanceLabel)s) (rate(container_network_transmit_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', disk_writes: ||| sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( @@ -283,7 +274,8 @@ + node_memory_SwapCached_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"} |||, memory_go_heap_usage: 'sum by(%(instanceLabel)s) (go_memstats_heap_inuse_bytes{%(namespace)s,%(instanceLabel)s=~"%(instanceName)s"})', - network: 'sum by(%(instanceLabel)s) (rate(%(metric)s{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', + network_receive_bytes: 'sum by(%(instanceLabel)s) (rate(node_network_receive_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', + network_transmit_bytes: 'sum by(%(instanceLabel)s) (rate(node_network_transmit_bytes_total{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}[$__rate_interval]))', disk_writes: ||| sum by(%(nodeLabel)s, %(instanceLabel)s, device) ( diff --git a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index a1b3d1a5098..83d3bdd4fd1 100644 --- a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -338,12 +338,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; fill: 0, }, - containerNetworkPanel(title, metric, instanceName):: + containerNetworkBytesPanel(title, metric, instanceName):: $.panel(title) + $.queryPanel( - $._config.resources_panel_queries[$._config.deployment_type].network % { + $._config.resources_panel_queries[$._config.deployment_type][metric] % { namespaceMatcher: $.namespaceMatcher(), - metric: metric, instanceLabel: $._config.per_instance_label, instanceName: instanceName, }, '{{%s}}' % $._config.per_instance_label @@ -351,11 +350,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.stack + { yaxes: $.yaxes('Bps') }, + // The provided instanceName should be a regexp from $._config.instance_names. containerNetworkReceiveBytesPanel(instanceName):: - $.containerNetworkPanel('Receive bandwidth', $._config.resources_panel_series[$._config.deployment_type].network_receive_bytes_metrics, instanceName), + $.containerNetworkBytesPanel('Receive bandwidth', 'network_receive_bytes', instanceName), + // The provided instanceName should be a regexp from $._config.instance_names. containerNetworkTransmitBytesPanel(instanceName):: - $.containerNetworkPanel('Transmit bandwidth', $._config.resources_panel_series[$._config.deployment_type].network_transmit_bytes_metrics, instanceName), + $.containerNetworkBytesPanel('Transmit bandwidth', 'network_transmit_bytes', instanceName), // The provided instanceName should be a regexp from $._config.instance_names, while // the provided containerName should be a regexp from $._config.container_names. @@ -416,28 +417,33 @@ local utils = import 'mixin-utils/utils.libsonnet'; // Check only the prefix so that a multi-zone deployment matches too. 'label_name=~"(%s).*"' % containerName, - jobNetworkingRow(title, name):: + // The provided componentName should be the name of a component among the ones defined in $._config.instance_names. + containerNetworkingRow(title, componentName):: + // Match series using namespace + instance instead of the job so that we can + // select only specific deployments (e.g. "distributor in microservices mode"). local vars = $._config { - job_matcher: $.jobMatcher($._config.job_names[name]), + instanceLabel: $._config.per_instance_label, + instanceName: $._config.instance_names[componentName], + namespaceMatcher: $.namespaceMatcher(), }; super.row(title) - .addPanel($.containerNetworkReceiveBytesPanel($._config.instance_names[name])) - .addPanel($.containerNetworkTransmitBytesPanel($._config.instance_names[name])) + .addPanel($.containerNetworkReceiveBytesPanel($._config.instance_names[componentName])) + .addPanel($.containerNetworkTransmitBytesPanel($._config.instance_names[componentName])) .addPanel( $.panel('Inflight requests (per pod)') + $.queryPanel([ - 'avg(cortex_inflight_requests{%(job_matcher)s})' % vars, - 'max(cortex_inflight_requests{%(job_matcher)s})' % vars, + 'avg(cortex_inflight_requests{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"})' % vars, + 'max(cortex_inflight_requests{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"})' % vars, ], ['avg', 'highest']) + { fill: 0 } ) .addPanel( $.panel('TCP connections (per pod)') + $.queryPanel([ - 'avg(sum by(%(per_instance_label)s) (cortex_tcp_connections{%(job_matcher)s}))' % vars, - 'max(sum by(%(per_instance_label)s) (cortex_tcp_connections{%(job_matcher)s}))' % vars, - 'min(cortex_tcp_connections_limit{%(job_matcher)s})' % vars, + 'avg(sum by(%(per_instance_label)s) (cortex_tcp_connections{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}))' % vars, + 'max(sum by(%(per_instance_label)s) (cortex_tcp_connections{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"}))' % vars, + 'min(cortex_tcp_connections_limit{%(namespaceMatcher)s,%(instanceLabel)s=~"%(instanceName)s"})' % vars, ], ['avg', 'highest', 'limit']) + { fill: 0 } ), diff --git a/operations/mimir-mixin/dashboards/overview-networking.libsonnet b/operations/mimir-mixin/dashboards/overview-networking.libsonnet index dd75ff744d2..304efb6e4b8 100644 --- a/operations/mimir-mixin/dashboards/overview-networking.libsonnet +++ b/operations/mimir-mixin/dashboards/overview-networking.libsonnet @@ -5,10 +5,10 @@ local filename = 'mimir-overview-networking.json'; [filename]: ($.dashboard('Overview networking') + { uid: std.md5(filename) }) .addClusterSelectorTemplates(false) - .addRowIf($._config.gateway_enabled, $.jobNetworkingRow('Gateway', 'gateway')) - .addRow($.jobNetworkingRow('Writes', 'write')) - .addRow($.jobNetworkingRow('Reads', 'read')) - .addRow($.jobNetworkingRow('Backend', 'backend')) + .addRowIf($._config.gateway_enabled, $.containerNetworkingRow('Gateway', 'gateway')) + .addRow($.containerNetworkingRow('Writes', 'write')) + .addRow($.containerNetworkingRow('Reads', 'read')) + .addRow($.containerNetworkingRow('Backend', 'backend')) + { templating+: { list: [ diff --git a/operations/mimir-mixin/dashboards/reads-networking.libsonnet b/operations/mimir-mixin/dashboards/reads-networking.libsonnet index a6373a8a23c..09ecf84d5b2 100644 --- a/operations/mimir-mixin/dashboards/reads-networking.libsonnet +++ b/operations/mimir-mixin/dashboards/reads-networking.libsonnet @@ -5,12 +5,13 @@ local filename = 'mimir-reads-networking.json'; [filename]: ($.dashboard('Reads networking') + { uid: std.md5(filename) }) .addClusterSelectorTemplates(false) - .addRowIf($._config.gateway_enabled, $.jobNetworkingRow('Gateway', 'gateway')) - .addRow($.jobNetworkingRow('Query-frontend', 'query_frontend')) - .addRow($.jobNetworkingRow('Query-scheduler', 'query_scheduler')) - .addRow($.jobNetworkingRow('Querier', 'querier')) - .addRow($.jobNetworkingRow('Store-gateway', 'store_gateway')) - .addRow($.jobNetworkingRow('Ruler', 'ruler')) + .addRow($.containerNetworkingRow('Summary', 'read')) + .addRowIf($._config.gateway_enabled, $.containerNetworkingRow('Gateway', 'gateway')) + .addRow($.containerNetworkingRow('Query-frontend', 'query_frontend')) + .addRow($.containerNetworkingRow('Query-scheduler', 'query_scheduler')) + .addRow($.containerNetworkingRow('Querier', 'querier')) + .addRow($.containerNetworkingRow('Store-gateway', 'store_gateway')) + .addRow($.containerNetworkingRow('Ruler', 'ruler')) + { templating+: { list: [ diff --git a/operations/mimir-mixin/dashboards/writes-networking.libsonnet b/operations/mimir-mixin/dashboards/writes-networking.libsonnet index 805fafff74b..ae0cf75cb0a 100644 --- a/operations/mimir-mixin/dashboards/writes-networking.libsonnet +++ b/operations/mimir-mixin/dashboards/writes-networking.libsonnet @@ -5,9 +5,10 @@ local filename = 'mimir-writes-networking.json'; [filename]: ($.dashboard('Writes networking') + { uid: std.md5(filename) }) .addClusterSelectorTemplates(false) - .addRowIf($._config.gateway_enabled, $.jobNetworkingRow('Gateway', 'gateway')) - .addRow($.jobNetworkingRow('Distributor', 'distributor')) - .addRow($.jobNetworkingRow('Ingester', 'ingester')) + .addRow($.containerNetworkingRow('Summary', 'write')) + .addRowIf($._config.gateway_enabled, $.containerNetworkingRow('Gateway', 'gateway')) + .addRow($.containerNetworkingRow('Distributor', 'distributor')) + .addRow($.containerNetworkingRow('Ingester', 'ingester')) + { templating+: { list: [