From d5da774d1f8992f352b54c28081c87b20e4dd3a6 Mon Sep 17 00:00:00 2001 From: Tim Middleton Date: Mon, 3 Oct 2022 15:42:03 +0800 Subject: [PATCH] Minor dashboard updates including store reads/writes - cpu load graph --- .../grafana/cache-details-dashboard.json | 61 ++++++- .../grafana/coherence-dashboard-main.json | 2 +- .../grafana/members-summary-dashboard.json | 107 ++++++++++- .../grafana/service-details-dashboard.json | 166 ++++++++++++++++-- .../grafana/services-summary-dashboard.json | 124 ++++++++++++- 5 files changed, 428 insertions(+), 32 deletions(-) diff --git a/dashboards/grafana/cache-details-dashboard.json b/dashboards/grafana/cache-details-dashboard.json index 057726835..f8a053f5f 100644 --- a/dashboards/grafana/cache-details-dashboard.json +++ b/dashboards/grafana/cache-details-dashboard.json @@ -1137,15 +1137,18 @@ "unit": "ms" }, { - "alias": "", + "alias": "Store Writes", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)" ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", "decimals": 2, - "pattern": "/.*/", + "mappingType": 1, + "pattern": "Value #F", "thresholds": [], "type": "number", "unit": "short" @@ -1431,6 +1434,40 @@ "thresholds": [], "type": "number", "unit": "short" + }, + { + "alias": "Store Writes", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #K", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Store Reads", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #L", + "thresholds": [], + "type": "number", + "unit": "none" } ], "targets": [ @@ -1515,6 +1552,24 @@ "intervalFactor": 1, "legendFormat": "", "refId": "J" + }, + { + "expr": "sum(vendor:coherence_cache_store_writes{cluster=\"$ClusterName\",coherence_service='$ServiceName', name=\"$CacheName\"}) by (member)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "K" + }, + { + "expr": "sum(vendor:coherence_cache_store_reads{cluster=\"$ClusterName\",coherence_service='$ServiceName', name=\"$CacheName\"}) by (member)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "L" } ], "title": "Cache Storage Details for Cache $CacheName", @@ -1564,6 +1619,7 @@ }, { "alias": "Total Gets", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", @@ -1682,6 +1738,7 @@ }, { "alias": "", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", diff --git a/dashboards/grafana/coherence-dashboard-main.json b/dashboards/grafana/coherence-dashboard-main.json index c55aaadc9..0a2709ec0 100644 --- a/dashboards/grafana/coherence-dashboard-main.json +++ b/dashboards/grafana/coherence-dashboard-main.json @@ -842,7 +842,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Current Load Averages", + "title": "Current Load Averages (Stacked)", "tooltip": { "shared": true, "sort": 0, diff --git a/dashboards/grafana/members-summary-dashboard.json b/dashboards/grafana/members-summary-dashboard.json index 947ade3d8..6f53fdc50 100644 --- a/dashboards/grafana/members-summary-dashboard.json +++ b/dashboards/grafana/members-summary-dashboard.json @@ -490,15 +490,110 @@ "dashLength": 10, "dashes": false, "datasource": "prometheus", + "description": "", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, - "w": 12, + "w": 24, "x": 0, "y": 14 }, "hiddenSeries": false, + "id": 98, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(vendor:coherence_os_process_cpu_load{cluster=\"$ClusterName\"}) by (member)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Member: {{member}} ({{role}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, "id": 65, "legend": { "alignAsTable": true, @@ -589,7 +684,7 @@ "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 22 }, "hiddenSeries": false, "id": 66, @@ -684,7 +779,7 @@ "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 30 }, "hiddenSeries": false, "id": 68, @@ -784,7 +879,7 @@ "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 30 }, "hiddenSeries": false, "id": 69, @@ -882,7 +977,7 @@ "h": 9, "w": 12, "x": 0, - "y": 30 + "y": 38 }, "hiddenSeries": false, "id": 23, @@ -976,7 +1071,7 @@ "h": 9, "w": 12, "x": 12, - "y": 30 + "y": 38 }, "hiddenSeries": false, "id": 24, diff --git a/dashboards/grafana/service-details-dashboard.json b/dashboards/grafana/service-details-dashboard.json index 4b4a2d11c..a2cd2dd18 100644 --- a/dashboards/grafana/service-details-dashboard.json +++ b/dashboards/grafana/service-details-dashboard.json @@ -291,12 +291,14 @@ "decimals": 0, "description": "Thread utilization across all members.", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, + "hiddenSeries": false, "id": 19, "legend": { "alignAsTable": false, @@ -379,15 +381,133 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, + "decimals": 0, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(vendor:coherence_service_request_timeout_count{cluster=\"$ClusterName\",name='$ServiceName'}) - sum(vendor:coherence_service_request_timeout_count{cluster=\"$ClusterName\",name='$ServiceName'} offset $__interval)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request Timeouts", + "refId": "A" + }, + { + "expr": "sum(vendor:coherence_service_task_timeout_count{cluster=\"$ClusterName\",name='$ServiceName'}) - sum(vendor:coherence_service_task_timeout_count{cluster=\"$ClusterName\",name='$ServiceName'} offset $__interval)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Task Timeouts", + "refId": "B" + }, + { + "expr": "sum(vendor:coherence_service_task_hung_count{cluster=\"$ClusterName\",name='$ServiceName'}) - sum(vendor:coherence_service_task_hung_count{cluster=\"$ClusterName\",name='$ServiceName'} offset $__interval)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Task Hangs", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request/ Task Errors and Hangs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, "decimals": 1, "description": "The rate of tasks for this service across all members.", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 24 }, + "hiddenSeries": false, "id": 4, "interval": "", "legend": { @@ -473,12 +593,14 @@ "dashes": false, "decimals": 0, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 24 }, + "hiddenSeries": false, "id": 36, "legend": { "alignAsTable": false, @@ -563,12 +685,14 @@ "dashes": false, "decimals": 0, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 32 }, + "hiddenSeries": false, "id": 22, "interval": "", "legend": { @@ -655,12 +779,14 @@ "dashes": false, "decimals": 0, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 32 }, + "hiddenSeries": false, "id": 23, "legend": { "alignAsTable": false, @@ -746,12 +872,14 @@ "dashes": false, "decimals": 1, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, - "y": 32 + "y": 40 }, + "hiddenSeries": false, "id": 28, "legend": { "alignAsTable": false, @@ -838,12 +966,14 @@ "dashes": false, "decimals": 0, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 32 + "y": 40 }, + "hiddenSeries": false, "id": 14, "legend": { "alignAsTable": false, @@ -927,7 +1057,7 @@ "h": 2, "w": 24, "x": 0, - "y": 41 + "y": 49 }, "id": 35, "links": [], @@ -957,7 +1087,7 @@ "h": 5, "w": 4, "x": 0, - "y": 43 + "y": 51 }, "id": 9, "interval": null, @@ -1038,7 +1168,7 @@ "h": 5, "w": 4, "x": 4, - "y": 43 + "y": 51 }, "id": 12, "interval": null, @@ -1119,7 +1249,7 @@ "h": 5, "w": 4, "x": 8, - "y": 43 + "y": 51 }, "id": 13, "interval": null, @@ -1200,7 +1330,7 @@ "h": 5, "w": 4, "x": 12, - "y": 43 + "y": 51 }, "id": 11, "interval": null, @@ -1280,7 +1410,7 @@ "h": 5, "w": 4, "x": 16, - "y": 43 + "y": 51 }, "id": 32, "interval": null, @@ -1360,7 +1490,7 @@ "h": 5, "w": 4, "x": 20, - "y": 43 + "y": 51 }, "id": 31, "interval": null, @@ -1430,7 +1560,7 @@ "h": 9, "w": 12, "x": 0, - "y": 48 + "y": 56 }, "id": 30, "legend": { @@ -1536,7 +1666,7 @@ "h": 9, "w": 12, "x": 12, - "y": 48 + "y": 56 }, "id": 37, "legend": { @@ -1630,7 +1760,7 @@ "h": 7, "w": 24, "x": 0, - "y": 57 + "y": 65 }, "id": 8, "links": [], @@ -1822,7 +1952,7 @@ "h": 2, "w": 24, "x": 0, - "y": 64 + "y": 72 }, "id": 38, "links": [], @@ -1838,7 +1968,7 @@ "h": 8, "w": 24, "x": 0, - "y": 66 + "y": 74 }, "id": 42, "links": [], diff --git a/dashboards/grafana/services-summary-dashboard.json b/dashboards/grafana/services-summary-dashboard.json index 1e86acb54..82ce3c8e9 100644 --- a/dashboards/grafana/services-summary-dashboard.json +++ b/dashboards/grafana/services-summary-dashboard.json @@ -438,6 +438,120 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 98, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(vendor:coherence_service_request_timeout_count{cluster=\"$ClusterName\"}) - sum(vendor:coherence_service_request_timeout_count{cluster=\"$ClusterName\"} offset $__interval)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request Timeouts", + "refId": "A" + }, + { + "expr": "sum(vendor:coherence_service_task_timeout_count{cluster=\"$ClusterName\"}) - sum(vendor:coherence_service_task_timeout_count{cluster=\"$ClusterName\"} offset $__interval)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Task Timeouts", + "refId": "B" + }, + { + "expr": "sum(vendor:coherence_service_task_hung_count{cluster=\"$ClusterName\"}) - sum(vendor:coherence_service_task_hung_count{cluster=\"$ClusterName\"} offset $__interval)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Hung Tasks", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request/Task Timeouts - All Services", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "columns": [], "datasource": "prometheus", @@ -446,7 +560,7 @@ "h": 6, "w": 8, "x": 0, - "y": 18 + "y": 26 }, "id": 77, "links": [], @@ -561,7 +675,7 @@ "h": 6, "w": 8, "x": 8, - "y": 18 + "y": 26 }, "id": 78, "links": [], @@ -659,7 +773,7 @@ "h": 6, "w": 8, "x": 16, - "y": 18 + "y": 26 }, "id": 79, "links": [], @@ -758,7 +872,7 @@ "h": 7, "w": 12, "x": 0, - "y": 24 + "y": 32 }, "id": 48, "links": [], @@ -892,7 +1006,7 @@ "h": 7, "w": 12, "x": 12, - "y": 24 + "y": 32 }, "id": 93, "links": [],