diff --git a/docs/guides/latest/monitoring/grafana/index.md b/docs/guides/latest/monitoring/grafana/index.md index adb1edc3..5933bf19 100644 --- a/docs/guides/latest/monitoring/grafana/index.md +++ b/docs/guides/latest/monitoring/grafana/index.md @@ -1,3 +1,17 @@ +--- +title: Stash Grafana Dashboard | Stash +description: Using Stash Grafana Dashboard +menu: + docs_{{ .version }}: + identifier: monitoring-grafana-dashboard + name: Grafana Dashboard + parent: monitoring + weight: 30 +product_name: stash +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + {{< notice type="warning" message="This is an Enterprise-only feature. You must be **Stash Enterprise** customer to use pre-built Stash Grafana dashboard." >}} # Stash Grafana Dashboard @@ -88,4 +102,4 @@ Once, you have successfully imported the dashboard, you should see the Stash das The following video gives a tour of the different components of the Stash Grafana dashboard. -{{< youtube VYx1OI1tgkE >}} +{{< youtube b5r9PDwl--U >}} diff --git a/docs/guides/latest/monitoring/overview/images/monitoring-structure.svg b/docs/guides/latest/monitoring/overview/images/monitoring-structure.svg index 145007af..db271dd2 100644 --- a/docs/guides/latest/monitoring/overview/images/monitoring-structure.svg +++ b/docs/guides/latest/monitoring/overview/images/monitoring-structure.svg @@ -9,966 +9,38 @@ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" version="1.1" - viewBox="0 0 787.53076 207.75351" + viewBox="0 0 784.05695 407.40213" stroke-miterlimit="10" - id="svg316" - sodipodi:docname="Stash Monitoring.svg" - width="787.53076" - height="207.75351" + id="svg535" + sodipodi:docname="monitoring-structure-3.svg" + width="784.05695" + height="407.40213" style="fill:none;stroke:none;stroke-linecap:square;stroke-miterlimit:10" inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)"> + id="metadata541"> image/svg+xml + - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - + id="defs539"> + id="clipPath1350"> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + style="opacity:0.50800003;fill:#00ff00" + id="rect1352" + width="784.05695" + height="407.40213" + x="88.825623" + y="53.167259" /> + inkscape:current-layer="svg535" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + style="fill:#000000;fill-opacity:0;fill-rule:evenodd" /> + style="fill:#efefef;fill-rule:nonzero" /> + id="g215" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.11)"> + id="defs230"> + id="feGaussianBlur217" /> + id="feFuncR219" /> + id="feFuncG221" /> + id="feFuncB223" /> + id="feFuncA225" /> + id="g5ce4a3f31a_1_0.11"> + + + transform="matrix(0.1488,0,0,0.1488,726.43195,78.088081)" + id="g246"> + id="g5ce4a3f31a_1_0.12"> - - - - - + id="g250" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.13)"> + id="defs265"> + id="feGaussianBlur252" /> + id="feFuncR254" /> + id="feFuncG256" /> + id="feFuncB258" /> + id="feFuncA260" /> + id="g5ce4a3f31a_1_0.13"> + style="fill:#3c78d8;fill-rule:evenodd" /> + + style="fill:#000000;fill-opacity:0;fill-rule:evenodd" /> + + + + + + + + + + + + + + + + style="fill:#000000;fill-opacity:0;fill-rule:evenodd" /> + + + id="g316" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.14)"> + id="defs331"> + id="feGaussianBlur318" /> + id="feFuncR320" /> + id="feFuncG322" /> + 
id="feFuncB324" /> + id="feFuncA326" /> + id="g5ce4a3f31a_1_0.14"> + id="g338" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.15)"> + id="defs353"> + id="feGaussianBlur340" /> + id="feFuncR342" /> + id="feFuncG344" /> + id="feFuncB346" /> + id="feFuncA348" /> + id="g5ce4a3f31a_1_0.15"> + transform="matrix(0.22982283,0,0,0.22982283,722.24957,411.6473)" + id="g362"> + id="g5ce4a3f31a_1_0.16"> + + + + + + + id="g383" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.17)"> + id="defs398"> + id="feGaussianBlur385" /> + id="feFuncR387" /> + id="feFuncG389" /> + id="feFuncB391" /> + id="feFuncA393" /> + id="g5ce4a3f31a_1_0.17"> + style="fill:#78909c;fill-rule:evenodd" /> + style="fill:#ffffff;fill-rule:nonzero" /> + style="fill:#000000;fill-opacity:0;fill-rule:evenodd" /> + + + + id="g419" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.18)"> + id="defs434"> + id="feGaussianBlur421" /> + intercept="0.34901962" + id="feFuncR423" /> + intercept="0.34901962" + id="feFuncG425" /> + intercept="0.34901962" + id="feFuncB427" /> + id="feFuncA429" /> + id="g5ce4a3f31a_1_0.18"> + style="fill:#d9ead3;fill-rule:evenodd" /> - - - - - - - + + width="100%" + height="100%" /> - - + + + + + + + + + + + - - - - + id="g5ce4a3f31a_1_0.19"> + - - + id="g463" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.20)"> + id="defs478"> + id="feGaussianBlur465" /> + intercept="0.34901962" + id="feFuncR467" /> + intercept="0.34901962" + id="feFuncG469" /> + intercept="0.34901962" + id="feFuncB471" /> + id="feFuncA473" /> + id="g5ce4a3f31a_1_0.20"> - - - - - - - - - + d="m 401.15662,285.06802 h 51.34677 v 45.77826 c -25.67337,0 -25.67337,17.44238 -51.34677,7.53192 z" + id="path480" + inkscape:connector-curvature="0" + style="fill:#d9d2e9;fill-rule:evenodd" /> + id="g485" + style="filter:url(#shadowFilter-g5ce4a3f31a_1_0.21)"> + id="defs500"> + id="feGaussianBlur487" /> + intercept="0.34901962" + id="feFuncR489" /> + intercept="0.34901962" + id="feFuncG491" /> + intercept="0.34901962" + 
id="feFuncB493" /> + id="feFuncA495" /> + id="g5ce4a3f31a_1_0.21"> - + style="fill:#f4cccc;fill-rule:evenodd" /> - - - - - - - diff --git a/docs/guides/latest/monitoring/overview/index.md b/docs/guides/latest/monitoring/overview/index.md index 40885b59..00ef6281 100644 --- a/docs/guides/latest/monitoring/overview/index.md +++ b/docs/guides/latest/monitoring/overview/index.md @@ -20,7 +20,9 @@ Stash has native support for monitoring via [Prometheus](https://prometheus.io/) ## How Prometheus monitoring works -Stash uses [Prometheus PushGateway](https://github.com/prometheus/pushgateway) to export the metrics for backup & restore operations. The following diagram shows the logical structure of the Stash monitoring flow. +Stash monitoring metrics come from two sources. The first one is [Prometheus PushGateway](https://github.com/prometheus/pushgateway), which runs as a sidecar of the Stash operator pod. The backup and restore processes push their metrics to this pushgateway. The second metrics source is [Panopticon](https://blog.byte.builders/post/introducing-panopticon/), which is a generic state metric exporter for Kubernetes developed by AppsCode. It watches Stash CRDs and exports the necessary metrics. + +The following diagram shows the logical structure of the Stash monitoring flow.
Stash Monitoring Flow @@ -29,60 +31,69 @@ Stash uses [Prometheus PushGateway](https://github.com/prometheus/pushgateway) t Stash operator runs two containers. The `operator` container runs controllers and other necessary stuff and the `pushgateway` container runs [prom/pushgateway](https://hub.docker.com/r/prom/pushgateway) image. Stash sidecar from different workloads and backup/restore jobs pushes its metrics to this pushgateway. The pushgateway exposes the metrics at `/metrics` path of `:56789` port. Then, a Prometheus server scrapes these metrics through `stash` or `stash-enterprise` Service and acts as a data source of [Grafana](https://grafana.com/) dashboard. Stash operator itself also provides some valuable metrics at `/metrics` path of `:8443` port. +The Panopticon tool runs as a separate workload. It watches for Stash CRDs and exports relevant metrics. + ## Available Metrics Stash exports metrics for the backup process, restore process, repository status, etc. This section will list the metrics exported by Stash for different processes. ### Backup Metrics -This section lists the metrics Stash exports for the backup process. +This section lists the metrics available for the Stash backup process. Some of the metrics are only available in the Stash Enterprise edition. **Backup Session Metrics:** A backup session represents a backup run. Stash exports the following metrics regarding the overall backup session. 
-| Metric Name | Usage | -| ---------------------------------------- | -------------------------------------------------------------------------------- | -| `stash_backup_session_success` | Indicates whether the entire backup session was succeeded or not | -| `stash_backup_target_count_total` | Indicates the total number of targets that were backed up in this backup session | -| `stash_backup_session_duration_seconds` | Indicates total time taken to complete the entire backup session | -| `stash_backup_last_success_time_seconds` | Indicates the time(in Unix epoch) when the last backup session was succeeded | +| Metric Name | Usage | Community | Enterprise | +| ---------------------------------------- | -------------------------------------------------------------------------------- | --------- | ---------- | +| `stash_backupsession_created` | Indicates the timestamp when the BackupSession was created | ✗ | ✓ | +| `stash_backupsession_info` | Metrics about the BackupSession owner, phase etc. | ✗ | ✓ | +| `stash_backup_session_success` | Indicates whether the entire backup session was succeeded or not | ✓ | ✓ | +| `stash_backup_target_count_total` | Indicates the total number of targets that were backed up in this backup session | ✓ | ✓ | +| `stash_backup_session_duration_seconds` | Indicates total time taken to complete the entire backup session | ✓ | ✓ | +| `stash_backup_last_success_time_seconds` | Indicates the time(in Unix epoch) when the last backup session was succeeded | ✓ | ✓ | **Backup Target Metrics:** In each backup session, Stash takes backup of one or more targets. Stash exports the following metrics for the individual backup target. 
-| Metric Name | Usage | -| ----------------------------------------------- | -------------------------------------------------------------------------------------- | -| `stash_backup_target_success` | Indicates whether the backup for a target has succeeded or not | -| `stash_backup_target_host_count_total` | Indicates the total number of hosts that was backed up for this target | -| `stash_backup_target_last_success_time_seconds` | Indicates the time (in Unix epoch) when the last backup was successful for this target | +| Metric Name | Usage | Community | Enterprise | +| ----------------------------------------------- | -------------------------------------------------------------------------------------- | --------- | ---------- | +| `stash_backupconfiguration_created` | Indicates the timestamp when the BackupConfiguration was created | ✗ | ✓ | +| `stash_backupconfiguration_info` | Metrics about backup target, schedule, driver etc. | ✗ | ✓ | +| `stash_backupconfiguration_conditions` | Metric about condition of backup setup | ✗ | ✓ | +| `stash_backup_target_success` | Indicates whether the backup for a target has succeeded or not | ✓ | ✓ | +| `stash_backup_target_host_count_total` | Indicates the total number of hosts that was backed up for this target | ✓ | ✓ | +| `stash_backup_target_last_success_time_seconds` | Indicates the time (in Unix epoch) when the last backup was successful for this target | ✓ | ✓ | + **Backup Host Metrics:** Stash may take a backup of multiple hosts for a single target. The following metrics are available for the individual backup hosts. 
-| Metric Name | Usage | -| ------------------------------------------------ | ---------------------------------------------------------------------------- | -| `stash_backup_host_backup_success` | Indicates whether the backup for a host succeeded or not | -| `stash_backup_host_data_size_bytes` | Total size of the target data to backup for a host (in bytes) | -| `stash_backup_host_data_uploaded_bytes` | Amount of data uploaded to the repository for a host (in bytes) | -| `stash_backup_host_files_total` | Total number of files that has been backed up for a host | -| `stash_backup_host_files_new` | Total number of new files that has been created since last backup for a host | -| `stash_backup_host_files_modified` | Total number of files that has been modified since last backup for a host | -| `stash_backup_host_files_unmodified` | Total number of files that has not been changed since last backup for a host | -| `stash_backup_host_backup_duration_seconds` | Indicates total time taken to complete the backup process for a host | -| `stash_backup_host_data_processing_time_seconds` | Total time taken to process the target data for a host | +| Metric Name | Usage | Community | Enterprise | +| ------------------------------------------------ | ---------------------------------------------------------------------------- | --------- | ---------- | +| `stash_backup_host_backup_success` | Indicates whether the backup for a host succeeded or not | ✓ | ✓ | +| `stash_backup_host_data_size_bytes` | Total size of the target data to backup for a host (in bytes) | ✓ | ✓ | +| `stash_backup_host_data_uploaded_bytes` | Amount of data uploaded to the repository for a host (in bytes) | ✓ | ✓ | +| `stash_backup_host_files_total` | Total number of files that has been backed up for a host | ✓ | ✓ | +| `stash_backup_host_files_new` | Total number of new files that has been created since last backup for a host | ✓ | ✓ | +| `stash_backup_host_files_modified` | Total number of files that has been 
modified since last backup for a host | ✓ | ✓ | +| `stash_backup_host_files_unmodified` | Total number of files that has not been changed since last backup for a host | ✓ | ✓ | +| `stash_backup_host_backup_duration_seconds` | Indicates total time taken to complete the backup process for a host | ✓ | ✓ | +| `stash_backup_host_data_processing_time_seconds` | Total time taken to process the target data for a host | ✓ | ✓ | ### Repository Metrics Stash exports the following metrics for a repository. -| Metric Name | Usage | -| ----------------------------------- | ----------------------------------------------------------------------------------------------------- | -| `stash_repository_integrity` | Result of repository integrity check after the last backup | -| `stash_repository_size_bytes` | Indicates size of repository after last backup (in bytes) | -| `stash_repository_snapshot_count` | Indicates the number of snapshots stored in the repository | -| `stash_repository_snapshot_cleaned` | Indicates the number of old snapshots cleaned up according to retention policy on last backup session | +| Metric Name | Usage | Community | Enterprise | +| ----------------------------------- | ----------------------------------------------------------------------------------------------------- | --------- | ---------- | +| `stash_repository_created` | Indicates the timestamp when the Repository has been created | ✗ | ✓ | +| `stash_repository_integrity` | Result of repository integrity check after the last backup | ✓ | ✓ | +| `stash_repository_size_bytes` | Indicates size of repository after last backup (in bytes) | ✓ | ✓ | +| `stash_repository_snapshot_count` | Indicates the number of snapshots stored in the repository | ✓ | ✓ | +| `stash_repository_snapshot_cleaned` | Indicates the number of old snapshots cleaned up according to retention policy on last backup session | ✓ | ✓ | ### Restore Metrics @@ -92,29 +103,31 @@ This section lists the metrics Stash exports for the restore 
process. A restore session represents a restore run. Stash exports the following metrics regarding the overall restore process. -| Metric Name | Usage | -| ---------------------------------------- | -------------------------------------------------------------------------------- | -| `stash_restore_session_success` | Indicates whether the entire restore session was succeeded or not | -| `stash_restore_session_duration_seconds` | Indicates the total time taken to complete the entire restore session | -| `stash_restore_target_count_total` | Indicates the total number of targets that were restored in this restore session | +| Metric Name | Usage | Community | Enterprise | +| ---------------------------------------- | -------------------------------------------------------------------------------- | --------- | ---------- | +| `stash_restoresession_created` | Indicates the timestamp when the RestoreSession has been created | ✗ | ✓ | +| `stash_restoresession_info` | Metrics about RestoreSession's target, phase etc. | ✗ | ✓ | +| `stash_restore_session_success` | Indicates whether the entire restore session was succeeded or not | ✓ | ✓ | +| `stash_restore_session_duration_seconds` | Indicates the total time taken to complete the entire restore session | ✓ | ✓ | +| `stash_restore_target_count_total` | Indicates the total number of targets that were restored in this restore session | ✓ | ✓ | **Restore Target Metrics:** Stash restore one or more targets in each restore run. Stash exports the following metrics regarding a restore target. 
-| Metric Name | Usage | -| --------------------------------------- | ------------------------------------------------------------------------------ | -| `stash_restore_target_success` | Indicates whether the restore for a target has succeeded or not | -| `stash_restore_target_host_count_total` | Indicates the total number of hosts that were restored for this restore target | +| Metric Name | Usage | Community | Enterprise | +| --------------------------------------- | ------------------------------------------------------------------------------ | --------- | ---------- | +| `stash_restore_target_success` | Indicates whether the restore for a target has succeeded or not | ✓ | ✓ | +| `stash_restore_target_host_count_total` | Indicates the total number of hosts that were restored for this restore target | ✓ | ✓ | **Restore Host Metrics:** Stash may restore multiple hosts for a single target. The following metrics are available for the individual restore host. -| Metric Name | Usage | -| --------------------------------------------- | ------------------------------------------------------------------------- | -| `stash_restore_host_restore_success` | Indicates whether the restore process was succeeded for a host | -| `stash_restore_host_restore_duration_seconds` | Indicates the total time taken to complete the restore process for a host | +| Metric Name | Usage | Community | Enterprise | +| --------------------------------------------- | ------------------------------------------------------------------------- | --------- | ---------- | +| `stash_restore_host_restore_success` | Indicates whether the restore process was succeeded for a host | ✓ | ✓ | +| `stash_restore_host_restore_duration_seconds` | Indicates the total time taken to complete the restore process for a host | ✓ | ✓ | ### Operator Metrics @@ -198,8 +211,8 @@ The Pushgateway itself also exports some metrics related to Pushgateway build in You have to enable Prometheus monitoring during installing / 
upgrading Stash. The following parameters are available to configure monitoring in Stash. -| Helm Values | Acceptable Values | Default | Usage | -| -------------------------------------------------- | --------------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Helm Values | Acceptable Values | Default | Usage | +| --------------------------------------------------- | --------------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `stash-enterprise.monitoring.agent` | `prometheus.io/builtin` or `prometheus.io/operator` | `none` | Specify which monitoring agent to use for monitoring Stash. | | `stash-enterprise.monitoring.backup` | `true` or `false` | `false` | Specify whether to monitor Stash backup and restore. | | `stash-enterprise.monitoring.operator` | `true` or `false` | `false` | Specify whether to monitor Stash operator. |