diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93afc317..c9a658b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,4 +23,4 @@ jobs: - name: Check links run: | - liche -r docs -d $(pwd) -c 10 -p -h -l -x '^('"$(pwd)"'/docs/addons/.*/guides/.*|.*github.com.*|.*api.slack.com.*|.*askapache.com.*)$' + liche -r docs -d $(pwd) -c 10 -p -h -l -x '^('"$(pwd)"'/docs/addons/.*/guides/.*|.*github.com.*|.*api.slack.com.*|.*askapache.com.*|.*twitter.com.*)$' diff --git a/docs/examples/monitoring/coreos/prometheus-service.yaml b/docs/examples/monitoring/coreos/prometheus-service.yaml deleted file mode 100644 index bf9b1c75..00000000 --- a/docs/examples/monitoring/coreos/prometheus-service.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: prometheus - namespace: monitoring -spec: - type: ClusterIP - ports: - - name: web - port: 9090 - protocol: TCP - targetPort: 9090 - selector: - app: prometheus diff --git a/docs/examples/monitoring/coreos/prometheus.yaml b/docs/examples/monitoring/coreos/prometheus.yaml deleted file mode 100644 index 5e78946e..00000000 --- a/docs/examples/monitoring/coreos/prometheus.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: prometheus - namespace: monitoring - labels: - k8s-app: prometheus -spec: - replicas: 1 - serviceAccountName: prometheus - serviceMonitorSelector: - matchLabels: - k8s-app: prometheus - secrets: - - stash-apiserver-cert - resources: - requests: - memory: 400Mi diff --git a/docs/examples/monitoring/profiler.yaml b/docs/examples/monitoring/profiler.yaml deleted file mode 100644 index 1e1a3503..00000000 --- a/docs/examples/monitoring/profiler.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: appscode:system:profiler -rules: -- nonResourceURLs: ["/debug/pprof/", "/debug/pprof/*"] - verbs: ["get", "post"] ---- -apiVersion: 
rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: appscode:system:profiler -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: appscode:system:profiler -subjects: -- apiGroup: rbac.authorization.k8s.io - kind: User - name: system:anonymous diff --git a/docs/guides/latest/monitoring/builtin.md b/docs/guides/latest/monitoring/builtin.md deleted file mode 100644 index c76400f0..00000000 --- a/docs/guides/latest/monitoring/builtin.md +++ /dev/null @@ -1,363 +0,0 @@ ---- -title: Builtin Prometheus | Stash -description: Monitor Stash using official Prometheus server -menu: - docs_{{ .version }}: - identifier: monitoring-builtin - name: Builtin Prometheus - parent: monitoring - weight: 20 -product_name: stash -menu_name: docs_{{ .version }} -section_menu_id: guides ---- - -# Monitoring Stash with builtin Prometheus - -This tutorial will show you how to configure builtin [Prometheus](https://github.com/prometheus/prometheus) scraper to monitor Stash backup and restore operations as well as Stash operator. - -## Before You Begin - -At first, you need to have a Kubernetes cluster, and the kubectl command-line tool must be configured to communicate with your cluster. If you do not already have a cluster, you can create one by using [kind](https://kind.sigs.k8s.io/docs/user/quick-start/). - -To keep Prometheus resources isolated, we are going to use a separate namespace to deploy Prometheus server. - -```bash -$ kubectl create ns monitoring -namespace/monitoring created -``` - -## Enable Monitoring in Stash - -Enable Prometheus monitoring using `prometheus.io/builtin` agent while installing Stash. To know details about how to enable monitoring see [here](/docs/guides/v1alpha1/monitoring/overview.md#how-to-enable-monitoring). Here, we are going to enable monitoring for `backup`, `restore` and `operator` metrics using Helm 3. 
- -```bash -$ helm install stash-operator appscode/stash --version {{< param "info.version" >}} \ - --namespace kube-system \ - --set monitoring.agent=prometheus.io/builtin \ - --set monitoring.backup=true \ - --set monitoring.operator=true \ - --set monitoring.prometheus.namespace=monitoring -``` - -This will add necessary annotations to `stash-operator` service. Prometheus server will scrape metrics using those annotations. Let's check which annotations are added to the service, - -```yaml -$ kubectl get service -n kube-system stash-operator -o yaml -apiVersion: v1 -kind: Service -metadata: - annotations: - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"v1","kind":"Service","metadata":{"annotations":{},"labels":{"app":"stash"},"name":"stash-operator","namespace":"kube-system"},"spec":{"ports":[{"name":"api","port":443,"targetPort":8443},{"name":"pushgateway","port":56789,"targetPort":56789}],"selector":{"app":"stash"}}} - prometheus.io/operator_path: /metrics - prometheus.io/operator_port: "8443" - prometheus.io/operator_scheme: https - prometheus.io/pushgateway_path: /metrics - prometheus.io/pushgateway_port: "56789" - prometheus.io/pushgateway_scheme: http - prometheus.io/scrape: "true" - creationTimestamp: 2018-11-07T04:10:26Z - labels: - app: stash - name: stash-operator - namespace: kube-system - resourceVersion: "1649" - selfLink: /api/v1/namespaces/kube-system/services/stash-operator - uid: 0e73664a-e243-11e8-a768-080027767ca3 -spec: - clusterIP: 10.105.200.228 - ports: - - name: api - port: 443 - protocol: TCP - targetPort: 8443 - - name: pushgateway - port: 56789 - protocol: TCP - targetPort: 56789 - selector: - app: stash - sessionAffinity: None - type: ClusterIP -status: - loadBalancer: {} -``` - -Here, `prometheus.io/scrape: "true"` annotation indicates that Prometheus should scrape metrics for this service. - -The following three annotations point to `pushgateway` endpoints which provides backup and restore metrics. 
- -```ini -prometheus.io/pushgateway_path: /metrics -prometheus.io/pushgateway_port: "56789" -prometheus.io/pushgateway_scheme: http -``` - -The following three annotations point to `api` endpoints which provides operator specific metrics. - -```ini -prometheus.io/operator_path: /metrics -prometheus.io/operator_port: "8443" -prometheus.io/operator_scheme: https -``` - -Now, we are ready to configure our Prometheus server to scrape those metrics. - -## Deploy Prometheus Server - -We have deployed Stash in `kube-system` namespace. Stash exports operator metrics via TLS secured `api` endpoint. So, Prometheus server need to provide certificate while scraping metrics from this endpoint. Stash has created a secret named `stash-apiserver-certs` with this certificate in `monitoring` namespace as we have specified that we are going to deploy Prometheus in that namespace through `--prometheus-namespace` flag. We have to mount this secret in Prometheus deployment. - -Let's check `stash-apiserver-cert` certificate has been created in `monitoring` namespace. - -```bash -$ kubectl get secret -n monitoring -l=app=stash -NAME TYPE DATA AGE -stash-apiserver-cert kubernetes.io/tls 2 2m21s -``` - -**Create RBAC:** - -If you are using a RBAC enabled cluster, you have to give necessary RBAC permissions for Prometheus. Let's create necessary RBAC stuffs for Prometheus, - -```bash -$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/examples/guides/latest/monitoring/builtin/prom-rbac.yaml -clusterrole.rbac.authorization.k8s.io/stash-prometheus-server created -serviceaccount/stash-prometheus-server created -clusterrolebinding.rbac.authorization.k8s.io/stash-prometheus-server created -``` - -**Create ConfigMap:** - -Now, create a ConfigMap with necessary scraping configuration. Bellow, the YAML of ConfigMap that we are going to create in this tutorial. 
- -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: stash-prometheus-server-conf - labels: - name: stash-prometheus-server-conf - namespace: monitoring -data: - prometheus.yml: |- - global: - scrape_interval: 30s - scrape_timeout: 10s - evaluation_interval: 30s - scrape_configs: - - job_name: stash-pushgateway - scrape_interval: 30s - scrape_timeout: 10s - metrics_path: /metrics - scheme: http - honor_labels: true - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_app] - regex: stash # default label for stash-operator service is "app: stash". customize this field according to label of stash-operator service of your setup. - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - regex: true - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: pushgateway - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_pushgateway_path] - regex: (.+) - target_label: __metrics_path__ - action: replace - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_pushgateway_scheme] - action: replace - target_label: __scheme__ - regex: (https?) 
- - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_pushgateway_port] - action: replace - target_label: __address__ - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_namespace] - separator: ; - regex: (.*) - target_label: namespace - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_name] - separator: ; - regex: (.*) - target_label: service - replacement: $1 - action: replace - - job_name: stash-operator - scrape_interval: 30s - scrape_timeout: 10s - metrics_path: /metrics - scheme: https - kubernetes_sd_configs: - - role: endpoints - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - tls_config: - ca_file: /etc/prometheus/secret/stash-apiserver-cert/tls.crt - server_name: stash-operator.kube-system.svc - relabel_configs: - - source_labels: [__meta_kubernetes_service_label_app] - regex: stash # default label for stash-operator service is "app: stash". customize this field according to label of stash-operator service of your setup. - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - regex: true - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: api - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_operator_path] - regex: (.+) - target_label: __metrics_path__ - action: replace - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_operator_scheme] - action: replace - target_label: __scheme__ - regex: (https?) 
- - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_operator_port] - action: replace - target_label: __address__ - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_namespace] - separator: ; - regex: (.*) - target_label: namespace - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_name] - separator: ; - regex: (.*) - target_label: service - replacement: $1 - action: replace -``` - -Here, we have two scraping job. One is `stash-pushgateway` that scrapes backup and restore metrics and another is `stash-operator` which scrapes operator metrics. - -Look at the `tls_config` field of `stash-operator` job. We have provided certificate file through `ca_file` field. This certificate comes from `stash-apiserver-cert` that we are going to mount in Prometheus deployment. Here, `server_name` is used to verify hostname. In our case, the certificate is valid for hostname `server` and `stash-operator.kube-system.svc`. - -Also note that, we have provided a bearer-token file through `bearer_token_file` field. This file is token for `stash-prometheus-server` serviceaccount that we have created while creating RBAC stuffs. This is required for authorizing Prometheus to Stash API Server. - -Let's create the ConfigMap we have shown above, - -```bash -$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/examples/guides/latest/monitoring/builtin/prom-config.yaml -configmap/stash-prometheus-server-conf created -``` - -**Deploy Prometheus:** - -Now, we are ready to deploy Prometheus server. YAML for the deployment that we are going to create for Prometheus is shown below. 
- -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: stash-prometheus-server - namespace: monitoring -spec: - replicas: 1 - selector: - matchLabels: - app: prometheus - template: - metadata: - labels: - app: prometheus - spec: - serviceAccountName: stash-prometheus-server - containers: - - name: prometheus - image: prom/prometheus:v2.4.3 - args: - - "--config.file=/etc/prometheus/prometheus.yml" - - "--storage.tsdb.path=/prometheus/" - ports: - - containerPort: 9090 - volumeMounts: - - name: prometheus-config-volume - mountPath: /etc/prometheus/ - - name: prometheus-storage-volume - mountPath: /prometheus/ - - name: stash-apiserver-cert - mountPath: /etc/prometheus/secret/stash-apiserver-cert - volumes: - - name: prometheus-config-volume - configMap: - defaultMode: 420 - name: stash-prometheus-server-conf - - name: prometheus-storage-volume - emptyDir: {} - - name: stash-apiserver-cert - secret: - defaultMode: 420 - secretName: stash-apiserver-cert - items: # avoid mounting private key - - key: tls.crt - path: tls.crt -``` - -Notice that, we have mounted `stash-apiserver-cert` secret as a volume at `/etc/prometheus/secret/stash-apiserver-cert` directory. - -Now, let's create the deployment, - -```bash -$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/examples/guides/latest/monitoring/builtin/prom-deployment.yaml -deployment.apps/stash-prometheus-server created -``` - -### Verify Monitoring Metrics - -Prometheus server is running on port `9090`. We are going to use [port forwarding](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/) to access Prometheus dashboard. Run following command on a separate terminal, - -```bash -$ kubectl port-forward -n monitoring stash-prometheus-server-9ddbf79b6-8l6hk 9090 -Forwarding from 127.0.0.1:9090 -> 9090 -Forwarding from [::1]:9090 -> 9090 -``` - -Now, we can access the dashboard at `localhost:9090`. 
Open [http://localhost:9090](http://localhost:9090) in your browser. You should see `pushgateway` and `api` endpoints of `stash-operator` service as targets. - -
-  Stash Monitoring Flow -
Fig: Prometheus dashboard
-
- -**Backup and Restore Metrics:** - -When you perform a backup or restore using Stash, it will send respective Prometheus metrics. You can check if the metrics have been sent successfully by performing backup and restore as described [here](/docs/guides/latest/workloads/deployment.md). - -A screenshot that shows Prometheus metrics send by Stash backup and restore process is given below, - -
-  Stash Monitoring Flow -
Fig: Stash Backup and Restore metrics
-
- -## Cleanup - -To cleanup the Kubernetes resources created by this tutorial, run: - -```bash -kubectl delete clusterrole stash-prometheus-server -kubectl delete clusterrolebinding stash-prometheus-server - -kubectl delete serviceaccount/stash-prometheus-server -n monitoring -kubectl delete configmap/stash-prometheus-server-conf -n monitoring -kubectl delete deployment stash-prometheus-server -n monitoring -kubectl delete secret stash-apiserver-cert -n monitoring - -kubectl delete ns monitoring -``` - -To uninstall Stash follow this [guide](/docs/setup/README.md). - -## Next Steps - -- Learn how monitoring in Stash works from [here](/docs/guides/latest/monitoring/overview.md). -- Learn how to monitor Stash using Prometheus operator from [here](/docs/guides/latest/monitoring/coreos.md). diff --git a/docs/guides/latest/monitoring/coreos.md b/docs/guides/latest/monitoring/coreos.md deleted file mode 100644 index 3e51083e..00000000 --- a/docs/guides/latest/monitoring/coreos.md +++ /dev/null @@ -1,198 +0,0 @@ ---- -title: CoreOS Prometheus Operator | Stash -description: Monitor Stash using Prometheus operator -menu: - docs_{{ .version }}: - identifier: monitoring-coreos-operator - name: Prometheus Operator - parent: monitoring - weight: 30 -product_name: stash -menu_name: docs_{{ .version }} -section_menu_id: guides ---- - -# Monitoring Using CoreOS Prometheus Operator - -CoreOS [prometheus-operator](https://github.com/coreos/prometheus-operator) provides simple and Kubernetes native way to deploy and configure Prometheus server. This tutorial will show you how to use Prometheus operator for monitoring Stash. - -## Before You Begin - -- At first, you need to have a Kubernetes cluster, and the kubectl command-line tool must be configured to communicate with your cluster. If you do not already have a cluster, you can create one by using [kind](https://kind.sigs.k8s.io/docs/user/quick-start/). 
- -- To keep Prometheus resources isolated, we are going to use a separate namespace to deploy Prometheus operator and respective resources. - - ```bash - $ kubectl create ns monitoring - namespace/monitoring created - ``` - -- We need a CoreOS prometheus-operator instance running. If you already don't have a running instance, deploy one following the docs from [here](https://github.com/appscode/third-party-tools/blob/master/monitoring/prometheus/coreos-operator/README.md). - -## Enable Monitoring in Stash - -Enable Prometheus monitoring using `prometheus.io/operator` agent while installing Stash. To know details about how to enable monitoring see [here](/docs/guides/latest/monitoring/overview.md#how-to-enable-monitoring). - -Here, we are going to enable monitoring for both `backup`, `restore` and `operator` metrics using Helm 3. - -```bash -$ helm install stash-operator appscode/stash --version {{< param "info.version" >}} \ - --namespace kube-system \ - --set monitoring.agent=prometheus.io/operator \ - --set monitoring.backup=true \ - --set monitoring.operator=true \ - --set monitoring.prometheus.namespace=monitoring \ - --set monitoring.serviceMonitor.labels.k8s-app=prometheus -``` - -This will create a `ServiceMonitor` crd with name `stash-servicemonitor` in monitoring namespace for monitoring endpoints of `stash-operator` service. This ServiceMonitor will have label `k8s-app: prometheus` provided by `--servicemonitor-label` flag. This label will be used by Prometheus crd to select this ServiceMonitor. 
- -Let's check the ServiceMonitor crd using following command, - -```yaml -$ kubectl get servicemonitor stash-servicemonitor -n monitoring -o yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - annotations: - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"monitoring.coreos.com/v1","kind":"ServiceMonitor","metadata":{"annotations":{},"labels":{"k8s-app":"prometheus"},"name":"stash-servicemonitor","namespace":"monitoring"},"spec":{"endpoints":[{"honorLabels":true,"port":"pushgateway"},{"bearerTokenFile":"/var/run/secrets/kubernetes.io/serviceaccount/token","port":"api","scheme":"https","tlsConfig":{"caFile":"/etc/prometheus/secrets/stash-apiserver-cert/tls.crt","serverName":"stash-operator.kube-system.svc"}}],"namespaceSelector":{"matchNames":["kube-system"]},"selector":{"matchLabels":{"app":"stash"}}}} - creationTimestamp: 2018-11-21T09:35:37Z - generation: 1 - labels: - k8s-app: prometheus - name: stash-servicemonitor - namespace: monitoring - resourceVersion: "6126" - selfLink: /apis/monitoring.coreos.com/v1/namespaces/monitoring/servicemonitors/stash-servicemonitor - uid: cd6cca14-ed70-11e8-8838-0800272dd258 -spec: - endpoints: - - honorLabels: true - port: pushgateway - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - port: api - scheme: https - tlsConfig: - caFile: /etc/prometheus/secrets/stash-apiserver-cert/tls.crt - serverName: stash-operator.kube-system.svc - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - app: stash -``` - -Here, we have two endpoints at `spec.endpoints` field. One is `pushgateway` that exports backup and recovery metrics and another is `api` which exports operator metrics. - -Stash exports operator metrics via TLS secured `api` endpoint. So, Prometheus server need to provide certificate while scraping metrics from this endpoint. 
Stash has created a secret named `stash-apiserver-certs` with this certificate in `monitoring` namespace as we have specified that we are going to deploy Prometheus in that namespace through `--prometheus-namespace` flag. We have to specify this secret in Prometheus crd through `spec.secrets` field. Prometheus operator will mount this secret at `/etc/prometheus/secrets/stash-apiserver-cert` directory of respective Prometheus pod. So, we need to configure `tlsConfig` field to use that certificate. Here, `caFile` indicates the certificate to use and `serverName` is used to verify hostname. In our case, the certificate is valid for hostname `server` and `stash-operator.kube-system.svc`. - -Let's check secret `stash-apiserver-cert` has been created in monitoring namespace. - -```bash -$ kubectl get secret -n monitoring -l=app=stash -NAME TYPE DATA AGE -stash-apiserver-cert kubernetes.io/tls 2 31m -``` - -Also note that, there is a `bearerTokenFile` field. This file is token for the serviceaccount that will be created while creating RBAC stuff for Prometheus crd. This is required for authorizing Prometheus to scrape Stash API server. - -Now, we are ready to deploy Prometheus server. - -## Deploy Prometheus Server - -In order to deploy Prometheus server, we have to create `Prometheus` crd. Prometheus crd defines a desired Prometheus server setup. For more details about `Prometheus` crd, please visit [here](https://github.com/coreos/prometheus-operator/blob/master/Documentation/design.md#prometheus). - -If you are using a RBAC enabled cluster, you have to give necessary permissions to Prometheus. Check the documentation to see required RBAC permission from [here](https://github.com/appscode/third-party-tools/blob/master/monitoring/prometheus/coreos-operator/README.md#deploy-prometheus-server). 
- -**Create Prometheus:** - -Below is the YAML of `Prometheus` crd that we are going to create for this tutorial, - -```yaml -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: prometheus - namespace: monitoring - labels: - k8s-app: prometheus -spec: - replicas: 1 - serviceAccountName: prometheus - serviceMonitorSelector: - matchLabels: - k8s-app: prometheus - secrets: - - stash-apiserver-cert - resources: - requests: - memory: 400Mi -``` - -Here, `spec.serviceMonitorSelector` is used to select the `ServiceMonitor` crd that is created by Stash. We have provided `stash-apiserver-cert` secret in `spec.secrets` field. This will be mounted in Prometheus pod. - -Let's create the `Prometheus` object we have shown above, - -```bash -$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/examples/guides/latest/monitoring/coreos/prometheus.yaml -prometheus.monitoring.coreos.com/prometheus created -``` - -Prometheus operator watches for `Prometheus` crd. Once a `Prometheus` crd is created, Prometheus operator generates respective configuration and creates a StatefulSet to run Prometheus server. - -Let's check StatefulSet has been created, - -```bash -$ kubectl get statefulset -n monitoring -NAME DESIRED CURRENT AGE -prometheus-prometheus 1 1 4m -``` - -Check StatefulSet's pod is running, - -```bash -$ kubectl get pod prometheus-prometheus-0 -n monitoring -NAME READY STATUS RESTARTS AGE -prometheus-prometheus-0 2/2 Running 0 6m -``` - -Now, we are ready to access Prometheus dashboard. - -### Verify Monitoring Metrics - -Prometheus server is running on port `9090`. We are going to use [port forwarding](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/) to access Prometheus dashboard. 
Run following command on a separate terminal, - -```bash -$ kubectl port-forward -n monitoring prometheus-prometheus-0 9090 -Forwarding from 127.0.0.1:9090 -> 9090 -Forwarding from [::1]:9090 -> 9090 -``` - -Now, we can access the dashboard at `localhost:9090`. Open [http://localhost:9090](http://localhost:9090) in your browser. You should see `pushgateway` and `api` endpoints of `stash-operator` service as target. - -
-  Stash Monitoring Flow -
Fig: Prometheus dashboard
-
- -## Cleanup - -To cleanup the Kubernetes resources created by this tutorial, run: - -```bash -# cleanup Prometheus resources -kubectl delete -n monitoring prometheus prometheus -kubectl delete -n monitoring secret stash-apiserver-cert - -# delete namespace -kubectl delete ns monitoring -``` - -To uninstall Stash follow this [guide](/docs/setup/README.md). - -## Next Steps - -- Learn how monitoring in Stash works from [here](/docs/guides/latest/monitoring/overview.md). -- Learn how to monitor Stash using builtin Prometheus from [here](/docs/guides/latest/monitoring/builtin.md). diff --git a/docs/examples/monitoring/builtin/prom-config.yaml b/docs/guides/latest/monitoring/examples/prom-config.yaml similarity index 85% rename from docs/examples/monitoring/builtin/prom-config.yaml rename to docs/guides/latest/monitoring/examples/prom-config.yaml index 6cc0a2af..e2031712 100644 --- a/docs/examples/monitoring/builtin/prom-config.yaml +++ b/docs/guides/latest/monitoring/examples/prom-config.yaml @@ -21,8 +21,8 @@ data: kubernetes_sd_configs: - role: endpoints relabel_configs: - - source_labels: [__meta_kubernetes_service_label_app] - regex: stash # default label for stash-operator service is "app: stash". customize this field according to label of stash-operator service of your setup. + - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_instance] + regex: stash # default label for stash Service is "app.kubernetes.io/instance: stash". customize this field according to label of stash Service of your setup. 
action: keep - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] regex: true @@ -65,10 +65,10 @@ data: bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token tls_config: ca_file: /etc/prometheus/secret/stash-apiserver-cert/tls.crt - server_name: stash-operator.kube-system.svc + server_name: stash.kube-system.svc relabel_configs: - - source_labels: [__meta_kubernetes_service_label_app] - regex: stash # default label for stash-operator service is "app: stash". customize this field according to label of stash-operator service of your setup. + - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_instance] + regex: stash # default label for stash Service is "app.kubernetes.io/instance: stash". customize this field according to label of stash Service of your setup. action: keep - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] regex: true diff --git a/docs/examples/monitoring/builtin/prom-deployment.yaml b/docs/guides/latest/monitoring/examples/prom-deployment.yaml similarity index 96% rename from docs/examples/monitoring/builtin/prom-deployment.yaml rename to docs/guides/latest/monitoring/examples/prom-deployment.yaml index ade55c79..b509dc7e 100644 --- a/docs/examples/monitoring/builtin/prom-deployment.yaml +++ b/docs/guides/latest/monitoring/examples/prom-deployment.yaml @@ -16,7 +16,7 @@ spec: serviceAccountName: stash-prometheus-server containers: - name: prometheus - image: prom/prometheus:v2.4.3 + image: prom/prometheus:v2.23.0 args: - "--config.file=/etc/prometheus/prometheus.yml" - "--storage.tsdb.path=/prometheus/" diff --git a/docs/examples/monitoring/builtin/prom-rbac.yaml b/docs/guides/latest/monitoring/examples/prom-rbac.yaml similarity index 100% rename from docs/examples/monitoring/builtin/prom-rbac.yaml rename to docs/guides/latest/monitoring/examples/prom-rbac.yaml diff --git a/docs/guides/latest/monitoring/images/monitoring-structure.svg 
b/docs/guides/latest/monitoring/images/monitoring-structure.svg new file mode 100644 index 00000000..145007af --- /dev/null +++ b/docs/guides/latest/monitoring/images/monitoring-structure.svg @@ -0,0 +1,1891 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/guides/latest/monitoring/prom-builtin-target.png b/docs/guides/latest/monitoring/images/prom_builtin_target.png similarity index 100% rename from docs/images/guides/latest/monitoring/prom-builtin-target.png rename to docs/guides/latest/monitoring/images/prom_builtin_target.png diff --git a/docs/guides/latest/monitoring/images/prom_operator_web_ui.png b/docs/guides/latest/monitoring/images/prom_operator_web_ui.png new file mode 100644 index 00000000..dd21d916 Binary files /dev/null and b/docs/guides/latest/monitoring/images/prom_operator_web_ui.png differ diff --git a/docs/guides/latest/monitoring/overview.md b/docs/guides/latest/monitoring/overview.md index 8d42d0a3..df80a288 100644 --- a/docs/guides/latest/monitoring/overview.md +++ b/docs/guides/latest/monitoring/overview.md 
@@ -16,174 +16,289 @@ section_menu_id: guides # Monitoring Stash -Stash has native support for monitoring via [Prometheus](https://prometheus.io/). You can use builtin [Prometheus](https://github.com/prometheus/prometheus) scraper or [CoreOS Prometheus Operator](https://github.com/coreos/prometheus-operator) to monitor Stash. This tutorial will show you how this monitoring works with Stash and how to enable them. +Stash has native support for monitoring via [Prometheus](https://prometheus.io/). You can use builtin [Prometheus](https://github.com/prometheus/prometheus) scraper or [prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) to monitor Stash. This tutorial will show you how Prometheus monitoring works with Stash, what metrics Stash exports, and how to enable monitoring. -## Overview +## How Prometheus monitoring works -Stash uses [Prometheus PushGateway](https://github.com/prometheus/pushgateway) to export the metrics for backup & restore operations. Following diagram shows the logical structure of Stash monitoring flow. +Stash uses [Prometheus PushGateway](https://github.com/prometheus/pushgateway) to export the metrics for backup & restore operations. The following diagram shows the logical structure of the Stash monitoring flow.
-  Stash Monitoring Flow + Stash Monitoring Flow
Fig: Monitoring process in Stash
-Stash operator runs two containers. The `operator` container runs controller and other necessary stuffs and the `pushgateway` container runs [prom/pushgateway](https://hub.docker.com/r/prom/pushgateway) image. Stash sidecar from different workloads pushes its metrics to this pushgateway. Then Prometheus server scrapes these metrics through `stash-operator` service. Stash operator itself also provides some metrics at `/metrics` path of `:8443` port. +The Stash operator runs two containers. The `operator` container runs the controllers and other necessary components, and the `pushgateway` container runs the [prom/pushgateway](https://hub.docker.com/r/prom/pushgateway) image. Stash sidecars from different workloads and backup/restore jobs push their metrics to this pushgateway. The pushgateway exposes the metrics at the `/metrics` path of port `:56789`. Then, a Prometheus server scrapes these metrics through the `stash` or `stash-enterprise` Service and acts as a data source for the [Grafana](https://grafana.com/) dashboard. The Stash operator itself also provides some valuable metrics at the `/metrics` path of port `:8443`. + +## Available Metrics + +Stash exports metrics for the backup process, restore process, repository status, etc. This section lists the metrics exported by Stash for the different processes. 
### Backup Metrics -Following metrics available for backup process: - -| Metric | Uses | -| --------------------------------------------- | -------------------------------------------------------------------- | -| `stash_backup_setup_success` | Indicates whether backup was successfully setup for the target | -| `stash_backup_session_success` | Indicates whether the current backup session succeeded or not | -| `stash_backup_session_duration_total_seconds` | Total time taken to complete the backup session | -| `stash_backup_data_size_bytes` | Total size of the target data to backup (in bytes) | -| `stash_backup_data_uploaded_bytes` | Amount of data uploaded to the repository in this session (in bytes) | -| `stash_backup_data_processing_time_seconds` | Total time taken to backup the target data | -| `stash_backup_files_total` | Total number of files that has been backed up | -| `stash_backup_files_new` | Total number of new files that has been created since last backup | -| `stash_backup_files_modified` | Total number of files that has been modified since last backup | -| `stash_backup_files_unmodified` | Total number of files that has not been changed since last backup | +This section lists the metrics Stash exports for the backup process. + +**Backup Session Metrics:** + +A backup session represents a backup run. Stash exports the following metrics regarding the overall backup session. 
+ +| Metric Name | Usage | +| ---------------------------------------- | -------------------------------------------------------------------------------- | +| `stash_backup_session_success` | Indicates whether the entire backup session succeeded or not | +| `stash_backup_target_count_total` | Indicates the total number of targets that were backed up in this backup session | +| `stash_backup_session_duration_seconds` | Indicates the total time taken to complete the entire backup session | +| `stash_backup_last_success_time_seconds` | Indicates the time (in Unix epoch) when the last backup session succeeded | + +**Backup Target Metrics:** +In each backup session, Stash takes a backup of one or more targets. Stash exports the following metrics for each individual backup target. + +| Metric Name | Usage | +| ----------------------------------------------- | -------------------------------------------------------------------------------------- | +| `stash_backup_target_success` | Indicates whether the backup for a target has succeeded or not | +| `stash_backup_target_host_count_total` | Indicates the total number of hosts that were backed up for this target | +| `stash_backup_target_last_success_time_seconds` | Indicates the time (in Unix epoch) when the last backup was successful for this target | + +**Backup Host Metrics:** + +Stash may take a backup of multiple hosts for a single target. The following metrics are available for the individual backup hosts. 
+ +| Metric Name | Usage | +| ------------------------------------------------ | ---------------------------------------------------------------------------- | +| `stash_backup_host_backup_success` | Indicates whether the backup for a host succeeded or not | +| `stash_backup_host_data_size_bytes` | Total size of the target data to backup for a host (in bytes) | +| `stash_backup_host_data_uploaded_bytes` | Amount of data uploaded to the repository for a host (in bytes) | +| `stash_backup_host_files_total` | Total number of files that has been backed up for a host | +| `stash_backup_host_files_new` | Total number of new files that has been created since last backup for a host | +| `stash_backup_host_files_modified` | Total number of files that has been modified since last backup for a host | +| `stash_backup_host_files_unmodified` | Total number of files that has not been changed since last backup for a host | +| `stash_backup_host_backup_duration_seconds` | Indicates total time taken to complete the backup process for a host | +| `stash_backup_host_data_processing_time_seconds` | Total time taken to process the target data for a host | ### Repository Metrics -Following metrics are available for backup repository: +Stash exports the following metrics for a repository. 
-| Metric | Uses | -| ----------------------------------- | ------------------------------------------------------------------------------------------------- | -| `stash_repository_integrity` | Result of repository integrity check after last backup | -| `stash_repository_size_bytes` | Indicates size of repository after last backup (in bytes) | -| `stash_repository_snapshot_count` | Indicates number of snapshots stored in the repository | -| `stash_repository_snapshot_cleaned` | Indicates number of old snapshots cleaned up according to retention policy on last backup session | +| Metric Name | Usage | +| ----------------------------------- | ----------------------------------------------------------------------------------------------------- | +| `stash_repository_integrity` | Result of repository integrity check after the last backup | +| `stash_repository_size_bytes` | Indicates size of repository after last backup (in bytes) | +| `stash_repository_snapshot_count` | Indicates the number of snapshots stored in the repository | +| `stash_repository_snapshot_cleaned` | Indicates the number of old snapshots cleaned up according to retention policy on last backup session | ### Restore Metrics -Following metrics are available for restore process: +This section lists the metrics Stash exports for the restore process. -| Metric | Uses | -| ---------------------------------------------- | --------------------------------------------------------- | -| `stash_restore_session_success` | Result of repository integrity check after last backup | -| `stash_restore_session_duration_total_seconds` | Indicates size of repository after last backup (in bytes) | +**Restore Session Metrics:** + +A restore session represents a restore run. Stash exports the following metrics regarding the overall restore process. 
+ +| Metric Name | Usage | +| ---------------------------------------- | -------------------------------------------------------------------------------- | +| `stash_restore_session_success` | Indicates whether the entire restore session succeeded or not | +| `stash_restore_session_duration_seconds` | Indicates the total time taken to complete the entire restore session | +| `stash_restore_target_count_total` | Indicates the total number of targets that were restored in this restore session | + +**Restore Target Metrics:** + +Stash restores one or more targets in each restore run. Stash exports the following metrics regarding a restore target. + +| Metric Name | Usage | +| --------------------------------------- | ------------------------------------------------------------------------------ | +| `stash_restore_target_success` | Indicates whether the restore for a target has succeeded or not | +| `stash_restore_target_host_count_total` | Indicates the total number of hosts that were restored for this restore target | + +**Restore Host Metrics:** + +Stash may restore multiple hosts for a single target. The following metrics are available for the individual restore host. + +| Metric Name | Usage | +| --------------------------------------------- | ------------------------------------------------------------------------- | +| `stash_restore_host_restore_success` | Indicates whether the restore process succeeded for a host | +| `stash_restore_host_restore_duration_seconds` | Indicates the total time taken to complete the restore process for a host | ### Operator Metrics -Following metrics are available for Stash operator. These metrics are accessible through `api` endpoint of `stash-operator` service. 
- -**API Server Metrics:** - -| Metric Name | Uses | -| ----------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| apiserver_audit_event_total | Counter of audit events generated and sent to the audit backend. | -| apiserver_client_certificate_expiration_seconds | Distribution of the remaining lifetime on the certificate used to authenticate a request. | -| apiserver_current_inflight_requests | Maximal number of currently used inflight request limit of this apiserver per request kind in last second. | -| apiserver_request_count | Counter of apiserver requests broken out for each verb, API resource, client, and HTTP response contentType and code. | -| apiserver_request_latencies | Response latency distribution in microseconds for each verb, resource and subresource. | -| apiserver_request_latencies_summary | Response latency summary in microseconds for each verb, resource and subresource. | -| authenticated_user_requests | Counter of authenticated requests broken out by username. | - -**Go Metrics:** - -| Metric Name | Uses | -| ------------------------------------- | ------------------------------------------------------------------ | -| go_gc_duration_seconds | A summary of the GC invocation durations. | -| go_goroutines | Number of goroutines that currently exist. | -| go_memstats_alloc_bytes | Number of bytes allocated and still in use. | -| go_memstats_alloc_bytes_total | Total number of bytes allocated, even if freed. | -| go_memstats_buck_hash_sys_bytes | Number of bytes used by the profiling bucket hash table. | -| go_memstats_frees_total | Total number of frees. | -| go_memstats_gc_sys_bytes | Number of bytes used for garbage collection system metadata. | -| go_memstats_heap_alloc_bytes | Number of heap bytes allocated and still in use. | -| go_memstats_heap_idle_bytes | Number of heap bytes waiting to be used. 
| -| go_memstats_heap_inuse_bytes | Number of heap bytes that are in use. | -| go_memstats_heap_objects | Number of allocated objects. | -| go_memstats_heap_released_bytes_total | Total number of heap bytes released to OS. | -| go_memstats_heap_sys_bytes | Number of heap bytes obtained from system. | -| go_memstats_last_gc_time_seconds | Number of seconds since 1970 of last garbage collection. | -| go_memstats_lookups_total | Total number of pointer lookups. | -| go_memstats_mallocs_total | Total number of mallocs. | -| go_memstats_mcache_inuse_bytes | Number of bytes in use by mcache structures. | -| go_memstats_mcache_sys_bytes | Number of bytes used for mcache structures obtained from system. | -| go_memstats_mspan_inuse_bytes | Number of bytes in use by mspan structures. | -| go_memstats_mspan_sys_bytes | Number of bytes used for mspan structures obtained from system. | -| go_memstats_next_gc_bytes | Number of heap bytes when next garbage collection will take place. | -| go_memstats_other_sys_bytes | Number of bytes used for other system allocations. | -| go_memstats_stack_inuse_bytes | Number of bytes in use by the stack allocator. | -| go_memstats_stack_sys_bytes | Number of bytes obtained from system for stack allocator. | -| go_memstats_sys_bytes | Number of bytes obtained by system. Sum of all system allocations. | - -**HTTP Metrics:** - -| Metrics | Uses | -| ---------------------------------- | ------------------------------------------- | -| http_request_duration_microseconds | The HTTP request latencies in microseconds. | -| http_request_size_bytes | The HTTP request sizes in bytes. | -| http_requests_total | Total number of HTTP requests made. | -| http_response_size_bytes | The HTTP response sizes in bytes. | - -**Process Metrics:** - -| Metric Name | Uses | -| ----------------------------- | ------------------------------------------------------ | -| process_cpu_seconds_total | Total user and system CPU time spent in seconds. 
| -| process_max_fds | Maximum number of open file descriptors. | -| process_open_fds | Number of open file descriptors. | -| process_resident_memory_bytes | Resident memory size in bytes. | -| process_start_time_seconds | Start time of the process since unix epoch in seconds. | -| process_virtual_memory_bytes | Virtual memory size in bytes. | +Following metrics are available for the Stash operator. These metrics are accessible through `api` endpoint of `stash` service. + +| Metric Name | Usage | +| ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | +| `apiserver_audit_event_total` | Counter of audit events generated and sent to the audit backend. | +| `apiserver_client_certificate_expiration_seconds` | Distribution of the remaining lifetime on the certificate used to authenticate a request. | +| `apiserver_current_inflight_requests` | Maximal number of currently used inflight request limit of this apiserver per request kind in last second. | +| `apiserver_request_count` | Counter of apiserver requests broken out for each verb, API resource, client, and HTTP response contentType and code. | +| `apiserver_request_latencies` | Response latency distribution in microseconds for each verb, resource, and subresource. | +| `apiserver_request_latencies_summary` | Response latency summary in microseconds for each verb, resource, and subresource. | +| `authenticated_user_requests` | Counter of authenticated requests broken out by username. | + +### Pushgateway Metrics + +The Pushgateway itself also exports some metrics related to Pushgateway build info, HTTP requests handled by it, Go process that running inside it, and CPU & Memory consumed by it, etc. 
+ +**Build and Last Activity:** + +| Metric Name | Usage | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| `pushgateway_build_info` | A metric with a constant '1' value labeled by version, revision, branch, and goversion from which pushgateway was built. | +| `push_time_seconds` | Last Unix time when this group was changed in the Pushgateway. | + +**CPU & Memory Related Metrics:** + +| Metric Name | Usage | +| ------------------------------- | ------------------------------------------------------ | +| `process_cpu_seconds_total` | Total user and system CPU time spent in seconds. | +| `process_max_fds` | Maximum number of open file descriptors. | +| `process_open_fds` | Number of open file descriptors. | +| `process_resident_memory_bytes` | Resident memory size in bytes. | +| `process_start_time_seconds` | Start time of the process since unix epoch in seconds. | +| `process_virtual_memory_bytes` | Virtual memory size in bytes. | + +**Go Environment Related Metrics:** + +| Metric Name | Usage | +| --------------------------------------- | ------------------------------------------------------------------------------------------- | +| `go_gc_duration_seconds` | A summary of the GC invocation durations. | +| `go_goroutines` | Number of goroutines that currently exist. | +| `go_info` | Information about the Go environment. | +| `go_memstats_alloc_bytes` | Number of bytes allocated and still in use. | +| `go_memstats_alloc_bytes_total` | Total number of bytes allocated, even if freed. | +| `go_memstats_buck_hash_sys_bytes` | Number of bytes used by the profiling bucket hash table. | +| `go_memstats_frees_total` | Total number of frees. | +| `go_memstats_gc_cpu_fraction` | The fraction of this program's available CPU time used by the GC since the program started. | +| `go_memstats_gc_sys_bytes` | Number of bytes used for garbage collection system metadata. 
| +| `go_memstats_heap_alloc_bytes` | Number of heap bytes allocated and still in use. | +| `go_memstats_heap_idle_bytes` | Number of heap bytes waiting to be used. | +| `go_memstats_heap_inuse_bytes` | Number of heap bytes that are in use. | +| `go_memstats_heap_objects` | Number of allocated objects. | +| `go_memstats_heap_released_bytes_total` | Total number of heap bytes released to OS. | +| `go_memstats_heap_sys_bytes` | Number of heap bytes obtained from system. | +| `go_memstats_last_gc_time_seconds` | Number of seconds since 1970 of last garbage collection. | +| `go_memstats_lookups_total` | Total number of pointer lookups. | +| `go_memstats_mallocs_total` | Total number of mallocs. | +| `go_memstats_mcache_inuse_bytes` | Number of bytes in use by mcache structures. | +| `go_memstats_mcache_sys_bytes` | Number of bytes used for mcache structures obtained from system. | +| `go_memstats_mspan_inuse_bytes` | Number of bytes in use by mspan structures. | +| `go_memstats_mspan_sys_bytes` | Number of bytes used for mspan structures obtained from system. | +| `go_memstats_next_gc_bytes` | Number of heap bytes when next garbage collection will take place. | +| `go_memstats_other_sys_bytes` | Number of bytes used for other system allocations. | +| `go_memstats_stack_inuse_bytes` | Number of bytes in use by the stack allocator. | +| `go_memstats_stack_sys_bytes` | Number of bytes obtained from system for stack allocator. | +| `go_memstats_sys_bytes` | Number of bytes obtained by system. Sum of all system allocations. | +| `go_threads` | Number of OS threads created. | + +**HTTP Request Related Metrics:** + +| Metric Name | Usage | +| ------------------------------------ | ------------------------------------------- | +| `http_request_duration_microseconds` | The HTTP request latencies in microseconds. | +| `http_request_size_bytes` | The HTTP request sizes in bytes. | +| `http_requests_total` | Total number of HTTP requests made. 
| +| `http_response_size_bytes` | The HTTP response sizes in bytes. | ## How to Enable Monitoring -You can enable monitoring through some flags while installing or upgrading or updating Stash. You can also chose which monitoring agent to use for monitoring. Stash will configure respective resources accordingly. Here, are the list of available flags and their uses, +You have to enable Prometheus monitoring during installing / upgrading Stash. The following parameters are available to configure monitoring in Stash. + +| Helm Values | Acceptable Values | Default | Usage | +| ---------------------------------- | --------------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `monitoring.agent` | `prometheus.io/builtin` or `prometheus.io/operator` | `none` | Specify which monitoring agent to use for monitoring Stash. | +| `monitoring.backup` | `true` or `false` | `false` | Specify whether to monitor Stash backup and restore. | +| `monitoring.operator` | `true` or `false` | `false` | Specify whether to monitor Stash operator. | +| `monitoring.serviceMonitor.labels` | any label | `app: ` and `release: `. | Specify the labels for ServiceMonitor. Prometheus crd will select ServiceMonitor using these labels. Only usable when monitoring agent is `prometheus.io/operator`. 
| -| Script Flag | Helm Values | Acceptable Values | Default | Uses | -| ------------------------ | ---------------------------------- | --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `--monitoring-agent` | `monitoring.agent` | `prometheus.io/builtin` or `prometheus.io/operator` | `none` | Specify which monitoring agent to use for monitoring Stash. | -| `--monitoring-backup` | `monitoring.backup` | `true` or `false` | `false` | Specify whether to monitor Stash backup and restore. | -| `--monitoring-operator` | `monitoring.operator` | `true` or `false` | `false` | Specify whether to monitor Stash operator. | -| `--prometheus-namespace` | `monitoring.prometheus.namespace` | any namespace | same namespace as Stash operator | Specify the namespace where Prometheus server is running or will be deployed | -| `--servicemonitor-label` | `monitoring.serviceMonitor.labels` | any label | For Helm installation, `app: ` and `release: `. For script installation, `app: stash` | Specify the labels for ServiceMonitor. Prometheus crd will select ServiceMonitor using these labels. Only usable when monitoring agent is `prometheus.io/operator`. | +You can enable monitoring in Stash as below, -You have to provides these flags while installing or upgrading or updating Stash. Here, are examples for both script and Helm installation process are given which enable monitoring with `prometheus.io/operator` Prometheuse server for `backup`, `restore` and `operator` metrics. + +
+
+ +### New Installation + +If you haven't installed Stash yet, run the following command to enable Prometheus monitoring during installation **Helm 3:** ```bash -$ helm install stash-operator appscode/stash --version {{< param "info.version" >}} \ - --namespace kube-system \ - --set monitoring.agent=prometheus.io/operator \ - --set monitoring.backup=true \ - --set monitoring.operator=true \ - --set monitoring.prometheus.namespace=monitoring \ - --set monitoring.serviceMonitor.labels.k8s-app=prometheus +$ helm install stash appscode/stash -n kube-system \ +--version {{< param "info.version" >}} \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus \ +--set-file license=/path/to/license-file.txt ``` **Helm 2:** ```bash -$ helm install appscode/stash --name stash-operator --version {{< param "info.version" >}} \ - --namespace kube-system \ - --set monitoring.agent=prometheus.io/operator \ - --set monitoring.backup=true \ - --set monitoring.operator=true \ - --set monitoring.prometheus.namespace=monitoring \ - --set monitoring.serviceMonitor.labels.k8s-app=prometheus +$ helm install appscode/stash --name stash --namespace kube-system \ +--version {{< param "info.version" >}} \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus \ +--set-file license=/path/to/license-file.txt ``` **YAML (with Helm 3):** ```bash -$ helm template stash-operator appscode/stash --version {{< param "info.version" >}} \ - --namespace kube-system \ - --no-hooks \ - --set monitoring.agent=prometheus.io/operator \ - --set monitoring.backup=true \ - --set monitoring.operator=true \ - --set monitoring.prometheus.namespace=monitoring \ - --set monitoring.serviceMonitor.labels.k8s-app=prometheus | kubectl apply -f - +$ helm template stash appscode/stash -n kube-system \
+--no-hooks \ +--version {{< param "info.version" >}} \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus \ +--set-file license=/path/to/license-file.txt | kubectl apply -f - +``` + +
+
+ +### Existing Installation + +If you have installed Stash already in your cluster but didn't enable monitoring during installation, you can use `helm upgrade` command to enable monitoring in the existing installation. + +**Helm 3:** + +```bash +$ helm upgrade stash appscode/stash -n kube-system \ +--reuse-values \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus ``` -## Next Steps +**Helm 2:** + +```bash +$ helm upgrade stash appscode/stash --namespace kube-system \ +--reuse-values \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus +``` + +**YAML (with Helm 3):** + +```bash +$ helm upgrade stash appscode/stash -n kube-system \ +--no-hooks \ +--reuse-values \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.k8s-app=prometheus | kubectl apply -f - +``` -- Learn how to monitor Stash using built-in Prometheus from [here](/docs/guides/latest/monitoring/builtin.md). -- Learn how to monitor Stash using Prometheus operator from [here](/docs/guides/latest/monitoring/coreos.md). + 
+
diff --git a/docs/guides/latest/monitoring/prometheus_builtin.md b/docs/guides/latest/monitoring/prometheus_builtin.md new file mode 100644 index 00000000..c09bc53c --- /dev/null +++ b/docs/guides/latest/monitoring/prometheus_builtin.md @@ -0,0 +1,421 @@ +--- +title: Builtin Prometheus | Stash +description: Monitor Stash using official Prometheus server +menu: + docs_{{ .version }}: + identifier: monitoring-builtin + name: Builtin Prometheus + parent: monitoring + weight: 30 +product_name: stash +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Monitoring Stash with builtin Prometheus + +This tutorial will show you how to configure builtin [Prometheus](https://github.com/prometheus/prometheus) scraper to monitor Stash backup and restore operations as well as the Stash operator. + +To keep Prometheus resources isolated, we are going to use a separate namespace called `monitoring` to deploy the Prometheus server and its respective resources. Create the namespace as below if you haven't done already. + +```bash +$ kubectl create ns monitoring +namespace/monitoring created +``` + +## Enable Monitoring in Stash + +At first, we have to enable Prometheus monitoring in Stash during installation. We have to use `prometheus.io/builtin` as the agent for monitoring via built-in Prometheus. + +Here, we are going to enable monitoring for both backup metrics and operator metrics using Helm 3. + + +
+
+ +### New Installation + +If you haven't installed Stash yet, run the following command to enable Prometheus monitoring during installation + +```bash +$ helm install stash appscode/stash -n kube-system \ +--version {{< param "info.version" >}} \ +--set monitoring.agent=prometheus.io/builtin \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set-file license=/path/to/license-file.txt +``` +
+
+ +### Existing Installation + +If you have installed Stash already in your cluster but didn't enable monitoring during installation, you can use `helm upgrade` command to enable monitoring in the existing installation. + +```bash +$ helm upgrade stash appscode/stash -n kube-system \ +--reuse-values \ +--set monitoring.agent=prometheus.io/builtin \ +--set monitoring.backup=true \ +--set monitoring.operator=true +``` +
+
+ +This will add the necessary annotations to the `stash` Service. The Prometheus server will discover the respective endpoints using those annotations. + +Let's verify that the annotations have been added to the Service, + +```bash +$ kubectl get service -n kube-system stash -o yaml +``` + +```yaml +apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: stash + meta.helm.sh/release-namespace: kube-system + prometheus.io/operator_path: /metrics + prometheus.io/operator_port: "8443" + prometheus.io/operator_scheme: https + prometheus.io/pushgateway_path: /metrics + prometheus.io/pushgateway_port: "56789" + prometheus.io/pushgateway_scheme: http + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: stash + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: stash + app.kubernetes.io/version: v0.11.7 + helm.sh/chart: stash-v0.11.7 + name: stash + namespace: kube-system +spec: + clusterIP: 10.110.53.2 + ports: + - name: api + port: 443 + protocol: TCP + targetPort: 8443 + - name: pushgateway + port: 56789 + protocol: TCP + targetPort: 56789 + selector: + app.kubernetes.io/instance: stash + app.kubernetes.io/name: stash + sessionAffinity: None + type: ClusterIP +status: + loadBalancer: {} +``` + +The `stash` Service has two endpoints. The `pushgateway` endpoint exports backup, restore, and repository metrics and the `api` endpoint exports Stash operator metrics. + +If you look at the annotations section of the above Service, you should see that Stash has added Prometheus-specific annotations (prefixed with `prometheus.io`) to the Service. + +Here, the `prometheus.io/scrape: "true"` annotation indicates that Prometheus should scrape metrics for this Service. + +The following three annotations point to the `pushgateway` endpoint, which provides backup and restore metrics. 
+ +```ini +prometheus.io/pushgateway_path: /metrics +prometheus.io/pushgateway_port: "56789" +prometheus.io/pushgateway_scheme: http +``` + +The following three annotations point to the `api` endpoint, which provides operator metrics. + +```ini +prometheus.io/operator_path: /metrics +prometheus.io/operator_port: "8443" +prometheus.io/operator_scheme: https +``` + +Now, we are ready to configure our Prometheus server to scrape those metrics. + +## Deploy Prometheus Server + +In this section, we are going to configure & deploy a Prometheus server to scrape Stash metrics using the `stash` Service. We are going to deploy the Prometheus server in `monitoring` namespace. + +**Copy Certificate Secret:** + +We have deployed Stash in `kube-system` namespace. Stash exports operator metrics via TLS secured `api` endpoint. So, the Prometheus server needs to provide the respective certificate while scraping the metrics from this endpoint. Stash should create a Secret named `stash-apiserver-cert` with the certificate in the `kube-system` namespace. + +Let's verify that the Secret has been created in `kube-system` namespace. + +```bash +$ kubectl get secret -n kube-system -l app.kubernetes.io/instance=stash +NAME TYPE DATA AGE +stash-apiserver-cert Opaque 2 6m +stash-license Opaque 1 6m +``` + +Now, we have to copy this Secret into the `monitoring` namespace so that we can mount the certificate into our Prometheus server. 
+ +Let's copy the `stash-apiserver-cert` Secret into `monitoring` namespace using the following command, + +```bash +kubectl get secret stash-apiserver-cert --namespace=kube-system -oyaml | grep -v '^\s*namespace:\s' | kubectl apply --namespace=monitoring -f - +``` + +Verify that the Secret has been copied successfully in the `monitoring` namespace, + +```bash +$ kubectl get secret -n monitoring -l app.kubernetes.io/instance=stash +NAME TYPE DATA AGE +stash-apiserver-cert Opaque 2 109s +``` + +**Create RBAC:** + +Now, let's create the necessary RBAC stuffs for the Prometheus server, + +```bash +$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/guides/latest/monitoring/examples/prom-rbac.yaml +clusterrole.rbac.authorization.k8s.io/stash-prometheus-server created +serviceaccount/stash-prometheus-server created +clusterrolebinding.rbac.authorization.k8s.io/stash-prometheus-server created +``` + +**Create ConfigMap:** + +Now, create a ConfigMap with the necessary scraping configuration. Bellow, the YAML of the ConfigMap that we are going to create for scrapping metrics from Stash. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: stash-prometheus-server-conf + labels: + name: stash-prometheus-server-conf + namespace: monitoring +data: + prometheus.yml: |- + global: + scrape_interval: 30s + scrape_timeout: 10s + evaluation_interval: 30s + scrape_configs: + - job_name: stash-pushgateway + scrape_interval: 30s + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + honor_labels: true + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_instance] + regex: stash # default label for stash Service is "app.kubernetes.io/instance: stash". customize this field according to label of stash Service of your setup. 
+ action: keep + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + regex: true + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: pushgateway + action: keep + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_pushgateway_path] + regex: (.+) + target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_pushgateway_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_pushgateway_port] + action: replace + target_label: __address__ + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: namespace + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_service_name] + separator: ; + regex: (.*) + target_label: service + replacement: $1 + action: replace + - job_name: stash-operator + scrape_interval: 30s + scrape_timeout: 10s + metrics_path: /metrics + scheme: https + kubernetes_sd_configs: + - role: endpoints + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + ca_file: /etc/prometheus/secret/stash-apiserver-cert/tls.crt + server_name: stash.kube-system.svc + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_instance] + regex: stash # default label for stash Service is "app.kubernetes.io/instance: stash". customize this field according to label of stash Service of your setup. 
+ action: keep + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + regex: true + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: api + action: keep + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_operator_path] + regex: (.+) + target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_operator_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_operator_port] + action: replace + target_label: __address__ + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: namespace + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_service_name] + separator: ; + regex: (.*) + target_label: service + replacement: $1 + action: replace +``` + +Here, we have two scraping jobs. The `stash-pushgateway` job scrapes the backup and restore metrics and the `stash-operator` job scrapes operator metrics. + +Notice the `tls_config` field of `stash-operator` job. We have provided the certificate file through `ca_file` field. This certificate comes from `stash-apiserver-cert` that we are going to mount in the Prometheus Deployment. Here, `server_name` is used to verify hostname. In our case, the certificate is valid for hostname `server` and `stash.kube-system.svc`. + +Also, note that we have provided a bearer-token file through `bearer_token_file` field. This file is a token for `stash-prometheus-server` ServiceAccount that we have created during creating the RBAC stuffs. This is required for authorizing Prometheus to Stash API Server. 
+ +Let's create the ConfigMap we have shown above, + +```bash +$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/guides/latest/monitoring/examples/prom-config.yaml +configmap/stash-prometheus-server-conf created +``` + +**Deploy Prometheus:** + +Now, we are ready to deploy our Prometheus server. YAML for the Deployment that we are going to create is shown below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stash-prometheus-server + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + spec: + serviceAccountName: stash-prometheus-server + containers: + - name: prometheus + image: prom/prometheus:v2.23.0 + args: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus/" + ports: + - containerPort: 9090 + volumeMounts: + - name: prometheus-config-volume + mountPath: /etc/prometheus/ + - name: prometheus-storage-volume + mountPath: /prometheus/ + - name: stash-apiserver-cert + mountPath: /etc/prometheus/secret/stash-apiserver-cert + volumes: + - name: prometheus-config-volume + configMap: + defaultMode: 420 + name: stash-prometheus-server-conf + - name: prometheus-storage-volume + emptyDir: {} + - name: stash-apiserver-cert + secret: + defaultMode: 420 + secretName: stash-apiserver-cert + items: # avoid mounting private key + - key: tls.crt + path: tls.crt +``` + +Notice that, we have mounted `stash-apiserver-cert` secret as a volume at `/etc/prometheus/secret/stash-apiserver-cert` directory. We have also mounted the ConfigMap `stash-prometheus-server-conf` that we have created earlier with the necessary configuration to scrape metrics from Stash. 
+ +Let's create the Deployment we have shown above, + +```bash +$ kubectl apply -f https://github.com/stashed/docs/raw/{{< param "info.version" >}}/docs/guides/latest/monitoring/examples/prom-deployment.yaml +deployment.apps/stash-prometheus-server created +``` + +Now, wait for the Prometheus server to go into `Running` state, + +```bash +$ kubectl get pods -n monitoring +NAME READY STATUS RESTARTS AGE +stash-prometheus-server-77d6bc8b68-wtxlt 1/1 Running 0 66s +``` + +Once the Prometheus server Pod goes into the `Running` state, it should automatically discover the Stash endpoints using the configuration we have provided in the ConfigMap. + +### Verify Monitoring Metrics + +Now, we are going to verify whether the Prometheus server has discovered the Stash endpoints or not. The Prometheus server we have deployed above is running on port `9090`. We are going to use [port forwarding](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/) to access the Prometheus web UI. + +Run following command on a separate terminal to port-forward the Prometheus server Pod, + +```bash +$ kubectl port-forward -n monitoring stash-prometheus-server-77d6bc8b68-wtxlt 9090 +Forwarding from 127.0.0.1:9090 -> 9090 +Forwarding from [::1]:9090 -> 9090 +``` + +Now, we can access the web UI at `localhost:9090`. Open [http://localhost:9090/targets](http://localhost:9090/targets) in your browser. You should see `pushgateway` and `api` endpoints of `stash` service as targets. + +
+ Stash Monitoring Flow +
Fig: Prometheus dashboard
+
+ +As you can see from the above image that the Prometheus server has successfully discovered the Stash endpoints. Now, if you perform backup and restore operations, you should see the respective metrics have been scrapped by the Prometheus server. + +## Cleanup + +To cleanup the Kubernetes resources created by this tutorial, run: + +```bash +kubectl delete clusterrole stash-prometheus-server +kubectl delete clusterrolebinding stash-prometheus-server + +kubectl delete serviceaccount/stash-prometheus-server -n monitoring +kubectl delete configmap/stash-prometheus-server-conf -n monitoring +kubectl delete deployment stash-prometheus-server -n monitoring +kubectl delete secret stash-apiserver-cert -n monitoring + +kubectl delete ns monitoring +``` + +To uninstall Stash follow this [guide](/docs/setup/README.md). diff --git a/docs/guides/latest/monitoring/prometheus_operator.md b/docs/guides/latest/monitoring/prometheus_operator.md new file mode 100644 index 00000000..80a0e6a1 --- /dev/null +++ b/docs/guides/latest/monitoring/prometheus_operator.md @@ -0,0 +1,283 @@ +--- +title: Prometheus Operator | Stash +description: Monitor Stash using Prometheus operator +menu: + docs_{{ .version }}: + identifier: monitoring-prometheus-operator + name: Prometheus Operator + parent: monitoring + weight: 20 +product_name: stash +menu_name: docs_{{ .version }} +section_menu_id: guides +--- + +# Monitoring Using Prometheus Operator + +[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) provides a simple and Kubernetes native way to deploy and configure a Prometheus server. This tutorial will show you how to use the Prometheus operator for monitoring Stash. + +To keep Prometheus resources isolated, we are going to use a separate namespace `monitoring` to deploy the Prometheus operator and respective resources. 
Create the namespace if you haven't created it yet, + +```bash +$ kubectl create ns monitoring +namespace/monitoring created +``` + +## Install Prometheus Stack + +At first, you have to install Prometheus operator in your cluster. In this section, we are going to install Prometheus operator from [prometheus-community/kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack). You can skip this section if you already have Prometheus operator running. + +Install `prometheus-community/kube-prometheus-stack` chart as below, + +- Add necessary helm repositories. + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add stable https://charts.helm.sh/stable +helm repo update +``` + +- Install `kube-prometheus-stack` chart. + +```bash +helm install prometheus-stack prometheus-community/kube-prometheus-stack -n monitoring +``` + +This chart will install [prometheus-operator/prometheus-operator](https://github.com/prometheus-operator/prometheus-operator), [kubernetes/kube-state-metrics](https://github.com/kubernetes/kube-state-metrics), [prometheus/node_exporter](https://github.com/prometheus/node_exporter), and [grafana/grafana](https://github.com/grafana/grafana) etc. + +The above chart will also deploy a Prometheus server. 
Verify that the Prometheus server has been deployed by the following command: + +```bash +$ kubectl get prometheus -n monitoring +NAME VERSION REPLICAS AGE +prometheus-stack-kube-prom-prometheus v2.22.1 1 3m +``` + +Let's check the YAML of the above Prometheus object, + +```bash +$ kubectl get prometheus -n monitoring prometheus-stack-kube-prom-prometheus -o yaml +``` + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + annotations: + meta.helm.sh/release-name: prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-prometheus + app.kubernetes.io/managed-by: Helm + chart: kube-prometheus-stack-12.8.0 + heritage: Helm + release: prometheus-stack + name: prometheus-stack-kube-prom-prometheus + namespace: monitoring +spec: + alerting: + alertmanagers: + - apiVersion: v2 + name: prometheus-stack-kube-prom-alertmanager + namespace: monitoring + pathPrefix: / + port: web + enableAdminAPI: false + externalUrl: http://prometheus-stack-kube-prom-prometheus.monitoring:9090 + image: quay.io/prometheus/prometheus:v2.22.1 + listenLocal: false + logFormat: logfmt + logLevel: info + paused: false + podMonitorNamespaceSelector: {} + podMonitorSelector: + matchLabels: + release: prometheus-stack + portName: web + probeNamespaceSelector: {} + probeSelector: + matchLabels: + release: prometheus-stack + replicas: 1 + retention: 10d + routePrefix: / + ruleNamespaceSelector: {} + ruleSelector: + matchLabels: + app: kube-prometheus-stack + release: prometheus-stack + securityContext: + fsGroup: 2000 + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + serviceAccountName: prometheus-stack-kube-prom-prometheus + serviceMonitorNamespaceSelector: {} + serviceMonitorSelector: + matchLabels: + release: prometheus-stack + version: v2.22.1 +``` + +Notice the following ServiceMonitor related sections, + +```yaml +serviceMonitorNamespaceSelector: {} # select from all namespaces +serviceMonitorSelector: + matchLabels: + 
release: prometheus-stack +``` + +Here, you can see the Prometheus server is selecting the ServiceMonitors from all namespaces that have `release: prometheus-stack` label. + +The above chart will also create a Service for the Prometheus server so that we can access the Prometheus Web UI. Let's verify the Service has been created, + +```bash +$ kubectl get service -n monitoring +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +alertmanager-operated ClusterIP None 9093/TCP,9094/TCP,9094/UDP 10m +prometheus-operated ClusterIP None 9090/TCP 10m +prometheus-stack-grafana ClusterIP 10.105.244.221 80/TCP 11m +prometheus-stack-kube-prom-alertmanager ClusterIP 10.97.172.208 9093/TCP 11m +prometheus-stack-kube-prom-operator ClusterIP 10.97.94.139 443/TCP 11m +prometheus-stack-kube-prom-prometheus ClusterIP 10.105.123.218 9090/TCP 11m +prometheus-stack-kube-state-metrics ClusterIP 10.96.52.8 8080/TCP 11m +prometheus-stack-prometheus-node-exporter ClusterIP 10.107.204.248 9100/TCP 11m +``` + +Here, we can use the `prometheus-stack-kube-prom-prometheus` Service to access the Web UI of our Prometheus Server. + +## Enable Monitoring in Stash + +In this section, we are going to enable Prometheus monitoring in Stash. We have to enable Prometheus monitoring during installing Stash. You have to use `prometheus.io/operator` as the agent for monitoring via Prometheus operator. + +Here, we are going to enable monitoring for both backup metrics and operator metrics using Helm 3. We are going to tell Stash to create ServiceMonitor with `release: prometheus-stack` label so that the Prometheus server we have deployed in the previous section can collect Stash metrics without any further configuration. + + +
+
+ +### New Installation + +If you haven't installed Stash yet, run the following command to enable Prometheus monitoring during installation: + +```bash +$ helm install stash appscode/stash -n kube-system \ +--version {{< param "info.version" >}} \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.release=prometheus-stack \ +--set-file license=/path/to/license-file.txt +``` + +
+
+ +### Existing Installation + +If you have already installed Stash in your cluster but didn't enable monitoring during installation, you can use the `helm upgrade` command to enable monitoring in the existing installation. + +```bash +$ helm upgrade stash appscode/stash -n kube-system \ +--reuse-values \ +--set monitoring.agent=prometheus.io/operator \ +--set monitoring.backup=true \ +--set monitoring.operator=true \ +--set monitoring.serviceMonitor.labels.release=prometheus-stack +``` + +
+
+ +This will create a `ServiceMonitor` object with the same name and namespace as the Stash operator. The `ServiceMonitor` will have the label `release: prometheus-stack` as we have provided it through the `--set monitoring.serviceMonitor.labels` parameter. + +Let's verify that the ServiceMonitor has been created in the Stash operator namespace. + +```bash +$ kubectl get servicemonitor -n kube-system +NAME AGE +stash 65s +``` + +Let's check the YAML of the `ServiceMonitor` object, + +```bash +$ kubectl get servicemonitor stash -n kube-system -o yaml +``` + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + annotations: + meta.helm.sh/release-name: stash + meta.helm.sh/release-namespace: kube-system + labels: + app.kubernetes.io/managed-by: Helm + release: prometheus-stack + name: stash + namespace: kube-system +spec: + endpoints: + - honorLabels: true + port: pushgateway + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + port: api + scheme: https + tlsConfig: + ca: + secret: + key: tls.crt + name: stash-apiserver-cert + serverName: stash.kube-system.svc + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + app.kubernetes.io/instance: stash + app.kubernetes.io/name: stash +``` + +Here, we have two endpoints in `spec.endpoints` section. The `pushgateway` endpoint exports backup and recovery metrics and the `api` endpoint exports the operator metrics. + +## Verify Monitoring + +As soon as the Stash operator pod goes into the `Running` state, the Prometheus server we have deployed in the first section should discover the endpoints exposed by Stash for metrics. + +Now, we are going to [forward port](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/) of `prometheus-stack-kube-prom-prometheus` Service to access the Prometheus web UI. 
Run the following command in a separate terminal: + +```bash +$ kubectl port-forward -n monitoring service/prometheus-stack-kube-prom-prometheus 9090 +Forwarding from 127.0.0.1:9090 -> 9090 +Forwarding from [::1]:9090 -> 9090 +``` + +Now, you can access the Web UI at `localhost:9090`. Open [http://localhost:9090/targets](http://localhost:9090/targets) in your browser. You should see that the `pushgateway` and `api` endpoints of the Stash operator are among the targets. This verifies that the Prometheus server is scraping Stash metrics. + +
+ Stash Monitoring Flow +
Fig: Prometheus Web UI
+
+ +## Cleanup + +To cleanup the Kubernetes resources created by this tutorial, run: + +```bash +# cleanup Prometheus resources +helm delete prometheus-stack -n monitoring + +# delete namespace +kubectl delete ns monitoring +``` + +To uninstall Stash follow this [guide](/docs/setup/README.md). diff --git a/docs/images/guides/latest/monitoring/prom-coreos-target.png b/docs/images/guides/latest/monitoring/prom-coreos-target.png deleted file mode 100644 index 597da262..00000000 Binary files a/docs/images/guides/latest/monitoring/prom-coreos-target.png and /dev/null differ diff --git a/docs/images/guides/latest/monitoring/prometheus_backup_restore_met.png b/docs/images/guides/latest/monitoring/prometheus_backup_restore_met.png deleted file mode 100644 index abcbf870..00000000 Binary files a/docs/images/guides/latest/monitoring/prometheus_backup_restore_met.png and /dev/null differ diff --git a/docs/images/guides/latest/monitoring/stash-monitoring-structure.svg b/docs/images/guides/latest/monitoring/stash-monitoring-structure.svg deleted file mode 100644 index 397fee5d..00000000 --- a/docs/images/guides/latest/monitoring/stash-monitoring-structure.svg +++ /dev/null @@ -1,334 +0,0 @@ - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -