From 4959d79aa44294005eba3376f802e28cc0d1765a Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Wed, 16 Aug 2023 14:12:25 +0300 Subject: [PATCH] Init monitoring section Signed-off-by: Stefan Prodan --- content/en/flux/monitoring/_index.md | 6 ++ content/en/flux/monitoring/custom-metrics.md | 7 ++ content/en/flux/monitoring/events.md | 59 +++++++++++++++ content/en/flux/monitoring/logs.md | 79 ++++++++++++++++++++ content/en/flux/monitoring/metrics.md | 69 +++++++++++++++++ 5 files changed, 220 insertions(+) create mode 100644 content/en/flux/monitoring/_index.md create mode 100644 content/en/flux/monitoring/custom-metrics.md create mode 100644 content/en/flux/monitoring/events.md create mode 100644 content/en/flux/monitoring/logs.md create mode 100644 content/en/flux/monitoring/metrics.md diff --git a/content/en/flux/monitoring/_index.md b/content/en/flux/monitoring/_index.md new file mode 100644 index 000000000..e27c140a6 --- /dev/null +++ b/content/en/flux/monitoring/_index.md @@ -0,0 +1,6 @@ +--- +title: "Flux monitoring" +linkTitle: "Monitoring" +description: "How to configure monitoring for Flux." +weight: 50 +--- diff --git a/content/en/flux/monitoring/custom-metrics.md b/content/en/flux/monitoring/custom-metrics.md new file mode 100644 index 000000000..bcfac7e88 --- /dev/null +++ b/content/en/flux/monitoring/custom-metrics.md @@ -0,0 +1,7 @@ +--- +title: "Flux custom Prometheus metrics" +linkTitle: "Custom metrics" +description: "How to extend the Flux Prometheus metrics with kube-state-metrics" +weight: 2 +--- + diff --git a/content/en/flux/monitoring/events.md b/content/en/flux/monitoring/events.md new file mode 100644 index 000000000..51f441a6c --- /dev/null +++ b/content/en/flux/monitoring/events.md @@ -0,0 +1,59 @@ +--- +title: "Flux events" +linkTitle: "Events" +description: "How to monitor the Flux events" +weight: 4 +--- + +The Flux controllers emit Kubernetes events for every reconciliation operation. 
+ +## Kubernetes events + +The Flux controller events contain the following fields: + +- `type` can be `Normal` or `Warning` +- `firstTimestamp` timestamp in the ISO 8601 format +- `lastTimestamp` timestamp in the ISO 8601 format +- `message` info or warning description +- `reason` short machine-readable string +- `involvedObject` the API version, kind, name and namespace of the Flux object +- `metadata.annotations` the Flux-specific metadata e.g. source revision +- `source.component` the Flux controller name + +### Samples + +Sample of a `Normal` event produced by kustomize-controller: + +```json +{ + "kind": "Event", + "apiVersion": "v1", + "metadata": { + "name": "flux-system.177bd633e296a292", + "namespace": "flux-system", + "annotations": { + "kustomize.toolkit.fluxcd.io/revision": "main@sha1:802723078affd3eb2a3898630261ab3ca5d6dd40" + } + }, + "involvedObject": { + "kind": "Kustomization", + "namespace": "flux-system", + "name": "flux-system", + "apiVersion": "kustomize.toolkit.fluxcd.io/v1" + }, + "reason": "ReconciliationSucceeded", + "message": "Reconciliation finished in 436.493292ms, next run in 10m0s", + "source": { + "component": "kustomize-controller" + }, + "firstTimestamp": "2023-08-16T10:26:43Z", + "lastTimestamp": "2023-08-16T10:26:43Z", + "type": "Normal" +} +``` + +## Events inspection with kubectl + +```shell +kubectl events -n monitoring --for helmrelease/kube-prom-stack +``` diff --git a/content/en/flux/monitoring/logs.md b/content/en/flux/monitoring/logs.md new file mode 100644 index 000000000..b0e374556 --- /dev/null +++ b/content/en/flux/monitoring/logs.md @@ -0,0 +1,79 @@ +--- +title: "Flux logs" +linkTitle: "Logs" +description: "How to monitor the Flux logs with Loki and Grafana" +weight: 3 +--- + +The Flux controllers follow the Kubernetes structured logging conventions. 
+ +## Structured logging + +The Flux controller logs are written to `stderr` in JSON format, with the following common tags: + +- `level` can be `debug`, `info` or `error` +- `ts` timestamp in the ISO 8601 format +- `msg` info or error description +- `error` error details (present when `level` is `error`) +- `controllerGroup` the Flux CR group +- `controllerKind` the Flux CR kind +- `name` the Flux CR name +- `namespace` the Flux CR namespace +- `reconcileID` the UID of the Flux reconcile operation + +### Samples + +Sample of an `info` log produced by kustomize-controller: + +```json +{ + "level": "info", + "ts": "2023-08-16T09:36:41.286Z", + "controllerGroup": "kustomize.toolkit.fluxcd.io", + "controllerKind": "Kustomization", + "name": "redis", + "namespace": "apps", + "msg": "server-side apply completed", + "revision": "main@sha1:30081ad7170fb8168536768fe399493dd43160d7", + "output": { + "ConfigMap/apps/redis": "created", + "Deployment/apps/redis": "configured", + "HorizontalPodAutoscaler/apps/redis": "deleted", + "Service/apps/redis": "unchanged", + "Secret/apps/redis": "skipped" + } +} +``` + +Sample of an `error` log produced by kustomize-controller: + +```json +{ + "level": "error", + "ts": "2023-08-16T09:36:41.286Z", + "controllerGroup": "kustomize.toolkit.fluxcd.io", + "controllerKind": "Kustomization", + "name": "redis", + "namespace": "apps", + "msg": "Reconciliation failed after 2s, next try in 5m0s", + "revision": "main@sha1:f68c334e0f5fae791d1e47dbcabed256f4f89e68", + "error": "Service/apps/redis dry-run failed, reason: Invalid, error: Service redis is invalid: spec.type: Unsupported value: Ingress" +} +``` + +## Log inspection with kubectl + +```shell +kubectl -n flux-system logs deploy/kustomize-controller +``` + +## Log aggregation with Grafana Loki + +To install Grafana Loki and Promtail in the `monitoring` namespace, apply the +[manifests/monitoring/loki-stack](https://github.com/fluxcd/flux2/tree/main/manifests/monitoring/loki-stack). 
+ +### Grafana dashboard + +Control plane logs [http://localhost:3000/d/flux-logs](http://localhost:3000/d/flux-logs/flux-logs): + +![Control plane logs dashboard](/img/logs-dashboard.png) diff --git a/content/en/flux/monitoring/metrics.md b/content/en/flux/monitoring/metrics.md new file mode 100644 index 000000000..27403f018 --- /dev/null +++ b/content/en/flux/monitoring/metrics.md @@ -0,0 +1,69 @@ +--- +title: "Flux Prometheus metrics" +linkTitle: "Metrics" +description: "How to monitor Flux with Prometheus Operator and Grafana" +weight: 1 +--- + +## Reconcile metrics + +Ready status metrics: + +```sh +gotk_reconcile_condition{kind, name, namespace, type="Ready", status="True"} +gotk_reconcile_condition{kind, name, namespace, type="Ready", status="False"} +gotk_reconcile_condition{kind, name, namespace, type="Ready", status="Unknown"} +``` + +Suspend status metrics: + +```sh +gotk_suspend_status{kind, name, namespace} +``` + +Time spent reconciling: + +```sh +gotk_reconcile_duration_seconds_bucket{kind, name, namespace, le} +gotk_reconcile_duration_seconds_sum{kind, name, namespace} +gotk_reconcile_duration_seconds_count{kind, name, namespace} +``` + +## Control plane metrics + +Controller CPU and memory usage: + +```sh +process_cpu_seconds_total{namespace, pod} +container_memory_working_set_bytes{namespace, pod} +``` + +Kubernetes API usage: + +```shell +rest_client_requests_total{namespace, pod} +``` + +Controller runtime: + +```shell +workqueue_longest_running_processor_seconds{name} +controller_runtime_reconcile_total{controller, result} +``` + +## Set up monitoring with kube-prom-stack + +Flux uses [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) +to provide a monitoring stack made out of: + +* **Prometheus Operator** - manages Prometheus clusters atop Kubernetes +* **Prometheus** - collects metrics from the Flux controllers and Kubernetes API +* **Grafana** dashboards - display the Flux 
control plane resource usage and reconciliation stats +* **kube-state-metrics** - generates metrics about the state of the Kubernetes objects + +### Alertmanager examples + +## Flux Grafana dashboards + +### Grafana annotations +