diff --git a/.github/workflows/contrib.yaml b/.github/workflows/contrib.yaml
new file mode 100644
index 00000000000..612580ce246
--- /dev/null
+++ b/.github/workflows/contrib.yaml
@@ -0,0 +1,11 @@
+name: Test contrib/mixin
+on: [push, pull_request]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-go@v2
+        with:
+          go-version: "1.17.6"
+      - run: make -C contrib/mixin tools test
diff --git a/.gitignore b/.gitignore
index dbb48c6e153..ab1bbe4ceb6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@
 *.test
 hack/tls-setup/certs
 .idea
+/contrib/mixin/manifests
 /contrib/raftexample/raftexample
 /contrib/raftexample/raftexample-*
 /vendor
@@ -22,4 +23,4 @@ hack/tls-setup/certs
 *.bak
 .gobincache/
 /Documentation/dev-guide/api_reference_v3.md
-/Documentation/dev-guide/api_concurrency_reference_v3.md
\ No newline at end of file
+/Documentation/dev-guide/api_concurrency_reference_v3.md
diff --git a/contrib/mixin/Makefile b/contrib/mixin/Makefile
new file mode 100644
index 00000000000..843215b00c4
--- /dev/null
+++ b/contrib/mixin/Makefile
@@ -0,0 +1,30 @@
+# Builds the Prometheus alert rules from mixin.libsonnet and unit-tests them.
+.PHONY: tools manifests test clean
+
+# Prometheus release (platform and version) that promtool is taken from.
+OS := linux
+ARCH ?= amd64
+PROMETHEUS_VERSION := 2.33.1
+
+# Install jsonnet and gojsontoyaml with `go install`, and unpack promtool from
+# the Prometheus release tarball into GOPATH/bin.
+tools:
+	go install github.com/google/go-jsonnet/cmd/jsonnet@latest
+	go install github.com/brancz/gojsontoyaml@latest
+	wget -qO- "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}.tar.gz" |\
+	tar xvz --strip-components=1 -C "$$(go env GOPATH)/bin" prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}/promtool
+
+manifests: manifests/etcd-prometheusRules.yaml
+
+# Depend on mixin.libsonnet so the rules are regenerated whenever it changes;
+# without the prerequisite an existing manifest is never rebuilt and goes stale.
+manifests/etcd-prometheusRules.yaml: mixin.libsonnet
+	mkdir -p manifests
+	jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > manifests/etcd-prometheusRules.yaml
+
+# Run the promtool unit tests in test.yaml against the generated rules file.
+test: manifests/etcd-prometheusRules.yaml
+	promtool test rules test.yaml
+
+clean:
+	rm -rf manifests/*.yaml
diff --git a/contrib/mixin/README.md b/contrib/mixin/README.md
index 224066f457d..2ec70004cda 100644
--- a/contrib/mixin/README.md
+++ b/contrib/mixin/README.md
@@ -12,11 +12,15 @@ Instructions for use are the same as the [kubernetes-mixin](https://github.com/k
 
 ## Testing alerts
 
-Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed.
+Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed. You can fetch it via
+
+```
+make tools
+```
 
 First compile the mixin to a YAML file, which the promtool will read:
 ```
-jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > mixin.yaml
+make manifests
 ```
 
 Then run the unit test:
diff --git a/contrib/mixin/test.yaml b/contrib/mixin/test.yaml
index 24162bd4d4e..e8f53f55007 100644
--- a/contrib/mixin/test.yaml
+++ b/contrib/mixin/test.yaml
@@ -1,5 +1,5 @@
 rule_files:
-  - mixin.yaml
+  - manifests/etcd-prometheusRules.yaml
 
 evaluation_interval: 1m
 
@@ -86,14 +86,15 @@ tests:
             exp_annotations:
               description: 'etcd cluster "etcd": members are down (1).'
               summary: 'etcd cluster members are down.'
+
   - interval: 1m
     input_series:
       - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
         values: '0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0'
       - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
-        values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
+        values: '0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0'
       - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
-        values: '0 0 0 0 0 0 0 0'
+        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
     alert_rule_test:
       - eval_time: 10m
         alertname: etcdHighNumberOfLeaderChanges
@@ -111,25 +112,34 @@ tests:
       - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
         values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
       - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
-        values: '0 0 0 0 0 0 0 0'
+        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
     alert_rule_test:
       - eval_time: 10m
         alertname: etcdHighNumberOfLeaderChanges
         exp_alerts:
+
   - interval: 1m
     input_series:
-      - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.0"}'
-        values: '0 10 20 0 0 10 0 0 30 0 0 0 0 0 0 0'
-      - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.1"}'
-        values: '0 0 10 0 20 0 0 0 0 0 0 0 0 0 0 0'
-      - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.2"}'
-        values: '0 0 0 0 0 0 0 0'
+      - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
+        values: '0 1 2 0 0 1 0 3 0 0 0 0 0 0 0 0'
+      - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.0"}'
+        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+      - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
+        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
+      - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.1"}'
+        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+      - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.2"}'
+        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
+      - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.2"}'
+        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
     alert_rule_test:
-      - eval_time: 10m
+      - eval_time: 11m
         alertname: etcdExcessiveDatabaseGrowth
         exp_alerts:
           - exp_labels:
+              instance: '10.10.10.0'
               job: etcd
               severity: warning
             exp_annotations:
-              message: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours, please check as it might be disruptive.'
+              description: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
+              summary: 'etcd cluster database growing very fast.'