-
Notifications
You must be signed in to change notification settings - Fork 823
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prometheus and grafana improvements based on load testing
- made prometheus PVC size configurable on the command line - moved majority of Prometheus config overrides to separate yaml file. - removed scraping of container stats from prometheus config, otherwise clusters of 10K pods are very quickly consuming tons of space - added taints and tolerations to prometheus and grafana. They will now prefer (but not require) to be scheduled on nodes labeled with `stable.agones.dev/agones-metrics: true`. They will also tolerate the taint `stable.agones.dev/agones-metrics=true:NoExecute`. Creating a node pool dedicated to monitoring is as simple as: ``` gcloud container node-pools create agones-metrics ... \ --node-taints stable.agones.dev/agones-metrics=true:NoExecute \ --node-labels stable.agones.dev/agones-metrics=true \ --num-nodes=1 ```
- Loading branch information
Showing
5 changed files
with
166 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
alertmanager: | ||
enabled: false | ||
nodeExporter: | ||
enabled: false | ||
kubeStateMetrics: | ||
enabled: false | ||
pushgateway: | ||
enabled: false | ||
server: | ||
resources: | ||
requests: | ||
memory: 4Gi | ||
cpu: 2 | ||
tolerations: | ||
- key: "stable.agones.dev/agones-metrics" | ||
operator: "Equal" | ||
value: "true" | ||
effect: "NoExecute" | ||
affinity: | ||
nodeAffinity: | ||
preferredDuringSchedulingIgnoredDuringExecution: | ||
- weight: 1 | ||
preference: | ||
matchExpressions: | ||
- key: stable.agones.dev/agones-metrics | ||
operator: Exists | ||
serverFiles: | ||
prometheus.yml: | ||
rule_files: | ||
- /etc/config/rules | ||
- /etc/config/alerts | ||
|
||
scrape_configs: | ||
- job_name: prometheus | ||
static_configs: | ||
- targets: | ||
- localhost:9090 | ||
|
||
# A scrape configuration for running Prometheus on a Kubernetes cluster. | ||
# This uses separate scrape configs for cluster components (i.e. API server, node) | ||
# and services to allow each to use different authentication configs. | ||
# | ||
# Kubernetes labels will be added as Prometheus labels on metrics via the | ||
# `labelmap` relabeling action. | ||
|
||
# Scrape config for API servers. | ||
# | ||
# Kubernetes exposes API servers as endpoints to the default/kubernetes | ||
# service, so this uses the `endpoints` role and uses relabeling to keep only | ||
# the endpoints associated with the default/kubernetes service using the | ||
# default named port `https`. This works for single API server deployments as | ||
# well as HA API server deployments. | ||
- job_name: 'kubernetes-apiservers' | ||
|
||
kubernetes_sd_configs: | ||
- role: endpoints | ||
|
||
# Default to scraping over https. If required, just disable this or change to | ||
# `http`. | ||
scheme: https | ||
|
||
# This TLS & bearer token file config is used to connect to the actual scrape | ||
# endpoints for cluster components. This is separate to discovery auth | ||
# configuration because discovery & scraping are two separate concerns in | ||
# Prometheus. The discovery auth config is automatic if Prometheus runs inside | ||
# the cluster. Otherwise, more config options have to be provided within the | ||
# <kubernetes_sd_config>. | ||
tls_config: | ||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||
# If your node certificates are self-signed or use a different CA to the | ||
# master CA, then disable certificate verification below. Note that | ||
# certificate verification is an integral part of a secure infrastructure | ||
# so this should only be disabled in a controlled environment. Verification | ||
# is disabled here by the line below; remove that line to re-enable it. | ||
# | ||
insecure_skip_verify: true | ||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token | ||
|
||
# Keep only the default/kubernetes service endpoints for the https port. This | ||
# will add targets for each API server which Kubernetes adds an endpoint to | ||
# the default/kubernetes service. | ||
relabel_configs: | ||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] | ||
action: keep | ||
regex: default;kubernetes;https | ||
|
||
# Example scrape config for pods | ||
# | ||
# The relabeling allows the actual pod scrape endpoint to be configured via the | ||
# following annotations: | ||
# | ||
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true` | ||
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this. | ||
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. | ||
- job_name: 'kubernetes-pods' | ||
|
||
kubernetes_sd_configs: | ||
- role: pod | ||
|
||
relabel_configs: | ||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] | ||
action: keep | ||
regex: true | ||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] | ||
action: replace | ||
target_label: __metrics_path__ | ||
regex: (.+) | ||
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] | ||
action: replace | ||
regex: ([^:]+)(?::\d+)?;(\d+) | ||
replacement: $1:$2 | ||
target_label: __address__ | ||
- action: labelmap | ||
regex: __meta_kubernetes_pod_label_(.+) | ||
- source_labels: [__meta_kubernetes_namespace] | ||
action: replace | ||
target_label: kubernetes_namespace | ||
- source_labels: [__meta_kubernetes_pod_name] | ||
action: replace | ||
target_label: kubernetes_pod_name |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters