Skip to content

Commit

Permalink
fix grafana config (#206)
Browse files Browse the repository at this point in the history
* fix grafana config
* mount monitor configmap to grafana container
* upgrade grafana to 4.6.5
* fix grafana configuration behind reverse proxy
* make grafana deployment optional
* add e2e test for monitor
* use env to set grafana server root_url and domain
  • Loading branch information
tennix authored Dec 3, 2018
1 parent 5d23db6 commit ebcb306
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 9 deletions.
12 changes: 10 additions & 2 deletions charts/tidb-cluster/templates/config/_grafana-config.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,23 @@
;http_port = 3000

# The public facing domain name used to access grafana from a browser
;domain = localhost
{{- if .Values.monitor.grafana.serverDomain }}
domain = {{ .Values.monitor.grafana.serverDomain }}
{{- else }}
domain = localhost
{{- end }}

# Redirect to correct domain if host header does not match domain
# Prevents DNS rebinding attacks
;enforce_domain = false

# The full public facing url you use in browser, used for redirects and emails
# If you use reverse proxy and sub path specify full url (with sub path)
root_url = {{ .Values.grafanaUrl }}
{{- if .Values.monitor.grafana.serverRootUrl }}
root_url = {{ .Values.monitor.grafana.serverRootUrl }}
{{- else }}
root_url = %(protocol)s://%(domain)s:%(http_port)s/
{{- end }}

# Log web requests
;router_logging = false
Expand Down
3 changes: 2 additions & 1 deletion charts/tidb-cluster/templates/monitor-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ data:

alert-rules-config: |-
{{ tuple "config/_alert-rules-config.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}

{{- if .Values.monitor.grafana.create }}
grafana-config: |-
{{ tuple "config/_grafana-config.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
{{- end }}
{{- end }}
24 changes: 22 additions & 2 deletions charts/tidb-cluster/templates/monitor-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ spec:
- /bin/sh
- -c
- |
mkdir -p /data/prometheus /data/grafana
chmod 777 /data/prometheus /data/grafana
mkdir -p /data/prometheus {{- if .Values.monitor.grafana.create }} /data/grafana {{- end }}
chmod 777 /data/prometheus {{- if .Values.monitor.grafana.create }} /data/grafana {{- end }}
securityContext:
runAsUser: 0
volumeMounts:
Expand Down Expand Up @@ -89,6 +89,7 @@ spec:
readOnly: true
- name: monitor-data
mountPath: /data
{{- if .Values.monitor.grafana.create }}
- name: grafana
image: {{ .Values.monitor.grafana.image }}
imagePullPolicy: {{ .Values.monitor.grafana.imagePullPolicy | default "IfNotPresent" }}
Expand All @@ -101,6 +102,18 @@ spec:
containerPort: 3000
protocol: TCP
env:
# The following two settings (GF_SERVER_ROOT_URL and GF_SERVER_DOMAIN) belong in the grafana configuration file,
# but the grafana container startup script runs chown on the configuration directory,
# which fails because a configmap is always mounted as a read-only volume in the container,
# so they are temporarily set here as environment variables instead
{{- if .Values.monitor.grafana.serverRootUrl }}
- name: GF_SERVER_ROOT_URL
value: {{ .Values.monitor.grafana.serverRootUrl | quote }}
{{- end }}
# export the domain override only when serverDomain itself is configured
{{- if .Values.monitor.grafana.serverDomain }}
- name: GF_SERVER_DOMAIN
value: {{ .Values.monitor.grafana.serverDomain | quote }}
{{- end }}
- name: GF_PATHS_DATA
value: /data/grafana
- name: GF_SECURITY_ADMIN_USER
Expand All @@ -116,8 +129,13 @@ spec:
- name: TZ
value: {{ .Values.timezone | default "UTC" }}
volumeMounts:
# configmap is always mounted as read only volume in container
# grafana startup script will fail with read only configuration directory
# - name: grafana-config
# mountPath: /etc/grafana
- name: monitor-data
mountPath: /data
{{- end }}
volumes:
- name: monitor-data
{{- if .Values.monitor.persistent }}
Expand All @@ -134,12 +152,14 @@ spec:
path: prometheus.yml
- key: alert-rules-config
path: alert.rules
{{- if .Values.monitor.grafana.create }}
- name: grafana-config
configMap:
name: {{ .Values.clusterName }}-monitor
items:
- key: grafana-config
path: grafana.ini
{{- end }}
{{- if .Values.monitor.tolerations }}
tolerations:
{{ toYaml .Values.monitor.tolerations | indent 6 }}
Expand Down
2 changes: 1 addition & 1 deletion charts/tidb-cluster/templates/monitor-job.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.monitor.create }}
{{- /* `and` is a prefix function in Go templates; render the job only when both the monitor and grafana are enabled */ -}}
{{- if and .Values.monitor.create .Values.monitor.grafana.create }}
apiVersion: batch/v1
kind: Job
metadata:
Expand Down
2 changes: 1 addition & 1 deletion charts/tidb-cluster/templates/monitor-secret.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.monitor.create }}
{{- /* `and` is a prefix function in Go templates; render the secret only when both the monitor and grafana are enabled */ -}}
{{- if and .Values.monitor.create .Values.monitor.grafana.create }}
apiVersion: v1
kind: Secret
metadata:
Expand Down
2 changes: 2 additions & 0 deletions charts/tidb-cluster/templates/monitor-service.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{{- if .Values.monitor.create }}
{{- if .Values.monitor.grafana.create }}
apiVersion: v1
kind: Service
metadata:
Expand All @@ -20,6 +21,7 @@ spec:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: monitor
{{- end }}
---
apiVersion: v1
kind: Service
Expand Down
8 changes: 6 additions & 2 deletions charts/tidb-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ monitor:
image: pingcap/tidb-dashboard-installer:v2.0.0
imagePullPolicy: IfNotPresent
grafana:
image: grafana/grafana:4.6.3
create: true
image: grafana/grafana:4.6.5
imagePullPolicy: IfNotPresent
logLevel: info
resources: {}
Expand All @@ -177,7 +178,10 @@ monitor:
password: admin
service:
type: NodePort
grafanaUrl: http://localhost:3000
# if grafana is running behind a reverse proxy with a sub path, e.g. http://foo.bar/grafana,
# configure `serverDomain` and `serverRootUrl` as follows
# serverDomain: foo.bar
# serverRootUrl: "%(protocol)s://%(domain)s/grafana/"
prometheus:
image: prom/prometheus:v2.2.1
imagePullPolicy: IfNotPresent
Expand Down
82 changes: 82 additions & 0 deletions tests/e2e/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ package e2e

import (
"database/sql"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"strconv"
"strings"
"time"
Expand All @@ -32,9 +36,25 @@ import (
)

const (
username = "admin"
password = "admin"
)

// Result is one series of a query_range response: the labels that identify
// the series and the samples recorded for it.
type Result struct {
	// Metric holds the series labels; only the "job" label is decoded.
	Metric struct {
		Job string `json:"job"`
	} `json:"metric"`
	// Values holds the raw samples without further decoding.
	Values []interface{} `json:"values"`
}

// Response is the JSON envelope returned by the query_range endpoint that
// checkGrafanaData calls through the Grafana datasource proxy.
type Response struct {
	// Status is "success" when the query was evaluated.
	Status string `json:"status"`
	// Data carries the query result. The explicit tag keeps the wire name
	// lowercase when encoding and matches the tagging of every other field.
	Data struct {
		ResultType string   `json:"resultType"`
		Result     []Result `json:"result"`
	} `json:"data"`
}

func testCreate(ns, clusterName string) {
By(fmt.Sprintf("When create the TiDB cluster: %s/%s", ns, clusterName))
instanceName := getInstanceName(ns, clusterName)
Expand Down Expand Up @@ -112,6 +132,11 @@ func allMembersRunning(ns, clusterName string) (bool, error) {
return false, nil
}

running, err = monitorMemberRunning(tc)
if err != nil || !running {
return false, nil
}

return true, nil
}

Expand Down Expand Up @@ -331,6 +356,63 @@ func tidbMemberRunning(tc *v1alpha1.TidbCluster) (bool, error) {
return true, nil
}

// monitorMemberRunning reports whether the "<cluster>-monitor" deployment of
// tc has at least one ready replica and checkGrafanaData succeeds against it.
//
// Every failure is logged and reported as (false, nil) rather than as an
// error; the returned error is always nil.
func monitorMemberRunning(tc *v1alpha1.TidbCluster) (bool, error) {
	namespace := tc.GetNamespace()
	name := fmt.Sprintf("%s-monitor", tc.GetName())

	monitor, err := kubeCli.AppsV1beta1().Deployments(namespace).Get(name, metav1.GetOptions{})
	switch {
	case err != nil:
		logf(err.Error())
		return false, nil
	case monitor.Status.ReadyReplicas < 1:
		logf("monitor ready replicas %d < 1", monitor.Status.ReadyReplicas)
		return false, nil
	}

	// The deployment is ready; make sure Grafana can actually serve metrics.
	if err = checkGrafanaData(tc); err != nil {
		logf("can't get grafana data: %v", err)
		return false, nil
	}
	return true, nil
}

// checkGrafanaData queries the cluster's Grafana service for the last minute
// of tikv_pd_heartbeat_tick_total data, going through Grafana's datasource
// proxy, and returns nil only when the query succeeds and yields at least one
// series.
//
// It returns an error when the request cannot be built or sent, when the
// request exceeds the timeout, when Grafana answers with a non-200 status,
// when the body is not a valid query response, or when the response carries
// no result.
func checkGrafanaData(tc *v1alpha1.TidbCluster) error {
	// Bound each probe so an unreachable Grafana service cannot stall the caller.
	const requestTimeout = 10 * time.Second

	ns := tc.GetNamespace()
	svcName := fmt.Sprintf("%s-grafana", tc.GetName())

	end := time.Now()
	start := end.Add(-time.Minute)
	values := url.Values{}
	values.Set("query", `sum(tikv_pd_heartbeat_tick_total{type="leader"}) by (job)`)
	values.Set("start", strconv.FormatInt(start.Unix(), 10))
	values.Set("end", strconv.FormatInt(end.Unix(), 10))
	values.Set("step", "30")

	// NOTE(review): the proxy path hard-codes datasource id 1 — presumably the
	// datasource registered at install time; confirm against the installer.
	u := fmt.Sprintf("http://%s.%s.svc.cluster.local:3000/api/datasources/proxy/1/api/v1/query_range?%s", svcName, ns, values.Encode())
	req, err := http.NewRequest(http.MethodGet, u, nil)
	if err != nil {
		return err
	}
	req.SetBasicAuth(username, password)

	client := &http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	buf, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	// Report auth or proxy failures directly instead of as a JSON decode error.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %d from grafana: %s", resp.StatusCode, buf)
	}

	data := &Response{}
	if err := json.Unmarshal(buf, data); err != nil {
		return err
	}
	if data.Status != "success" || len(data.Data.Result) < 1 {
		return fmt.Errorf("invalid response: status: %s, result: %v", data.Status, data.Data.Result)
	}
	return nil
}

func reclaimPolicySynced(tc *v1alpha1.TidbCluster) (bool, error) {
ns := tc.GetNamespace()
instanceName := tc.GetLabels()[label.InstanceLabelKey]
Expand Down

0 comments on commit ebcb306

Please sign in to comment.