From cba67ca81cfa435a442ce016d565ae571bf8b246 Mon Sep 17 00:00:00 2001 From: nexustar Date: Thu, 24 Mar 2022 15:04:40 +0800 Subject: [PATCH] cluster: add ngm metrics to promtheus config (#1806) --- embed/templates/config/prometheus.yml.tpl | 49 ++++++++------------- pkg/cluster/ansible/inventory.go | 2 - pkg/cluster/spec/monitoring.go | 53 +++++++++++++++-------- pkg/cluster/template/config/prometheus.go | 24 +++------- 4 files changed, 57 insertions(+), 71 deletions(-) diff --git a/embed/templates/config/prometheus.yml.tpl b/embed/templates/config/prometheus.yml.tpl index 68e31bfdfc..6dec42b071 100644 --- a/embed/templates/config/prometheus.yml.tpl +++ b/embed/templates/config/prometheus.yml.tpl @@ -40,9 +40,6 @@ rule_files: {{- if .CDCAddrs}} - 'ticdc.rules.yml' {{- end}} -{{- if .KafkaAddrs}} - - 'kafka.rules.yml' -{{- end}} {{- if .LightningAddrs}} - 'lightning.rules.yml' {{- end}} @@ -171,13 +168,6 @@ scrape_configs: {{- end}} {{- end}} {{- if .PumpAddrs}} -{{- if .KafkaExporterAddr}} - - job_name: 'kafka_exporter' - honor_labels: true # don't overwrite job & instance labels - static_configs: - - targets: - - '{{.KafkaExporterAddr}}' -{{- end}} - job_name: 'pump' honor_labels: true # don't overwrite job & instance labels {{- if .TLSEnabled}} @@ -218,22 +208,6 @@ scrape_configs: module: [tcp_connect] {{- end}} static_configs: -{{- if .KafkaAddrs}} - - targets: - {{- range .KafkaAddrs}} - - '{{.}}' - {{- end}} - labels: - group: 'kafka' -{{- end}} -{{- if .ZookeeperAddrs}} - - targets: - {{- range .ZookeeperAddrs}} - - '{{.}}' - {{- end}} - labels: - group: 'zookeeper' -{{- end}} - targets: {{- range .PumpAddrs}} - '{{.}}' @@ -246,12 +220,6 @@ scrape_configs: {{- end}} labels: group: 'drainer' -{{- if .KafkaExporterAddr}} - - targets: - - '{{.KafkaExporterAddr}}' - labels: - group: 'kafka_exporter' -{{- end}} relabel_configs: - source_labels: [__address__] target_label: __param_target @@ -276,6 +244,23 @@ scrape_configs: {{- range .CDCAddrs}} - '{{.}}' {{- end}} +{{- end}} +{{- if .NGMonitoringAddrs}} + - job_name: "ng-monitoring" + honor_labels: true # don't overwrite job & instance labels +{{- if .TLSEnabled}} + scheme: https + tls_config: + insecure_skip_verify: false + ca_file: ../tls/ca.crt + cert_file: ../tls/prometheus.crt + key_file: ../tls/prometheus.pem +{{- end}} + static_configs: + - targets: +{{- range .NGMonitoringAddrs}} + - '{{.}}' +{{- end}} {{- end}} - job_name: "tidb_port_probe" scrape_interval: 30s diff --git a/pkg/cluster/ansible/inventory.go b/pkg/cluster/ansible/inventory.go index 02ba1c8ada..b40ae092e5 100644 --- a/pkg/cluster/ansible/inventory.go +++ b/pkg/cluster/ansible/inventory.go @@ -514,8 +514,6 @@ func parseGroupVars(ctx context.Context, dir, ansCfgFile string, clsMeta *spec.C logger.Infof("Imported %d Grafana node(s).", len(clsMeta.Topology.Grafanas)) } - // kafka_exporter_servers - // pump_servers if grp, ok := inv.Groups["pump_servers"]; ok && len(grp.Hosts) > 0 { /* diff --git a/pkg/cluster/spec/monitoring.go b/pkg/cluster/spec/monitoring.go index 32d2f66372..9587cf4a8d 100644 --- a/pkg/cluster/spec/monitoring.go +++ b/pkg/cluster/spec/monitoring.go @@ -201,14 +201,11 @@ func (i *MonitorInstance) InitConfig( } uniqueHosts := set.NewStringSet() - ngcfg := config.NewNgMonitoringConfig(clusterName, clusterVersion, enableTLS) - if servers, found := topoHasField("PDServers"); found { for i := 0; i < servers.Len(); i++ { pd := servers.Index(i).Interface().(*PDSpec) uniqueHosts.Insert(pd.Host) cfig.AddPD(pd.Host, uint64(pd.ClientPort)) - ngcfg.AddPD(pd.Host, uint64(pd.ClientPort)) } } if servers, found := topoHasField("TiKVServers"); found { @@ -254,6 +251,12 @@ func (i *MonitorInstance) InitConfig( cfig.AddCDC(cdc.Host, uint64(cdc.Port)) } } + if servers, found := topoHasField("Monitors"); found { + for i := 0; i < servers.Len(); i++ { + monitoring := servers.Index(i).Interface().(*PrometheusSpec) + uniqueHosts.Insert(monitoring.Host) + } + } if servers, found := topoHasField("Grafanas"); found { for i := 0; i < servers.Len(); i++ { grafana := servers.Index(i).Interface().(*GrafanaSpec) @@ -298,14 +301,13 @@ func (i *MonitorInstance) InitConfig( if err != nil { return err } + cfig.SetRemoteConfig(string(remoteCfg)) // doesn't work if _, err := i.setTLSConfig(ctx, false, nil, paths); err != nil { return err } - cfig.SetRemoteConfig(string(remoteCfg)) - for _, alertmanager := range spec.ExternalAlertmanagers { cfig.AddAlertmanager(alertmanager.Host, uint64(alertmanager.WebPort)) } @@ -329,19 +331,34 @@ func (i *MonitorInstance) InitConfig( return err } - ngcfg.AddIP(i.GetHost()) - ngcfg.AddPort(spec.NgPort) - ngcfg.AddDeployDir(paths.Deploy) - ngcfg.AddDataDir(paths.Data[0]) - ngcfg.AddLog(paths.Log) - - fp = filepath.Join(paths.Cache, fmt.Sprintf("ngmonitoring_%s_%d.toml", i.GetHost(), i.GetPort())) - if err := ngcfg.ConfigToFile(fp); err != nil { - return err - } - dst = filepath.Join(paths.Deploy, "conf", "ngmonitoring.toml") - if err := e.Transfer(ctx, fp, dst, false, 0, false); err != nil { - return err + if spec.NgPort > 0 { + ngcfg := config.NewNgMonitoringConfig(clusterName, clusterVersion, enableTLS) + if servers, found := topoHasField("PDServers"); found { + for i := 0; i < servers.Len(); i++ { + pd := servers.Index(i).Interface().(*PDSpec) + ngcfg.AddPD(pd.Host, uint64(pd.ClientPort)) + } + } + ngcfg.AddIP(i.GetHost()). + AddPort(spec.NgPort). + AddDeployDir(paths.Deploy). + AddDataDir(paths.Data[0]). + AddLog(paths.Log) + + if servers, found := topoHasField("Monitors"); found { + for i := 0; i < servers.Len(); i++ { + monitoring := servers.Index(i).Interface().(*PrometheusSpec) + cfig.AddNGMonitoring(monitoring.Host, uint64(monitoring.NgPort)) + } + } + fp = filepath.Join(paths.Cache, fmt.Sprintf("ngmonitoring_%s_%d.toml", i.GetHost(), i.GetPort())) + if err := ngcfg.ConfigToFile(fp); err != nil { + return err + } + dst = filepath.Join(paths.Deploy, "conf", "ngmonitoring.toml") + if err := e.Transfer(ctx, fp, dst, false, 0, false); err != nil { + return err + } } fp = filepath.Join(paths.Cache, fmt.Sprintf("prometheus_%s_%d.yml", i.GetHost(), i.GetPort())) diff --git a/pkg/cluster/template/config/prometheus.go b/pkg/cluster/template/config/prometheus.go index cba903a515..b21cb2846f 100644 --- a/pkg/cluster/template/config/prometheus.go +++ b/pkg/cluster/template/config/prometheus.go @@ -28,7 +28,6 @@ import ( type PrometheusConfig struct { ClusterName string TLSEnabled bool - KafkaAddrs []string NodeExporterAddrs []string TiDBStatusAddrs []string TiKVStatusAddrs []string @@ -38,14 +37,13 @@ type PrometheusConfig struct { PumpAddrs []string DrainerAddrs []string CDCAddrs []string - ZookeeperAddrs []string BlackboxExporterAddrs []string LightningAddrs []string MonitoredServers []string AlertmanagerAddrs []string + NGMonitoringAddrs []string PushgatewayAddr string BlackboxAddr string - KafkaExporterAddr string GrafanaAddr string HasTiKVAccelerateRules bool @@ -70,12 +68,6 @@ func NewPrometheusConfig(clusterName, clusterVersion string, enableTLS bool) *Pr return cfg } -// AddKafka add a kafka address -func (c *PrometheusConfig) AddKafka(ip string, port uint64) *PrometheusConfig { - c.KafkaAddrs = append(c.KafkaAddrs, fmt.Sprintf("%s:%d", ip, port)) - return c -} - // AddNodeExpoertor add a node expoter address func (c *PrometheusConfig) AddNodeExpoertor(ip string, port uint64) *PrometheusConfig { c.NodeExporterAddrs = append(c.NodeExporterAddrs, fmt.Sprintf("%s:%d", ip, port)) @@ -130,12 +122,6 @@ func (c *PrometheusConfig) AddCDC(ip string, port uint64) *PrometheusConfig { return c } -// AddZooKeeper add a zookeeper address -func (c *PrometheusConfig) AddZooKeeper(ip string, port uint64) *PrometheusConfig { - c.ZookeeperAddrs = append(c.ZookeeperAddrs, fmt.Sprintf("%s:%d", ip, port)) - return c -} - // AddBlackboxExporter add a BlackboxExporter address func (c *PrometheusConfig) AddBlackboxExporter(ip string, port uint64) *PrometheusConfig { c.BlackboxExporterAddrs = append(c.BlackboxExporterAddrs, fmt.Sprintf("%s:%d", ip, port)) @@ -172,13 +158,13 @@ func (c *PrometheusConfig) AddBlackbox(ip string, port uint64) *PrometheusConfig return c } -// AddKafkaExporter add an kafka exporter address -func (c *PrometheusConfig) AddKafkaExporter(ip string, port uint64) *PrometheusConfig { - c.KafkaExporterAddr = fmt.Sprintf("%s:%d", ip, port) +// AddNGMonitoring add an ng-monitoring server exporter address +func (c *PrometheusConfig) AddNGMonitoring(ip string, port uint64) *PrometheusConfig { + c.NGMonitoringAddrs = append(c.NGMonitoringAddrs, fmt.Sprintf("%s:%d", ip, port)) return c } -// AddGrafana add an kafka exporter address +// AddGrafana add an Grafana address func (c *PrometheusConfig) AddGrafana(ip string, port uint64) *PrometheusConfig { c.GrafanaAddr = fmt.Sprintf("%s:%d", ip, port) return c