diff --git a/pkg/apis/pingcap/v1alpha1/tiflash_config.go b/pkg/apis/pingcap/v1alpha1/tiflash_config.go index b8b9545c6e..f111d4563d 100644 --- a/pkg/apis/pingcap/v1alpha1/tiflash_config.go +++ b/pkg/apis/pingcap/v1alpha1/tiflash_config.go @@ -32,7 +32,9 @@ type TiFlashConfig struct { // +k8s:openapi-gen=false type FlashServerConfig struct { // +optional - EngineAddr string `json:"engine-addr,omitempty" toml:"engine-addr,omitempty"` + EngineAddr string `json:"engine-addr,omitempty" toml:"engine-addr,omitempty"` + // +optional + StatusAddr string `json:"status-addr,omitempty" toml:"status-addr,omitempty"` TiKVServerConfig `json:",inline"` } diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 6ee5f55e69..c153df3dd9 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -297,6 +297,15 @@ func AnnProm(port int32) map[string]string { } } +// AnnAdditionalProm adds an additional prometheus scraping configuration annotation for a pod +// which has multiple metrics endpoints. +// We assume that the metrics path is the same as the previous metrics path. +func AnnAdditionalProm(name string, port int32) map[string]string { + return map[string]string{ + fmt.Sprintf("%s.prometheus.io/port", name): fmt.Sprintf("%d", port), + } +} + func ParseStorageRequest(req corev1.ResourceList) (corev1.ResourceRequirements, error) { if req == nil { return corev1.ResourceRequirements{}, nil diff --git a/pkg/manager/member/tiflash_member_manager.go b/pkg/manager/member/tiflash_member_manager.go index 533b8ba052..816dcc0ce7 100644 --- a/pkg/manager/member/tiflash_member_manager.go +++ b/pkg/manager/member/tiflash_member_manager.go @@ -420,6 +420,7 @@ func getNewStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.St tiflashLabel := labelTiFlash(tc) setName := controller.TiFlashMemberName(tcName) podAnnotations := CombineAnnotations(controller.AnnProm(8234), baseTiFlashSpec.Annotations()) + podAnnotations = 
CombineAnnotations(controller.AnnAdditionalProm("tiflash.proxy", 20292), podAnnotations) stsAnnotations := getStsAnnotations(tc, label.TiFlashLabelVal) capacity := controller.TiKVCapacity(tc.Spec.TiFlash.Limits) headlessSvcName := controller.TiFlashPeerMemberName(tcName) diff --git a/pkg/manager/member/tiflash_util.go b/pkg/manager/member/tiflash_util.go index 7e32223d92..7eb520408e 100644 --- a/pkg/manager/member/tiflash_util.go +++ b/pkg/manager/member/tiflash_util.go @@ -140,7 +140,11 @@ func setTiFlashProxyConfigDefault(config *v1alpha1.ProxyConfig, clusterName, ns if config.Server.EngineAddr == "" { config.Server.EngineAddr = fmt.Sprintf("%s-POD_NUM.%s.%s.svc:3930", controller.TiFlashMemberName(clusterName), controller.TiFlashPeerMemberName(clusterName), ns) } + if config.Server.StatusAddr == "" { + config.Server.StatusAddr = "0.0.0.0:20292" + } } + func setTiFlashCommonConfigDefault(config *v1alpha1.CommonConfig, clusterName, ns string) { if config.TmpPath == "" { config.TmpPath = "/data0/tmp" diff --git a/pkg/manager/member/tiflash_util_test.go b/pkg/manager/member/tiflash_util_test.go index 032259be96..4a9a55e4e0 100644 --- a/pkg/manager/member/tiflash_util_test.go +++ b/pkg/manager/member/tiflash_util_test.go @@ -120,6 +120,7 @@ var ( LogLevel: "info", Server: &v1alpha1.FlashServerConfig{ EngineAddr: "test-tiflash-POD_NUM.test-tiflash-peer.test.svc:3930", + StatusAddr: "0.0.0.0:20292", }, }, } @@ -217,6 +218,7 @@ var ( LogLevel: "info1", Server: &v1alpha1.FlashServerConfig{ EngineAddr: "test-tiflash-POD_NUM.test-tiflash-peer.test.svc:3930", + StatusAddr: "0.0.0.0:20292", }, }, } diff --git a/pkg/monitor/monitor/template.go b/pkg/monitor/monitor/template.go index 7cde743fda..6e5c3ea967 100644 --- a/pkg/monitor/monitor/template.go +++ b/pkg/monitor/monitor/template.go @@ -15,7 +15,6 @@ package monitor import ( "fmt" - "github.com/pingcap/tidb-operator/pkg/label" "github.com/pingcap/tidb-operator/pkg/util" "github.com/prometheus/common/model" 
"github.com/prometheus/prometheus/config" @@ -27,15 +26,14 @@ import ( ) const ( - instanceLabel = "__meta_kubernetes_pod_label_app_kubernetes_io_instance" - componentLabel = "__meta_kubernetes_pod_label_app_kubernetes_io_component" - scrapeLabel = "__meta_kubernetes_pod_annotation_prometheus_io_scrape" - metricsPathLabel = "__meta_kubernetes_pod_annotation_prometheus_io_path" - ioPortLabel = "__meta_kubernetes_pod_annotation_prometheus_io_port" - namespaceLabel = "__meta_kubernetes_namespace" - podNameLabel = "__meta_kubernetes_pod_name" - nodeNameLabel = "__meta_kubernetes_pod_node_name" - podIPLabel = "__meta_kubernetes_pod_ip" + instanceLabel = "__meta_kubernetes_pod_label_app_kubernetes_io_instance" + componentLabel = "__meta_kubernetes_pod_label_app_kubernetes_io_component" + scrapeLabel = "__meta_kubernetes_pod_annotation_prometheus_io_scrape" + metricsPathLabel = "__meta_kubernetes_pod_annotation_prometheus_io_path" + portLabel = "__meta_kubernetes_pod_annotation_prometheus_io_port" + namespaceLabel = "__meta_kubernetes_namespace" + podNameLabel = "__meta_kubernetes_pod_name" + additionalPortLabelPattern = "__meta_kubernetes_pod_annotation_%s_prometheus_io_port" ) var ( @@ -46,6 +44,7 @@ var ( pdPattern config.Regexp tidbPattern config.Regexp addressPattern config.Regexp + tiflashPattern config.Regexp dashBoardConfig = `{ "apiVersion": 1, "providers": [ @@ -92,6 +91,10 @@ func init() { if err != nil { klog.Fatalf("monitor regex template parse error,%v", err) } + tiflashPattern, err = config.NewRegexp("tiflash") + if err != nil { + klog.Fatalf("monitor regex template parse error,%v", err) + } } type MonitorConfigModel struct { @@ -102,6 +105,13 @@ type MonitorConfigModel struct { } func newPrometheusConfig(cmodel *MonitorConfigModel) *config.Config { + pdReplacement := fmt.Sprintf("$1.$2-%s-peer:$3", "pd") + tikvReplacement := fmt.Sprintf("$1.$2-%s-peer:$3", "tikv") + tidbReplacement := fmt.Sprintf("$1.$2-%s-peer:$3", "tidb") + tiflashReplacement := 
fmt.Sprintf("$1.$2-%s-peer:$3", "tiflash") + tiflashProxyReplacement := fmt.Sprintf("$1.$2-%s-peer:$3", "tiflash") + tiflashProxyPortLabel := fmt.Sprintf(additionalPortLabelPattern, "tiflash_proxy") + var c = config.Config{ GlobalConfig: config.GlobalConfig{ ScrapeInterval: model.Duration(15 * time.Second), @@ -111,42 +121,47 @@ func newPrometheusConfig(cmodel *MonitorConfigModel) *config.Config { "/prometheus-rules/rules/*.rules.yml", }, ScrapeConfigs: []*config.ScrapeConfig{ - scrapeJob("pd", pdPattern, cmodel), - scrapeJob("tidb", tidbPattern, cmodel), - scrapeJob("tikv", tikvPattern, cmodel), + scrapeJob("pd", pdPattern, cmodel, buildAddressRelabelConfig(portLabel, pdReplacement, true)), + scrapeJob("tidb", tidbPattern, cmodel, buildAddressRelabelConfig(portLabel, tidbReplacement, true)), + scrapeJob("tikv", tikvPattern, cmodel, buildAddressRelabelConfig(portLabel, tikvReplacement, true)), + scrapeJob("tiflash", tiflashPattern, cmodel, buildAddressRelabelConfig(portLabel, tiflashReplacement, true)), + scrapeJob("tiflash-proxy", tiflashPattern, cmodel, buildAddressRelabelConfig(tiflashProxyPortLabel, tiflashProxyReplacement, true)), }, } return &c } -func scrapeJob(name string, componentPattern config.Regexp, cmodel *MonitorConfigModel) *config.ScrapeConfig { - +func buildAddressRelabelConfig(portLabelName, replacement string, isTidbClusterComponent bool) *config.RelabelConfig { addressRelabelConfig := &config.RelabelConfig{ SourceLabels: model.LabelNames{ "__address__", - ioPortLabel, + model.LabelName(portLabelName), }, Action: config.RelabelReplace, Regex: portPattern, Replacement: "$1:$2", TargetLabel: "__address__", } - if name == label.PDLabelVal || name == label.TiDBLabelVal || name == label.TiKVLabelVal { + if isTidbClusterComponent { addressRelabelConfig = &config.RelabelConfig{ + Action: config.RelabelReplace, + Regex: addressPattern, + Replacement: replacement, + TargetLabel: "__address__", SourceLabels: model.LabelNames{ podNameLabel, instanceLabel, 
- ioPortLabel, + model.LabelName(portLabelName), }, - Action: config.RelabelReplace, - Regex: addressPattern, - Replacement: fmt.Sprintf("$1.$2-%s-peer:$3", name), - TargetLabel: "__address__", } } + return addressRelabelConfig +} + +func scrapeJob(jobName string, componentPattern config.Regexp, cmodel *MonitorConfigModel, addressRelabelConfig *config.RelabelConfig) *config.ScrapeConfig { return &config.ScrapeConfig{ - JobName: name, + JobName: jobName, ScrapeInterval: model.Duration(15 * time.Second), Scheme: "http", HonorLabels: true, @@ -257,6 +272,7 @@ func addAlertManagerUrl(pc *config.Config, cmodel *MonitorConfigModel) { func addTlsConfig(pc *config.Config) { for id, sconfig := range pc.ScrapeConfigs { + // TODO support tiflash tls when it gets ready if sconfig.JobName == "pd" || sconfig.JobName == "tidb" || sconfig.JobName == "tikv" { sconfig.HTTPClientConfig.TLSConfig = config.TLSConfig{ CAFile: path.Join(util.ClusterClientTLSPath, corev1.ServiceAccountRootCAKey), diff --git a/pkg/monitor/monitor/template_test.go b/pkg/monitor/monitor/template_test.go index 6ae8fb3692..02dcd5ac38 100644 --- a/pkg/monitor/monitor/template_test.go +++ b/pkg/monitor/monitor/template_test.go @@ -177,6 +177,102 @@ scrape_configs: - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] target_label: cluster action: replace +- job_name: tiflash + honor_labels: true + scrape_interval: 15s + scheme: http + kubernetes_sd_configs: + - api_server: null + role: pod + namespaces: + names: + - ns1 + - ns2 + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: target + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + regex: tiflash + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + regex: "true" + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + regex: (.+) + 
target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: (.+);(.+);(.+) + target_label: __address__ + replacement: $1.$2-tiflash-peer:$3 + action: replace + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace +- job_name: tiflash-proxy + honor_labels: true + scrape_interval: 15s + scheme: http + kubernetes_sd_configs: + - api_server: null + role: pod + namespaces: + names: + - ns1 + - ns2 + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: target + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + regex: tiflash + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + regex: "true" + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + regex: (.+) + target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_tiflash_proxy_prometheus_io_port] + regex: (.+);(.+);(.+) + target_label: __address__ + replacement: $1.$2-tiflash-peer:$3 + action: replace + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance 
+ action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace ` model := &MonitorConfigModel{ ReleaseTargetRegex: &target, @@ -354,6 +450,102 @@ scrape_configs: - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] target_label: cluster action: replace +- job_name: tiflash + honor_labels: true + scrape_interval: 15s + scheme: http + kubernetes_sd_configs: + - api_server: null + role: pod + namespaces: + names: + - ns1 + - ns2 + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: target + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + regex: tiflash + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + regex: "true" + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + regex: (.+) + target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: (.+);(.+);(.+) + target_label: __address__ + replacement: $1.$2-tiflash-peer:$3 + action: replace + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: 
[__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace +- job_name: tiflash-proxy + honor_labels: true + scrape_interval: 15s + scheme: http + kubernetes_sd_configs: + - api_server: null + role: pod + namespaces: + names: + - ns1 + - ns2 + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: target + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + regex: tiflash + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + regex: "true" + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + regex: (.+) + target_label: __metrics_path__ + action: replace + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_label_app_kubernetes_io_instance, + __meta_kubernetes_pod_annotation_tiflash_proxy_prometheus_io_port] + regex: (.+);(.+);(.+) + target_label: __address__ + replacement: $1.$2-tiflash-peer:$3 + action: replace + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: instance + action: replace + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + target_label: cluster + action: replace ` model := &MonitorConfigModel{ ReleaseTargetRegex: &target,