Skip to content

Commit

Permalink
blackbox: fix the issue of TLS handshake error in tls cluster (#1443)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsvisa authored Jun 24, 2021
1 parent 5d1942a commit 48cd6ae
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 94 deletions.
38 changes: 0 additions & 38 deletions embed/templates/config/blackbox.yml

This file was deleted.

49 changes: 49 additions & 0 deletions embed/templates/config/blackbox.yml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
modules:
http_2xx:
prober: http
http:
method: GET
http_post_2xx:
prober: http
http:
method: POST
tcp_connect:
prober: tcp
{{- if .TLSEnabled}}
tls_connect:
prober: tcp
tcp:
tls: true
tls_config:
insecure_skip_verify: false
ca_file: {{.DeployDir}}/tls/ca.crt
cert_file: {{.DeployDir}}/tls/blackbox_exporter.crt
key_file: {{.DeployDir}}/tls/blackbox_exporter.pem
{{- end}}
pop3s_banner:
prober: tcp
tcp:
query_response:
- expect: '^+OK'
tls: true
tls_config:
insecure_skip_verify: false
ssh_banner:
prober: tcp
tcp:
query_response:
- expect: '^SSH-2.0-'
irc_banner:
prober: tcp
tcp:
query_response:
- send: 'NICK prober'
- send: 'USER prober prober prober :prober'
- expect: 'PING :([^ ]+)'
send: 'PONG ${1}'
- expect: '^:[^ ]+ 001'
icmp:
prober: icmp
timeout: 5s
icmp:
preferred_ip_protocol: 'ip4'
8 changes: 8 additions & 0 deletions embed/templates/config/prometheus.yml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,11 @@ scrape_configs:
scrape_interval: 30s
metrics_path: /probe
params:
{{- if .TLSEnabled}}
module: [tls_connect]
{{- else}}
module: [tcp_connect]
{{- end}}
static_configs:
{{- if .KafkaAddrs}}
- targets:
Expand Down Expand Up @@ -275,7 +279,11 @@ scrape_configs:
scrape_interval: 30s
metrics_path: /probe
params:
{{- if .TLSEnabled}}
module: [tls_connect]
{{- else}}
module: [tcp_connect]
{{- end}}
static_configs:
- targets:
{{- range .TiDBStatusAddrs}}
Expand Down
82 changes: 61 additions & 21 deletions pkg/cluster/manager/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,16 @@ func buildScaleOutTask(
iterErr = err
return
}
tb = tb.TLSCert(inst, ca, meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
tb = tb.TLSCert(
inst.GetHost(),
inst.ComponentName(),
inst.Role(),
inst.GetMainPort(),
ca,
meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
}

t := tb.ScaleConfig(name,
Expand Down Expand Up @@ -282,16 +288,18 @@ func buildScaleOutTask(
}

// Deploy monitor relevant components to remote
dlTasks, dpTasks := buildMonitoredDeployTask(
m.bindVersion,
specManager,
dlTasks, dpTasks, err := buildMonitoredDeployTask(
m,
name,
uninitializedHosts,
topo.BaseTopo().GlobalOptions,
topo.BaseTopo().MonitoredOptions,
base.Version,
gOpt,
)
if err != nil {
return nil, err
}
downloadCompTasks = append(downloadCompTasks, convertStepDisplaysToTasks(dlTasks)...)
deployCompTasks = append(deployCompTasks, convertStepDisplaysToTasks(dpTasks)...)

Expand Down Expand Up @@ -358,23 +366,22 @@ func convertStepDisplaysToTasks(t []*task.StepDisplay) []task.Task {
}

func buildMonitoredDeployTask(
bindVersion spec.BindVersion,
specManager *spec.SpecManager,
m *Manager,
name string,
uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch
globalOptions *spec.GlobalOptions,
monitoredOptions *spec.MonitoredOptions,
version string,
gOpt operator.Options,
) (downloadCompTasks []*task.StepDisplay, deployCompTasks []*task.StepDisplay) {
) (downloadCompTasks []*task.StepDisplay, deployCompTasks []*task.StepDisplay, err error) {
if monitoredOptions == nil {
return
}

uniqueCompOSArch := set.NewStringSet()
// monitoring agents
for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} {
version := bindVersion(comp, version)
version := m.bindVersion(comp, version)

for host, info := range uniqueHosts {
// populate unique comp-os-arch set
Expand All @@ -395,8 +402,21 @@ func buildMonitoredDeployTask(
}
// log dir will always be with values, but might not used by the component
logDir := spec.Abs(globalOptions.User, monitoredOptions.LogDir)

deployDirs := []string{
deployDir,
dataDir,
logDir,
filepath.Join(deployDir, "bin"),
filepath.Join(deployDir, "conf"),
filepath.Join(deployDir, "scripts"),
}
if globalOptions.TLSEnabled {
deployDirs = append(deployDirs, filepath.Join(deployDir, "tls"))
}

// Deploy component
t := task.NewBuilder().
tb := task.NewBuilder().
UserSSH(
host,
info.ssh,
Expand All @@ -406,11 +426,7 @@ func buildMonitoredDeployTask(
gOpt.SSHType,
globalOptions.SSHType,
).
Mkdir(globalOptions.User, host,
deployDir, dataDir, logDir,
filepath.Join(deployDir, "bin"),
filepath.Join(deployDir, "conf"),
filepath.Join(deployDir, "scripts")).
Mkdir(globalOptions.User, host, deployDirs...).
CopyComponent(
comp,
info.os,
Expand All @@ -427,15 +443,38 @@ func buildMonitoredDeployTask(
globalOptions.ResourceControl,
monitoredOptions,
globalOptions.User,
globalOptions.TLSEnabled,
meta.DirPaths{
Deploy: deployDir,
Data: []string{dataDir},
Log: logDir,
Cache: specManager.Path(name, spec.TempConfigPath),
Cache: m.specManager.Path(name, spec.TempConfigPath),
},
).
BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", comp, host))
deployCompTasks = append(deployCompTasks, t)
)

if globalOptions.TLSEnabled && comp == spec.ComponentBlackboxExporter {
ca, innerr := crypto.ReadCA(
name,
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCACert),
m.specManager.Path(name, spec.TLSCertKeyDir, spec.TLSCAKey),
)
if innerr != nil {
err = innerr
return
}
tb = tb.TLSCert(
host,
spec.ComponentBlackboxExporter,
spec.ComponentBlackboxExporter,
monitoredOptions.BlackboxExporterPort,
ca,
meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
}

deployCompTasks = append(deployCompTasks, tb.BuildAsStep(fmt.Sprintf(" - Copy %s -> %s", comp, host)))
}
}
return
Expand Down Expand Up @@ -485,6 +524,7 @@ func buildRefreshMonitoredConfigTasks(
globalOptions.ResourceControl,
monitoredOptions,
globalOptions.User,
globalOptions.TLSEnabled,
meta.DirPaths{
Deploy: deployDir,
Data: []string{dataDir},
Expand Down
22 changes: 15 additions & 7 deletions pkg/cluster/manager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,16 @@ func (m *Manager) Deploy(

// generate and transfer tls cert for instance
if globalOptions.TLSEnabled {
t = t.TLSCert(inst, ca, meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
t = t.TLSCert(
inst.GetHost(),
inst.ComponentName(),
inst.Role(),
inst.GetMainPort(),
ca,
meta.DirPaths{
Deploy: deployDir,
Cache: m.specManager.Path(name, spec.TempConfigPath),
})
}

// generate configs for the component
Expand Down Expand Up @@ -341,16 +347,18 @@ func (m *Manager) Deploy(
}

// Deploy monitor relevant components to remote
dlTasks, dpTasks := buildMonitoredDeployTask(
m.bindVersion,
m.specManager,
dlTasks, dpTasks, err := buildMonitoredDeployTask(
m,
name,
uniqueHosts,
globalOptions,
topo.GetMonitoredOptions(),
clusterVersion,
gOpt,
)
if err != nil {
return err
}
downloadCompTasks = append(downloadCompTasks, dlTasks...)
deployCompTasks = append(deployCompTasks, dpTasks...)

Expand Down
10 changes: 7 additions & 3 deletions pkg/cluster/task/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,15 @@ func (b *Builder) ScaleConfig(clusterName, clusterVersion string, specManager *s
}

// MonitoredConfig appends a MonitoredConfig task to the current task collection
func (b *Builder) MonitoredConfig(name, comp, host string, globResCtl meta.ResourceControl, options *spec.MonitoredOptions, deployUser string, paths meta.DirPaths) *Builder {
func (b *Builder) MonitoredConfig(name, comp, host string, globResCtl meta.ResourceControl, options *spec.MonitoredOptions, deployUser string, tlsEnabled bool, paths meta.DirPaths) *Builder {
b.tasks = append(b.tasks, &MonitoredConfig{
name: name,
component: comp,
host: host,
globResCtl: globResCtl,
options: options,
deployUser: deployUser,
tlsEnabled: tlsEnabled,
paths: paths,
})
return b
Expand Down Expand Up @@ -401,10 +402,13 @@ func (b *Builder) DeploySpark(inst spec.Instance, sparkVersion, srcPath, deployD
}

// TLSCert generates certificate for instance and transfers it to the server
func (b *Builder) TLSCert(inst spec.Instance, ca *crypto.CertificateAuthority, paths meta.DirPaths) *Builder {
func (b *Builder) TLSCert(host, comp, role string, port int, ca *crypto.CertificateAuthority, paths meta.DirPaths) *Builder {
b.tasks = append(b.tasks, &TLSCert{
host: host,
comp: comp,
role: role,
port: port,
ca: ca,
inst: inst,
paths: paths,
})
return b
Expand Down
17 changes: 8 additions & 9 deletions pkg/cluster/task/monitored_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type MonitoredConfig struct {
globResCtl meta.ResourceControl
options *spec.MonitoredOptions
deployUser string
tlsEnabled bool
paths meta.DirPaths
}

Expand All @@ -66,19 +67,17 @@ func (m *MonitoredConfig) Execute(ctx context.Context) error {
var cfg template.ConfigGenerator
switch m.component {
case spec.ComponentNodeExporter:
if err := m.syncBlackboxConfig(ctx, exec, config.NewBlackboxConfig()); err != nil {
if err := m.syncBlackboxConfig(ctx, exec, config.NewBlackboxConfig(m.paths.Deploy, m.tlsEnabled)); err != nil {
return err
}
cfg = scripts.NewNodeExporterScript(
m.paths.Deploy,
m.paths.Log,
).WithPort(uint64(m.options.NodeExporterPort)).
cfg = scripts.
NewNodeExporterScript(m.paths.Deploy, m.paths.Log).
WithPort(uint64(m.options.NodeExporterPort)).
WithNumaNode(m.options.NumaNode)
case spec.ComponentBlackboxExporter:
cfg = scripts.NewBlackboxExporterScript(
m.paths.Deploy,
m.paths.Log,
).WithPort(uint64(m.options.BlackboxExporterPort))
cfg = scripts.
NewBlackboxExporterScript(m.paths.Deploy, m.paths.Log).
WithPort(uint64(m.options.BlackboxExporterPort))
default:
return fmt.Errorf("unknown monitored component %s", m.component)
}
Expand Down
Loading

0 comments on commit 48cd6ae

Please sign in to comment.