Skip to content

Commit

Permalink
Merge pull request #386 from alauda/metrics/external-dns
Browse files Browse the repository at this point in the history
pinger: add metrics to resolve external address
  • Loading branch information
oilbeater authored Jun 23, 2020
2 parents 150d2f4 + 61aa3ba commit ac1e75c
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ spec:
containers:
- name: pinger
image: "$REGISTRY/kube-ovn:$VERSION"
command: ["/kube-ovn/kube-ovn-pinger", "--external-address=114.114.114.114"]
command: ["/kube-ovn/kube-ovn-pinger", "--external-address=114.114.114.114", "--external-dns=alauda.cn"]
imagePullPolicy: $IMAGE_PULL_POLICY
securityContext:
runAsUser: 0
Expand Down
6 changes: 3 additions & 3 deletions dist/monitoring/pinger-grafana.json
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@
"tableColumn": "",
"targets": [
{
"expr": "sum(pinger_dns_healthy)",
"expr": "sum(pinger_internal_dns_healthy)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
Expand Down Expand Up @@ -540,7 +540,7 @@
"tableColumn": "",
"targets": [
{
"expr": "sum(pinger_dns_unhealthy)",
"expr": "sum(pinger_internal_dns_unhealthy)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
Expand Down Expand Up @@ -855,7 +855,7 @@
"steppedLine": false,
"targets": [
{
"expr": "(sum(delta(pinger_dns_latency_ms_sum[5m])) by (nodeName))/(sum(delta(pinger_dns_latency_ms_count[5m])) by (nodeName))",
"expr": "(sum(delta(pinger_internal_dns_latency_ms_sum[5m])) by (nodeName))/(sum(delta(pinger_internal_dns_latency_ms_count[5m])) by (nodeName))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{nodeName}}",
Expand Down
11 changes: 7 additions & 4 deletions pkg/pinger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ type Configuration struct {
DaemonSetName string
Interval int
Mode string
DNS string
InternalDNS string
ExternalDNS string
NodeName string
HostIP string
PodName string
Expand All @@ -36,8 +37,9 @@ func ParseFlags() (*Configuration, error) {
argDaemonSetName = pflag.String("ds-name", "kube-ovn-pinger", "kube-ovn-pinger daemonset name")
argInterval = pflag.Int("interval", 5, "interval seconds between consecutive pings")
argMode = pflag.String("mode", "server", "server or job Mode")
argDns = pflag.String("dns", "kubernetes.default", "check dns from pod")
argExternalAddress = pflag.String("external-address", "", "check ping connection to an external address, default empty that will disable external check")
argInternalDns = pflag.String("internal-dns", "kubernetes.default", "check dns from pod")
argExternalDns = pflag.String("external-dns", "alauda.cn", "check external dns resolve from pod")
argExternalAddress = pflag.String("external-address", "114.114.114.114", "check ping connection to an external address, default empty that will disable external check")
argNetworkMode = pflag.String("network-mode", "kube-ovn", "The cni plugin current cluster used, default: kube-ovn")
)

Expand Down Expand Up @@ -67,7 +69,8 @@ func ParseFlags() (*Configuration, error) {
DaemonSetName: *argDaemonSetName,
Interval: *argInterval,
Mode: *argMode,
DNS: *argDns,
InternalDNS: *argInternalDns,
ExternalDNS: *argExternalDns,
PodIP: os.Getenv("POD_IP"),
HostIP: os.Getenv("HOST_IP"),
NodeName: os.Getenv("NODE_NAME"),
Expand Down
71 changes: 55 additions & 16 deletions pkg/pinger/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,25 +68,50 @@ var (
[]string{
"nodeName",
})
dnsHealthyGauge = prometheus.NewGaugeVec(
internalDnsHealthyGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "pinger_dns_healthy",
Name: "pinger_internal_dns_healthy",
Help: "if the dns request is healthy on this node",
},
[]string{
"nodeName",
})
dnsUnhealthyGauge = prometheus.NewGaugeVec(
internalDnsUnhealthyGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "pinger_dns_unhealthy",
Name: "pinger_internal_dns_unhealthy",
Help: "if the dns request is unhealthy on this node",
},
[]string{
"nodeName",
})
dnsRequestLatencyHistogram = prometheus.NewHistogramVec(
internalDnsRequestLatencyHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "pinger_dns_latency_ms",
Name: "pinger_internal_dns_latency_ms",
Help: "the latency ms histogram the node request dns",
Buckets: []float64{2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50},
},
[]string{
"nodeName",
})
externalDnsHealthyGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "pinger_external_dns_healthy",
Help: "if the dns request is healthy on this node",
},
[]string{
"nodeName",
})
externalDnsUnhealthyGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "pinger_external_dns_unhealthy",
Help: "if the dns request is unhealthy on this node",
},
[]string{
"nodeName",
})
externalDnsRequestLatencyHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "pinger_external_dns_latency_ms",
Help: "the latency ms histogram the node request dns",
Buckets: []float64{2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50},
},
Expand Down Expand Up @@ -176,9 +201,12 @@ func init() {
prometheus.MustRegister(apiserverHealthyGauge)
prometheus.MustRegister(apiserverUnhealthyGauge)
prometheus.MustRegister(apiserverRequestLatencyHistogram)
prometheus.MustRegister(dnsHealthyGauge)
prometheus.MustRegister(dnsUnhealthyGauge)
prometheus.MustRegister(dnsRequestLatencyHistogram)
prometheus.MustRegister(internalDnsHealthyGauge)
prometheus.MustRegister(internalDnsUnhealthyGauge)
prometheus.MustRegister(internalDnsRequestLatencyHistogram)
prometheus.MustRegister(externalDnsHealthyGauge)
prometheus.MustRegister(externalDnsUnhealthyGauge)
prometheus.MustRegister(externalDnsRequestLatencyHistogram)
prometheus.MustRegister(podPingLatencyHistogram)
prometheus.MustRegister(podPingLostCounter)
prometheus.MustRegister(nodePingLatencyHistogram)
Expand Down Expand Up @@ -218,15 +246,26 @@ func SetApiserverUnhealthyMetrics(nodeName string) {
apiserverUnhealthyGauge.WithLabelValues(nodeName).Set(1)
}

func SetDnsHealthyMetrics(nodeName string, latency float64) {
dnsHealthyGauge.WithLabelValues(nodeName).Set(1)
dnsRequestLatencyHistogram.WithLabelValues(nodeName).Observe(latency)
dnsUnhealthyGauge.WithLabelValues(nodeName).Set(0)
// SetInternalDnsHealthyMetrics records a successful internal DNS lookup for
// the node identified by nodeName: the internal healthy gauge is set to 1,
// latency is observed on the internal DNS latency histogram
// (pinger_internal_dns_latency_ms), and the internal unhealthy gauge is
// reset to 0.
func SetInternalDnsHealthyMetrics(nodeName string, latency float64) {
	healthy := internalDnsHealthyGauge.WithLabelValues(nodeName)
	healthy.Set(1)

	latencies := internalDnsRequestLatencyHistogram.WithLabelValues(nodeName)
	latencies.Observe(latency)

	unhealthy := internalDnsUnhealthyGauge.WithLabelValues(nodeName)
	unhealthy.Set(0)
}

// SetInternalDnsUnhealthyMetrics records a failed internal DNS lookup for the
// node identified by nodeName: the internal healthy gauge is reset to 0 and
// the internal unhealthy gauge is set to 1. No latency sample is recorded.
func SetInternalDnsUnhealthyMetrics(nodeName string) {
	healthy := internalDnsHealthyGauge.WithLabelValues(nodeName)
	healthy.Set(0)

	unhealthy := internalDnsUnhealthyGauge.WithLabelValues(nodeName)
	unhealthy.Set(1)
}

// SetExternalDnsHealthyMetrics records a successful external DNS lookup for
// the node identified by nodeName: the external healthy gauge is set to 1,
// latency is observed on the external DNS latency histogram
// (pinger_external_dns_latency_ms), and the external unhealthy gauge is
// reset to 0.
func SetExternalDnsHealthyMetrics(nodeName string, latency float64) {
	healthy := externalDnsHealthyGauge.WithLabelValues(nodeName)
	healthy.Set(1)

	latencies := externalDnsRequestLatencyHistogram.WithLabelValues(nodeName)
	latencies.Observe(latency)

	unhealthy := externalDnsUnhealthyGauge.WithLabelValues(nodeName)
	unhealthy.Set(0)
}

func SetDnsUnhealthyMetrics(nodeName string) {
dnsHealthyGauge.WithLabelValues(nodeName).Set(0)
dnsUnhealthyGauge.WithLabelValues(nodeName).Set(1)
// SetExternalDnsUnhealthyMetrics records a failed external DNS lookup for the
// node identified by nodeName: the external healthy gauge is reset to 0 and
// the external unhealthy gauge is set to 1. No latency sample is recorded.
func SetExternalDnsUnhealthyMetrics(nodeName string) {
	healthy := externalDnsHealthyGauge.WithLabelValues(nodeName)
	healthy.Set(0)

	unhealthy := externalDnsUnhealthyGauge.WithLabelValues(nodeName)
	unhealthy.Set(1)
}

func SetPodPingMetrics(srcNodeName, srcNodeIP, srcPodIP, targetNodeName, targetNodeIP, targetPodIP string, latency float64, lost int) {
Expand Down
40 changes: 32 additions & 8 deletions pkg/pinger/ping.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,15 @@ func ping(config *Configuration) {
checkApiServer(config)
pingNodes(config)
pingPods(config)
nslookup(config)
pingExternal(config)
internalNslookup(config)

if config.ExternalDNS != "" {
externalNslookup(config)
}

if config.ExternalAddress != "" {
pingExternal(config)
}
}

func pingNodes(config *Configuration) {
Expand Down Expand Up @@ -146,21 +153,38 @@ func pingExternal(config *Configuration) {
int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))))
}

func nslookup(config *Configuration) {
// internalNslookup resolves config.InternalDNS using a zero-value
// net.Resolver under a 10-second timeout and publishes the outcome for
// config.NodeName. A failed lookup is logged and marks internal DNS as
// unhealthy; a successful lookup marks it healthy and reports the elapsed
// time in milliseconds.
func internalNslookup(config *Configuration) {
	klog.Infof("start to check dns connectivity")

	// The clock starts before the context is built, so the reported latency
	// covers everything from here until LookupHost returns.
	start := time.Now()
	ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
	defer cancel()

	resolver := new(net.Resolver)
	addrs, err := resolver.LookupHost(ctx, config.InternalDNS)
	latencyMs := float64(time.Since(start)) / float64(time.Millisecond)

	if err != nil {
		klog.Errorf("failed to resolve dns %s, %v", config.InternalDNS, err)
		SetInternalDnsUnhealthyMetrics(config.NodeName)
		return
	}

	SetInternalDnsHealthyMetrics(config.NodeName, latencyMs)
	klog.Infof("resolve dns %s to %v in %.2fms", config.InternalDNS, addrs, latencyMs)
}

func externalNslookup(config *Configuration) {
klog.Infof("start to check dns connectivity")
t1 := time.Now()
ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
defer cancel()
var r net.Resolver
addrs, err := r.LookupHost(ctx, config.DNS)
addrs, err := r.LookupHost(ctx, config.ExternalDNS)
elpased := time.Since(t1)
if err != nil {
klog.Errorf("failed to resolve dns %s, %v", config.DNS, err)
SetDnsUnhealthyMetrics(config.NodeName)
klog.Errorf("failed to resolve dns %s, %v", config.ExternalDNS, err)
SetExternalDnsUnhealthyMetrics(config.NodeName)
return
}
SetDnsHealthyMetrics(config.NodeName, float64(elpased)/float64(time.Millisecond))
klog.Infof("resolve dns %s to %v in %.2fms", config.DNS, addrs, float64(elpased)/float64(time.Millisecond))
SetExternalDnsHealthyMetrics(config.NodeName, float64(elpased)/float64(time.Millisecond))
klog.Infof("resolve dns %s to %v in %.2fms", config.ExternalDNS, addrs, float64(elpased)/float64(time.Millisecond))
}

func checkApiServer(config *Configuration) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/network_attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func parsePodNetworkObjectName(podnetwork string) (string, string, string, error
}

// Check and see if each item matches the specification for valid attachment name.
// "Valid attachment names must be comprised of units of the DNS-1123 label format"
// "Valid attachment names must be comprised of units of the DNS-1123 label format"
// [a-z0-9]([-a-z0-9]*[a-z0-9])?
// And we allow at (@), and forward slash (/) (units separated by commas)
// It must start and end alphanumerically.
Expand Down
2 changes: 1 addition & 1 deletion yamls/kube-ovn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ spec:
containers:
- name: pinger
image: "kubeovn/kube-ovn:v1.2.0"
command: ["/kube-ovn/kube-ovn-pinger", "--external-address=114.114.114.114"]
command: ["/kube-ovn/kube-ovn-pinger", "--external-address=114.114.114.114", "--external-dns=alauda.cn"]
imagePullPolicy: IfNotPresent
securityContext:
runAsUser: 0
Expand Down

0 comments on commit ac1e75c

Please sign in to comment.