Skip to content

Commit

Permalink
fix EOF during TLS handshake caused by health check (#4381)
Browse files Browse the repository at this point in the history
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
  • Loading branch information
zhangzujian committed Aug 12, 2024
1 parent 39980fb commit 87e7565
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 51 deletions.
10 changes: 6 additions & 4 deletions charts/kube-ovn/templates/controller-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -168,19 +168,21 @@ spec:
readinessProbe:
exec:
command:
- /kube-ovn/kube-ovn-controller-healthcheck
- /kube-ovn/kube-ovn-healthcheck
- --port=10660
- --tls={{- .Values.func.SECURE_SERVING }}
periodSeconds: 3
timeoutSeconds: 45
timeoutSeconds: 1
livenessProbe:
exec:
command:
- /kube-ovn/kube-ovn-controller-healthcheck
- /kube-ovn/kube-ovn-healthcheck
- --port=10660
- --tls={{- .Values.func.SECURE_SERVING }}
initialDelaySeconds: 300
periodSeconds: 7
failureThreshold: 5
timeoutSeconds: 45
timeoutSeconds: 1
resources:
requests:
{{- if .Release.IsUpgrade }}
Expand Down
18 changes: 12 additions & 6 deletions charts/kube-ovn/templates/monitor-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,23 @@ spec:
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10661
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10661
- --tls={{- .Values.func.SECURE_SERVING }}
timeoutSeconds: 1
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10661
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10661
- --tls={{- .Values.func.SECURE_SERVING }}
timeoutSeconds: 1
nodeSelector:
kubernetes.io/os: "linux"
kube-ovn/role: "master"
Expand Down
18 changes: 12 additions & 6 deletions charts/kube-ovn/templates/ovncni-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,17 +158,23 @@ spec:
failureThreshold: 3
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10665
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10665
- --tls={{- .Values.func.SECURE_SERVING }}
timeoutSeconds: 1
livenessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10665
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10665
- --tls={{- .Values.func.SECURE_SERVING }}
timeoutSeconds: 1
resources:
requests:
{{- if .Release.IsUpgrade }}
Expand Down
22 changes: 11 additions & 11 deletions cmd/cmdmain.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ import (
"k8s.io/klog/v2"

"github.com/kubeovn/kube-ovn/cmd/controller"
"github.com/kubeovn/kube-ovn/cmd/controller_health_check"
"github.com/kubeovn/kube-ovn/cmd/daemon"
"github.com/kubeovn/kube-ovn/cmd/health_check"
"github.com/kubeovn/kube-ovn/cmd/ovn_ic_controller"
"github.com/kubeovn/kube-ovn/cmd/ovn_leader_checker"
"github.com/kubeovn/kube-ovn/cmd/ovn_monitor"
Expand All @@ -23,14 +23,14 @@ import (
)

const (
CmdController = "kube-ovn-controller"
CmdDaemon = "kube-ovn-daemon"
CmdMonitor = "kube-ovn-monitor"
CmdPinger = "kube-ovn-pinger"
CmdSpeaker = "kube-ovn-speaker"
CmdControllerHealthCheck = "kube-ovn-controller-healthcheck"
CmdOvnLeaderChecker = "kube-ovn-leader-checker"
CmdOvnICController = "kube-ovn-ic-controller"
CmdController = "kube-ovn-controller"
CmdDaemon = "kube-ovn-daemon"
CmdMonitor = "kube-ovn-monitor"
CmdPinger = "kube-ovn-pinger"
CmdSpeaker = "kube-ovn-speaker"
CmdHealthCheck = "kube-ovn-healthcheck"
CmdOvnLeaderChecker = "kube-ovn-leader-checker"
CmdOvnICController = "kube-ovn-ic-controller"
)

const timeFormat = "2006-01-02_15:04:05"
Expand Down Expand Up @@ -93,8 +93,8 @@ func main() {
case CmdSpeaker:
dumpProfile()
speaker.CmdMain()
case CmdControllerHealthCheck:
controller_health_check.CmdMain()
case CmdHealthCheck:
health_check.CmdMain()
case CmdOvnLeaderChecker:
ovn_leader_checker.CmdMain()
case CmdOvnICController:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package controller_health_check
package health_check

import (
"flag"
"net"
"os"
"time"

Expand All @@ -12,7 +13,8 @@ import (
)

func CmdMain() {
tls := pflag.Bool("tls", false, "Whether kube-ovn-controller uses TLS")
port := pflag.Int32("port", 0, "Target port")
tls := pflag.Bool("tls", false, "Dial the server with TLS")

klogFlags := flag.NewFlagSet("klog", flag.ExitOnError)
klog.InitFlags(klogFlags)
Expand All @@ -32,18 +34,24 @@ func CmdMain() {
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
pflag.Parse()

addr := "127.0.0.1:10660"
if os.Getenv("ENABLE_BIND_LOCAL_IP") == "true" {
addr = util.JoinHostPort(os.Getenv("POD_IP"), 10660)
if *port <= 0 {
klog.Errorf("invalid port: %d", *port)
os.Exit(1)
}

ip := os.Getenv("POD_IP")
if net.ParseIP(ip) == nil {
klog.Errorf("invalid ip: %q", ip)
os.Exit(1)
}

addr := util.JoinHostPort(ip, *port)
if *tls {
addr = "tls://" + addr
} else {
addr = "tcp://" + addr
}

if err := util.DialTCP(addr, time.Second, false); err != nil {
if err := util.DialTCP(addr, 100*time.Millisecond, false); err != nil {
util.LogFatalAndExit(err, "failed to probe the socket")
}
}
2 changes: 1 addition & 1 deletion dist/images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-controller && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-pinger && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-speaker && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-controller-healthcheck && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-healthcheck && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-leader-checker && \
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-ic-controller
48 changes: 32 additions & 16 deletions dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4155,17 +4155,21 @@ spec:
readinessProbe:
exec:
command:
- /kube-ovn/kube-ovn-controller-healthcheck
- /kube-ovn/kube-ovn-healthcheck
- --port=10660
- --tls=${SECURE_SERVING}
periodSeconds: 3
timeoutSeconds: 45
timeoutSeconds: 1
livenessProbe:
exec:
command:
- /kube-ovn/kube-ovn-controller-healthcheck
- /kube-ovn/kube-ovn-healthcheck
- --port=10660
- --tls=${SECURE_SERVING}
initialDelaySeconds: 300
periodSeconds: 7
failureThreshold: 5
timeoutSeconds: 45
timeoutSeconds: 1
resources:
requests:
cpu: 200m
Expand Down Expand Up @@ -4337,16 +4341,22 @@ spec:
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10665
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10665
- --tls=${SECURE_SERVING}
timeoutSeconds: 1
readinessProbe:
failureThreshold: 3
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10665
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10665
- --tls=${SECURE_SERVING}
timeoutSeconds: 1
resources:
requests:
cpu: 100m
Expand Down Expand Up @@ -4628,17 +4638,23 @@ spec:
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10661
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10661
- --tls=${SECURE_SERVING}
timeoutSeconds: 1
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 7
successThreshold: 1
tcpSocket:
port: 10661
timeoutSeconds: 3
exec:
command:
- /kube-ovn/kube-ovn-healthcheck
- --port=10661
- --tls=${SECURE_SERVING}
timeoutSeconds: 1
nodeSelector:
kubernetes.io/os: "linux"
kube-ovn/role: "master"
Expand Down

0 comments on commit 87e7565

Please sign in to comment.