Skip to content

Commit

Permalink
fix: find master node IPs correctly in health checks
Browse files Browse the repository at this point in the history
Health checks verify that the node list in Kubernetes matches expectations,
but the initial set of nodes for server-side health checks was driven by the
`MasterIPs` function, which returns the list of master endpoints. That list
is not exactly the same as the list of master nodes: endpoints also include
some healthchecks.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
  • Loading branch information
smira authored and talos-bot committed Feb 16, 2021
1 parent 6791036 commit 8d7a36c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ func (cluster *clusterState) resolve(ctx context.Context, k8sProvider *cluster.K
return err
}

if cluster.controlPlaneNodes, err = k8sProvider.KubeHelper.MasterIPs(ctx); err != nil {
if cluster.controlPlaneNodes, err = k8sProvider.KubeHelper.NodeIPs(ctx, machine.TypeControlPlane); err != nil {
return err
}

if cluster.workerNodes, err = k8sProvider.KubeHelper.WorkerIPs(ctx); err != nil {
if cluster.workerNodes, err = k8sProvider.KubeHelper.NodeIPs(ctx, machine.TypeJoin); err != nil {
return err
}
}
Expand Down
21 changes: 18 additions & 3 deletions pkg/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"

"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/talos-systems/talos/pkg/machinery/constants"
)

Expand Down Expand Up @@ -142,8 +143,8 @@ func (h *Client) MasterIPs(ctx context.Context) (addrs []string, err error) {
return addrs, nil
}

// WorkerIPs returns list of worker nodes IP addresses.
func (h *Client) WorkerIPs(ctx context.Context) (addrs []string, err error) {
// NodeIPs returns list of node IP addresses by machine type.
func (h *Client) NodeIPs(ctx context.Context, machineType machine.Type) (addrs []string, err error) {
resp, err := h.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
return nil, err
Expand All @@ -152,13 +153,27 @@ func (h *Client) WorkerIPs(ctx context.Context) (addrs []string, err error) {
addrs = []string{}

for _, node := range resp.Items {
if _, ok := node.Labels[constants.LabelNodeRoleMaster]; ok {
_, labelMaster := node.Labels[constants.LabelNodeRoleMaster]
_, labelControlPlane := node.Labels[constants.LabelNodeRoleControlPlane]

skip := true

switch machineType { //nolint: exhaustive
case machine.TypeInit, machine.TypeControlPlane:
skip = !(labelMaster || labelControlPlane)
case machine.TypeJoin:
skip = labelMaster || labelControlPlane
}

if skip {
continue
}

for _, nodeAddress := range node.Status.Addresses {
if nodeAddress.Type == corev1.NodeInternalIP {
addrs = append(addrs, nodeAddress.Address)

break
}
}
}
Expand Down

0 comments on commit 8d7a36c

Please sign in to comment.