fix: talosctl health should not check kube-proxy when it is disabled
Fixes #3299.

Signed-off-by: Alexey Palazhchenko <alexey.palazhchenko@gmail.com>
AlekSi authored and talos-bot committed Mar 16, 2021
1 parent 0dbaeb9 commit 7662d03
Showing 6 changed files with 157 additions and 6 deletions.
28 changes: 28 additions & 0 deletions pkg/cluster/check/default.go
@@ -22,6 +22,7 @@ func DefaultClusterChecks() []ClusterCheck {
return ServiceHealthAssertion(ctx, cluster, "etcd", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
}, 5*time.Minute, 5*time.Second)
},

// wait for bootkube to finish on init node
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("bootkube to finish", func(ctx context.Context) error {
@@ -37,54 +38,81 @@ func DefaultClusterChecks() []ClusterCheck {
return nil
}, 5*time.Minute, 5*time.Second)
},

// wait for apid to be ready on all the nodes
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("apid to be ready", func(ctx context.Context) error {
return ApidReadyAssertion(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},

// wait for kubelet to be healthy on all
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("kubelet to be healthy", func(ctx context.Context) error {
return ServiceHealthAssertion(ctx, cluster, "kubelet", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
}, 5*time.Minute, 5*time.Second)
},

// wait for all nodes to finish booting
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all nodes to finish boot sequence", func(ctx context.Context) error {
return AllNodesBootedAssertion(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},

// wait for all the nodes to report in at k8s level
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
return K8sAllNodesReportedAssertion(ctx, cluster)
}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
},

// wait for all the nodes to report ready at k8s level
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
return K8sAllNodesReadyAssertion(ctx, cluster)
}, 10*time.Minute, 5*time.Second)
},

// wait for HA k8s control plane
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
return K8sFullControlPlaneAssertion(ctx, cluster)
}, 5*time.Minute, 5*time.Second)
},

// wait for kube-proxy to report ready
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
present, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
if err != nil {
return err
}

if !present {
return conditions.ErrSkipAssertion
}

return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
}, 3*time.Minute, 5*time.Second)
},

// wait for coredns to report ready
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("coredns to report ready", func(ctx context.Context) error {
present, err := ReplicaSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")
if err != nil {
return err
}

if !present {
return conditions.ErrSkipAssertion
}

return K8sPodReadyAssertion(ctx, cluster, "kube-system", "k8s-app=kube-dns")
}, 3*time.Minute, 5*time.Second)
},

// wait for all the nodes to be schedulable
func(cluster ClusterInfo) conditions.Condition {
return conditions.PollingCondition("all k8s nodes to report schedulable", func(ctx context.Context) error {
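Both new checks in default.go follow the same gate-then-assert shape: probe for the workload first, return conditions.ErrSkipAssertion when it is absent, and only then run the real readiness assertion. Below is a minimal, self-contained sketch of that shape using only the standard library; errSkip and poll stand in for conditions.ErrSkipAssertion and the polling condition and are not the Talos API.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// errSkip plays the role of conditions.ErrSkipAssertion: the assertion's
// subject is absent, so the whole check counts as passed rather than failed.
var errSkip = errors.New("SKIP")

// poll retries assert every interval until it returns nil or errSkip,
// or until the timeout expires, mirroring pollingCondition.Wait in
// pkg/conditions/poll.go below.
func poll(ctx context.Context, timeout, interval time.Duration, assert func(context.Context) error) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	for {
		err := assert(ctx)
		if err == nil || errors.Is(err, errSkip) {
			return nil
		}

		select {
		case <-ctx.Done():
			return err
		case <-time.After(interval):
		}
	}
}

func main() {
	// daemonSetPresent plays the role of DaemonSetPresent; pretend kube-proxy
	// is disabled, so no DaemonSet matches the label selector.
	daemonSetPresent := func(ctx context.Context) (bool, error) { return false, nil }

	err := poll(context.Background(), 3*time.Second, 100*time.Millisecond, func(ctx context.Context) error {
		present, err := daemonSetPresent(ctx)
		if err != nil {
			return err
		}

		if !present {
			return errSkip // kube-proxy is disabled: skip instead of failing
		}

		// A real check would assert pod readiness here.
		return errors.New("kube-proxy pods not ready")
	})

	fmt.Println("kube-proxy check error:", err) // <nil>: the check is skipped, not failed
}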
38 changes: 36 additions & 2 deletions pkg/cluster/check/kubernetes.go
@@ -229,7 +229,7 @@ func K8sAllNodesSchedulableAssertion(ctx context.Context, cluster cluster.K8sPro
return fmt.Errorf("some nodes are not schedulable: %v", notSchedulableNodes)
}

// K8sPodReadyAssertion checks whether all the pods are Ready.
// K8sPodReadyAssertion checks whether all the pods matching label selector are Ready, and there is at least one.
func K8sPodReadyAssertion(ctx context.Context, cluster cluster.K8sProvider, namespace, labelSelector string) error {
clientset, err := cluster.K8sClient(ctx)
if err != nil {
@@ -244,7 +244,7 @@ func K8sPodReadyAssertion(ctx context.Context, cluster cluster.K8sProvider, name
}

if len(pods.Items) == 0 {
return fmt.Errorf("no pods found for namespace %q and label %q", namespace, labelSelector)
return fmt.Errorf("no pods found for namespace %q and label selector %q", namespace, labelSelector)
}

var notReadyPods []string
@@ -273,3 +273,37 @@ func K8sPodReadyAssertion(ctx context.Context, cluster cluster.K8sProvider, name

return fmt.Errorf("some pods are not ready: %v", notReadyPods)
}

// DaemonSetPresent returns true if there is at least one DaemonSet matching given label selector.
func DaemonSetPresent(ctx context.Context, cluster cluster.K8sProvider, namespace, labelSelector string) (bool, error) {
clientset, err := cluster.K8sClient(ctx)
if err != nil {
return false, err
}

dss, err := clientset.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
return false, err
}

return len(dss.Items) > 0, nil
}

// ReplicaSetPresent returns true if there is at least one ReplicaSet matching given label selector.
func ReplicaSetPresent(ctx context.Context, cluster cluster.K8sProvider, namespace, labelSelector string) (bool, error) {
clientset, err := cluster.K8sClient(ctx)
if err != nil {
return false, err
}

rss, err := clientset.AppsV1().ReplicaSets(namespace).List(ctx, metav1.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
return false, err
}

return len(rss.Items) > 0, nil
}
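DaemonSetPresent and ReplicaSetPresent are thin wrappers over client-go list calls. For reference, the same probe can be run standalone against a kubeconfig; this sketch assumes a local kubeconfig path and the k8s-app=kube-proxy selector purely for illustration, whereas talosctl builds its Kubernetes client from the cluster instead.

package main

import (
	"context"
	"fmt"
	"log"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Illustrative kubeconfig path; adjust for your environment.
	config, err := clientcmd.BuildConfigFromFlags("", "/home/user/.kube/config")
	if err != nil {
		log.Fatal(err)
	}

	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatal(err)
	}

	// Same query DaemonSetPresent performs: list DaemonSets by label selector
	// and treat an empty result as "kube-proxy is disabled".
	dss, err := clientset.AppsV1().DaemonSets("kube-system").List(context.Background(), metav1.ListOptions{
		LabelSelector: "k8s-app=kube-proxy",
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("kube-proxy DaemonSet present:", len(dss.Items) > 0)
}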
7 changes: 5 additions & 2 deletions pkg/cluster/check/reporter.go
@@ -59,8 +59,11 @@ func (wr *writerReporter) Update(condition conditions.Condition) {
coloredLine = color.YellowString("%s", line)
wr.lastLineTemporary = true
showSpinner = true
case strings.HasSuffix(line, "OK"):
coloredLine = line
case strings.HasSuffix(line, conditions.OK):
coloredLine = color.GreenString("%s", line)
wr.lastLineTemporary = false
case strings.HasSuffix(line, conditions.ErrSkipAssertion.Error()):
coloredLine = color.BlueString("%s", line)
wr.lastLineTemporary = false
default:
line = fmt.Sprintf("%s %s", spinner[wr.spinnerIdx], line)
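The reporter change maps each terminal state to a color: lines ending in OK are printed green and lines ending in SKIP blue, while in-progress lines stay yellow. A standalone sketch of that mapping using the same fatih/color helpers follows; the sample lines are made up for illustration.

package main

import (
	"fmt"
	"strings"

	"github.com/fatih/color"
)

func main() {
	for _, line := range []string{
		"waiting for kube-proxy to report ready: OK",
		"waiting for kube-proxy to report ready: SKIP",
		"waiting for kube-proxy to report ready: 1 pod(s) not ready",
	} {
		switch {
		case strings.HasSuffix(line, "OK"):
			fmt.Println(color.GreenString("%s", line)) // finished successfully
		case strings.HasSuffix(line, "SKIP"):
			fmt.Println(color.BlueString("%s", line)) // skipped: target not deployed
		default:
			fmt.Println(color.YellowString("%s", line)) // still in progress
		}
	}
}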
3 changes: 3 additions & 0 deletions pkg/conditions/conditions.go
@@ -9,6 +9,9 @@ import (
"fmt"
)

// OK is returned by the String method of the passed Condition.
const OK = "OK"

// Condition is a object which Wait()s for some condition to become true.
//
// Condition can describe itself via String() method.
11 changes: 9 additions & 2 deletions pkg/conditions/poll.go
@@ -6,11 +6,18 @@ package conditions

import (
"context"
"errors"
"fmt"
"sync"
"time"
)

// ErrSkipAssertion is used as a return value from AssertionFunc to indicate that this assertion
// (and, by extension, condition, and check) is to be skipped.
// It is not returned as an error by any Condition's Wait method
// but recorded as description and returned by String method.
var ErrSkipAssertion = errors.New("SKIP")

// AssertionFunc is called every poll interval until it returns nil.
type AssertionFunc func(ctx context.Context) error

@@ -32,7 +39,7 @@ func (p *pollingCondition) String() string {
if p.lastErr != nil {
lastErr = p.lastErr.Error()
} else {
lastErr = "OK"
lastErr = OK
}
}
p.lastErrMu.Unlock()
@@ -62,7 +69,7 @@ func (p *pollingCondition) Wait(ctx context.Context) error {
return err
}()

if err == nil {
if err == nil || err == ErrSkipAssertion {
return nil
}

76 changes: 76 additions & 0 deletions pkg/conditions/poll_test.go
@@ -0,0 +1,76 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package conditions_test

import (
"context"
"errors"
"testing"
"time"

"github.com/stretchr/testify/assert"

"github.com/talos-systems/talos/pkg/conditions"
)

func TestPollingCondition(t *testing.T) {
t.Parallel()

t.Run("OK", func(t *testing.T) {
t.Parallel()

var calls int
cond := conditions.PollingCondition("Test condition", func(ctx context.Context) error {
calls++

if calls < 2 {
return errors.New("failed")
}

return nil
}, time.Second, time.Millisecond)

err := cond.Wait(context.Background())
assert.NoError(t, err)
assert.Equal(t, "Test condition: OK", cond.String())
assert.Equal(t, 2, calls)
})

t.Run("Skip", func(t *testing.T) {
t.Parallel()

var calls int
cond := conditions.PollingCondition("Test condition", func(ctx context.Context) error {
calls++

if calls < 2 {
return errors.New("failed")
}

return conditions.ErrSkipAssertion
}, time.Second, time.Millisecond)

err := cond.Wait(context.Background())
assert.NoError(t, err)
assert.Equal(t, "Test condition: SKIP", cond.String())
assert.Equal(t, 2, calls)
})

t.Run("Fatal", func(t *testing.T) {
t.Parallel()

var calls int
cond := conditions.PollingCondition("Test condition", func(ctx context.Context) error {
calls++

return errors.New("failed")
}, time.Second, 750*time.Millisecond)

err := cond.Wait(context.Background())
assert.Equal(t, context.DeadlineExceeded, err)
assert.Equal(t, "Test condition: failed", cond.String())
assert.Equal(t, 2, calls)
})
}
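Assuming a standard Go toolchain and a checkout of the repository root, the new skip behavior is exercised by these subtests with:

go test -v -run TestPollingCondition ./pkg/conditions/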
