From dc9152df0da16f1decdfe9410468fdbdb216417e Mon Sep 17 00:00:00 2001 From: Lan Date: Wed, 21 Aug 2024 15:17:33 +0800 Subject: [PATCH] Clean up stale resources when 'antctl check cluster' failed (#6597) Signed-off-by: Lan Luo --- pkg/antctl/raw/check/cluster/command.go | 4 ++-- pkg/antctl/raw/check/installation/command.go | 2 +- pkg/antctl/raw/check/installation/test_denyall.go | 5 +++++ pkg/antctl/raw/check/util.go | 13 ++++++++++--- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/pkg/antctl/raw/check/cluster/command.go b/pkg/antctl/raw/check/cluster/command.go index a08e5aa1ccb..e5a993ef1ba 100644 --- a/pkg/antctl/raw/check/cluster/command.go +++ b/pkg/antctl/raw/check/cluster/command.go @@ -101,6 +101,7 @@ func Run(o *options) error { } ctx := context.Background() testContext := NewTestContext(client, config, clusterName, o.testImage) + defer check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace) if err := testContext.setup(ctx); err != nil { return err } @@ -121,7 +122,6 @@ func Run(o *options) error { } } testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were uncertain", numSuccess, numFailure, numUncertain) - check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace) if numFailure > 0 { return fmt.Errorf("%v/%v tests failed", numFailure, len(testsRegistry)) } @@ -205,7 +205,7 @@ func (t *testContext) setup(ctx context.Context) error { } testPods, err := t.client.CoreV1().Pods(t.namespace).List(ctx, metav1.ListOptions{LabelSelector: "component=cluster-checker"}) if err != nil { - return fmt.Errorf("no pod found for test Deployment") + return fmt.Errorf("no Pod found for Deployment %s", deploymentName) } t.testPod = &testPods.Items[0] return nil diff --git a/pkg/antctl/raw/check/installation/command.go b/pkg/antctl/raw/check/installation/command.go index ef194994d56..1b9e43a28be 100644 --- a/pkg/antctl/raw/check/installation/command.go +++ b/pkg/antctl/raw/check/installation/command.go @@ -148,13 +148,13 @@ func Run(o *options) error { } ctx := context.Background() testContext := NewTestContext(client, config, clusterName, o.antreaNamespace, runFilterRegex, o.testImage) + defer check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace) if err := testContext.setup(ctx); err != nil { return err } stats := testContext.runTests(ctx) testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were skipped", stats.numSuccess, stats.numFailure, stats.numSkipped) - check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace) if stats.numFailure > 0 { return fmt.Errorf("%v/%v tests failed", stats.numFailure, stats.numTotal()) } diff --git a/pkg/antctl/raw/check/installation/test_denyall.go b/pkg/antctl/raw/check/installation/test_denyall.go index 251332bc949..c0f0dcecc4b 100644 --- a/pkg/antctl/raw/check/installation/test_denyall.go +++ b/pkg/antctl/raw/check/installation/test_denyall.go @@ -17,6 +17,7 @@ package installation import ( "context" "fmt" + "time" corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" @@ -27,6 +28,9 @@ type DenyAllConnectivityTest struct { networkPolicy *networkingv1.NetworkPolicy } +// Provide enough time for policies to be enforced by the CNI plugin. +const networkPolicyDelay = 2 * time.Second + func init() { RegisterTest("egress-deny-all-connectivity", &DenyAllConnectivityTest{networkPolicy: &networkingv1.NetworkPolicy{ ObjectMeta: metav1.ObjectMeta{ @@ -84,6 +88,7 @@ func (t *DenyAllConnectivityTest) Run(ctx context.Context, testContext *testCont testContext.Log("NetworkPolicy deletion was successful") return nil }() + time.Sleep(networkPolicyDelay) testContext.Log("NetworkPolicy applied successfully") for _, clientPod := range testContext.clientPods { for _, service := range services { diff --git a/pkg/antctl/raw/check/util.go b/pkg/antctl/raw/check/util.go index 1eb24ab87b9..d17d9b0530f 100644 --- a/pkg/antctl/raw/check/util.go +++ b/pkg/antctl/raw/check/util.go @@ -24,6 +24,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" @@ -216,12 +217,18 @@ func GenerateRandomNamespace(baseName string) string { func Teardown(ctx context.Context, client kubernetes.Interface, clusterName string, namespace string) { fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", clusterName)+"Deleting installation tests setup...\n") - client.CoreV1().Namespaces().Delete(ctx, namespace, metav1.DeleteOptions{}) + err := client.CoreV1().Namespaces().Delete(ctx, namespace, metav1.DeleteOptions{}) + if err != nil { + fmt.Fprintf(os.Stdout, "Namespace %s deletion failed: %v", namespace, err) + return + } fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", clusterName)+"Waiting for Namespace %s to be deleted\n", namespace) - err := wait.PollUntilContextTimeout(ctx, 2*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 1*time.Minute, true, func(ctx context.Context) (bool, error) { _, err := client.CoreV1().Namespaces().Get(ctx, namespace, metav1.GetOptions{}) if err != nil { - return true, nil + if errors.IsNotFound(err) { + return true, nil + } } return false, nil })