From 6772673c4c7981a953d53cb080ae37ac5126efff Mon Sep 17 00:00:00 2001 From: Angelos Kolaitis Date: Mon, 10 Jun 2024 11:06:08 +0300 Subject: [PATCH] Add CI to handle auto test and promote to strict and moonray branches (#476) * Add CI to handle auto test and promote to strict and moonray branches * also apply patch when running tests * fix missing namespace issues with calico * Disable more tests until features are implemented * Add --timeout flags to status check commands * do not fail fast on failing informing tests * skip cleanup test (not yet implemented) * relax default timeout * refactor check network ready * improve wording of x-wait-for failure commands * retry on failures of checking DNS and network * never return false without error * disable test_network too * avoid returning false, nil on CheckNetwork cilium * fixup cilium messages --- ...ration.yaml => integration-informing.yaml} | 29 ++++++--- .github/workflows/strict.yaml | 58 ----------------- .github/workflows/update-branches.yaml | 51 +++++++++++++++ build-scripts/patches/moonray/apply | 11 ++++ src/k8s/cmd/k8s/k8s_x_wait_for.go | 40 +++++++++--- src/k8s/pkg/client/helm/client.go | 1 + src/k8s/pkg/k8sd/features/calico/status.go | 62 ++++++++++++------- src/k8s/pkg/k8sd/features/cilium/status.go | 2 +- src/k8s/pkg/k8sd/features/coredns/status.go | 3 + 9 files changed, 154 insertions(+), 103 deletions(-) rename .github/workflows/{strict-integration.yaml => integration-informing.yaml} (69%) delete mode 100644 .github/workflows/strict.yaml create mode 100644 .github/workflows/update-branches.yaml diff --git a/.github/workflows/strict-integration.yaml b/.github/workflows/integration-informing.yaml similarity index 69% rename from .github/workflows/strict-integration.yaml rename to .github/workflows/integration-informing.yaml index 9371d0eff..6197a3911 100644 --- a/.github/workflows/strict-integration.yaml +++ b/.github/workflows/integration-informing.yaml @@ -1,4 +1,4 @@ -name: Integration Tests (Strict) +name: Informing Integration Tests on: push: @@ -13,8 +13,12 @@ permissions: jobs: build: - name: Build + name: Build ${{ matrix.patch }} runs-on: ubuntu-20.04 + strategy: + matrix: + patch: ["strict", "moonray"] + fail-fast: false steps: - name: Harden Runner uses: step-security/harden-runner@v2 @@ -30,25 +34,27 @@ jobs: - name: Install snapcraft run: | sudo snap install snapcraft --classic - - name: Apply strict patch + - name: Apply ${{ matrix.patch }} patch run: | - ./build-scripts/patches/strict/apply + ./build-scripts/patches/${{ matrix.patch }}/apply - name: Build snap run: | sg lxd -c 'snapcraft --use-lxd' - mv k8s_*.snap k8s-strict.snap + mv k8s_*.snap k8s-${{ matrix.patch }}.snap - name: Uploading snap uses: actions/upload-artifact@v4 with: - name: k8s-strict.snap - path: k8s-strict.snap + name: k8s-${{ matrix.patch }}.snap + path: k8s-${{ matrix.patch }}.snap test-integration: needs: [ build ] - name: Test ${{ matrix.os }} + name: Test ${{ matrix.patch }} ${{ matrix.os }} strategy: matrix: os: ["ubuntu:20.04"] + patch: ["strict", "moonray"] + fail-fast: false runs-on: ubuntu-20.04 steps: - name: Check out code @@ -68,11 +74,14 @@ jobs: - name: Download snap uses: actions/download-artifact@v4 with: - name: k8s-strict.snap + name: k8s-${{ matrix.patch }}.snap path: build + - name: Apply ${{ matrix.patch }} patch + run: | + ./build-scripts/patches/${{ matrix.patch }}/apply - name: Run end to end tests run: | - export TEST_SNAP="$PWD/build/k8s-strict.snap" + export TEST_SNAP="$PWD/build/k8s-${{ matrix.patch }}.snap" export TEST_SUBSTRATE=lxd export TEST_LXD_IMAGE=${{ matrix.os }} cd tests/integration && sg lxd -c 'tox -e integration' diff --git a/.github/workflows/strict.yaml b/.github/workflows/strict.yaml deleted file mode 100644 index cf45dae93..000000000 --- a/.github/workflows/strict.yaml +++ /dev/null @@ -1,58 +0,0 @@ -name: Auto-update strict branch - -on: - push: - branches: - - main - - 'release-[0-9]+.[0-9]+' - -permissions: - contents: read - -jobs: - prepare: - name: Prepare - runs-on: ubuntu-latest - outputs: - strict: ${{ steps.determine.outputs.strict }} - steps: - - name: Harden Runner - uses: step-security/harden-runner@v2 - with: - egress-policy: audit - - name: Determine Strict branch - id: determine - env: - BRANCH: ${{ github.ref }} - run: | - BRANCH=${BRANCH#refs/heads/} # strip off refs/heads/ if it exists - if [[ "${BRANCH}" == "main" ]]; then - echo "strict=autoupdate/strict" >> "$GITHUB_OUTPUT" - elif [[ "${BRANCH}" =~ ^release-[0-9]+\.[0-9]+$ ]]; then - echo "strict=autoupdate/${BRANCH}-strict" >> "$GITHUB_OUTPUT" - else - echo "Failed to determine matching strict branch for ${BRANCH}" - echo "strict=" >> $GITHUB_OUTPUT - fi - update: - permissions: - contents: write # for Git to git push - runs-on: ubuntu-20.04 - needs: [ prepare ] - if: ${{ needs.prepare.outputs.strict }} - steps: - - name: Harden Runner - uses: step-security/harden-runner@v2 - with: - egress-policy: audit - - name: Sync ${{ github.ref }} to ${{ needs.prepare.outputs.strict }} - uses: actions/checkout@v4 - with: - ssh-key: ${{ secrets.DEPLOY_KEY_TO_UPDATE_STRICT_BRANCH }} - - name: Apply strict patch - run: | - git checkout -b ${{ needs.prepare.outputs.strict }} - ./build-scripts/patches/strict/apply - - name: Push to ${{ needs.prepare.outputs.strict }} - run: | - git push origin --force ${{ needs.prepare.outputs.strict }} diff --git a/.github/workflows/update-branches.yaml b/.github/workflows/update-branches.yaml new file mode 100644 index 000000000..46d33ed73 --- /dev/null +++ b/.github/workflows/update-branches.yaml @@ -0,0 +1,51 @@ +name: Auto-update branches + +on: + push: + branches: + - main + - 'release-[0-9]+.[0-9]+' + +permissions: + contents: read + +jobs: + update: + name: "${{ matrix.patch }}" + permissions: + contents: write # for Git to git push + runs-on: ubuntu-20.04 + strategy: + matrix: + patch: ["strict", "moonray"] + outputs: + branch: ${{ steps.determine.outputs.branch }} + steps: + - name: Harden Runner + uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - name: Determine branch + id: determine + env: + BRANCH: ${{ github.ref }} + run: | + BRANCH=${BRANCH#refs/heads/} # strip off refs/heads/ if it exists + if [[ "${BRANCH}" == "main" ]]; then + echo "branch=autoupdate/${{ matrix.patch }}" >> "$GITHUB_OUTPUT" + elif [[ "${BRANCH}" =~ ^release-[0-9]+\.[0-9]+$ ]]; then + echo "branch=autoupdate/${BRANCH}-${{ matrix.patch }}" >> "$GITHUB_OUTPUT" + else + exit 1 + fi + - name: Sync ${{ github.ref }} to ${{ needs.prepare.outputs.branch }} + uses: actions/checkout@v4 + with: + ssh-key: ${{ secrets.DEPLOY_KEY_TO_UPDATE_STRICT_BRANCH }} + - name: Apply ${{ matrix.patch }} patch + run: | + git checkout -b ${{ needs.prepare.outputs.branch }} + ./build-scripts/patches/${{ matrix.patch }}/apply + - name: Push to ${{ needs.prepare.outputs.branch }} + run: | + git push origin --force ${{ needs.prepare.outputs.branch }} diff --git a/build-scripts/patches/moonray/apply b/build-scripts/patches/moonray/apply index 8f965d03a..def5f242c 100755 --- a/build-scripts/patches/moonray/apply +++ b/build-scripts/patches/moonray/apply @@ -8,6 +8,17 @@ git config user.name k8s-bot # Remove unrelated tests rm "${DIR}/../../../tests/integration/tests/test_cilium_e2e.py" +## TODO: restore when loadbalancer is implemented +rm "${DIR}/../../../tests/integration/tests/test_loadbalancer.py" +## TODO: restore when gateway is implemented +rm "${DIR}/../../../tests/integration/tests/test_gateway.py" +## TODO: restore when ingress is implemented +rm "${DIR}/../../../tests/integration/tests/test_ingress.py" +## TODO: restore when cleanup is implemented +rm "${DIR}/../../../tests/integration/tests/test_cleanup.py" +## TODO: restore when network test is fixed +rm "${DIR}/../../../tests/integration/tests/test_network.py" + git commit -a -m "Remove unrelated tests" # Apply strict patch diff --git a/src/k8s/cmd/k8s/k8s_x_wait_for.go b/src/k8s/cmd/k8s/k8s_x_wait_for.go index 50d0fff76..8aa227c39 100644 --- a/src/k8s/cmd/k8s/k8s_x_wait_for.go +++ b/src/k8s/cmd/k8s/k8s_x_wait_for.go @@ -1,6 +1,9 @@ package k8s import ( + "context" + "time" + cmdutil "github.com/canonical/k8s/cmd/util" "github.com/canonical/k8s/pkg/k8sd/features" "github.com/canonical/k8s/pkg/utils/control" @@ -8,33 +11,50 @@ import ( ) func newXWaitForCmd(env cmdutil.ExecutionEnvironment) *cobra.Command { + var opts struct { + timeout time.Duration + } waitForDNSCmd := &cobra.Command{ Use: "dns", Short: "Wait for DNS to be ready", Run: func(cmd *cobra.Command, args []string) { - err := control.WaitUntilReady(cmd.Context(), func() (bool, error) { - return features.StatusChecks.CheckDNS(cmd.Context(), env.Snap) - }) - if err != nil { - cmd.PrintErrf("Error: failed to wait for DNS to be ready: %v\n", err) + ctx, cancel := context.WithTimeout(cmd.Context(), opts.timeout) + defer cancel() + if err := control.WaitUntilReady(ctx, func() (bool, error) { + ok, err := features.StatusChecks.CheckDNS(cmd.Context(), env.Snap) + if ok { + return true, nil + } + cmd.PrintErrf("DNS not ready yet: %v\n", err.Error()) + return false, nil + }); err != nil { + cmd.PrintErrf("Error: DNS did not become ready: %v\n", err) env.Exit(1) } }, } + waitForDNSCmd.Flags().DurationVar(&opts.timeout, "timeout", 5*time.Minute, "maximum time to wait") waitForNetworkCmd := &cobra.Command{ Use: "network", Short: "Wait for Network to be ready", Run: func(cmd *cobra.Command, args []string) { - err := control.WaitUntilReady(cmd.Context(), func() (bool, error) { - return features.StatusChecks.CheckNetwork(cmd.Context(), env.Snap) - }) - if err != nil { - cmd.PrintErrf("Error: failed to wait for DNS to be ready: %v\n", err) + ctx, cancel := context.WithTimeout(cmd.Context(), opts.timeout) + defer cancel() + if err := control.WaitUntilReady(ctx, func() (bool, error) { + ok, err := features.StatusChecks.CheckNetwork(cmd.Context(), env.Snap) + if ok { + return true, nil + } + cmd.PrintErrf("network not ready yet: %v\n", err.Error()) + return false, nil + }); err != nil { + cmd.PrintErrf("Error: network did not become ready: %v\n", err) env.Exit(1) } }, } + waitForNetworkCmd.Flags().DurationVar(&opts.timeout, "timeout", 5*time.Minute, "maximum time to wait") cmd := &cobra.Command{ Use: "x-wait-for", diff --git a/src/k8s/pkg/client/helm/client.go b/src/k8s/pkg/client/helm/client.go index 99e4d9465..d06e40add 100644 --- a/src/k8s/pkg/client/helm/client.go +++ b/src/k8s/pkg/client/helm/client.go @@ -75,6 +75,7 @@ func (h *client) Apply(ctx context.Context, c InstallableChart, desired State, v install := action.NewInstall(cfg) install.ReleaseName = c.Name install.Namespace = c.Namespace + install.CreateNamespace = true chart, err := loader.Load(path.Join(h.manifestsBaseDir, c.ManifestPath)) if err != nil { diff --git a/src/k8s/pkg/k8sd/features/calico/status.go b/src/k8s/pkg/k8sd/features/calico/status.go index d82c965c3..bf75ca9c2 100644 --- a/src/k8s/pkg/k8sd/features/calico/status.go +++ b/src/k8s/pkg/k8sd/features/calico/status.go @@ -6,42 +6,56 @@ import ( "github.com/canonical/k8s/pkg/snap" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// CheckNetwork checks the status of the Calico pods in the Kubernetes cluster. -// It verifies if all the Calico pods in the "tigera-operator" namespace are ready. -// If any pod is not ready, it returns false. Otherwise, it returns true. -func CheckNetwork(ctx context.Context, snap snap.Snap) (bool, error) { - client, err := snap.KubernetesClient("calico-system") - if err != nil { - return false, fmt.Errorf("failed to create kubernetes client: %w", err) +func podIsReady(pod v1.Pod) bool { + if pod.Status.Phase != v1.PodRunning { + return false } - operatorReady, err := client.IsPodReady(ctx, "kube-system", "tigera-operator", metav1.ListOptions{}) - if err != nil { - return false, fmt.Errorf("failed to get calico pods: %w", err) - } - if !operatorReady { - return false, nil + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodReady && condition.Status == v1.ConditionTrue { + return true + } } - calicoPods, err := client.ListPods(ctx, "calico-system", metav1.ListOptions{}) - if err != nil { - return false, fmt.Errorf("failed to get calico pods: %w", err) - } - calicoApiserverPods, err := client.ListPods(ctx, "calico-apiserver", metav1.ListOptions{}) + return false +} + +// CheckNetwork checks the status of the Calico pods in the Kubernetes cluster. +// We verify that the tigera-operator and calico-node pods are Ready and in Running state. +func CheckNetwork(ctx context.Context, snap snap.Snap) (bool, error) { + client, err := snap.KubernetesClient("calico-system") if err != nil { - return false, fmt.Errorf("failed to get calico-apiserver pods: %w", err) + return false, fmt.Errorf("failed to create kubernetes client: %w", err) } - for _, pod := range append(calicoPods, calicoApiserverPods...) { - isReady, err := client.IsPodReady(ctx, pod.Name, "calico-system", metav1.ListOptions{}) + for _, check := range []struct { + name string + namespace string + labels map[string]string + }{ + // check that the tigera-operator pods are ready + {name: "tigera-operator", namespace: "tigera-operator", labels: map[string]string{"k8s-app": "tigera-operator"}}, + // check that calico-node pods are ready + {name: "calico-node", namespace: "calico-system", labels: map[string]string{"app.kubernetes.io/name": "calico-node"}}, + } { + pods, err := client.ListPods(ctx, check.namespace, metav1.ListOptions{ + LabelSelector: metav1.FormatLabelSelector(&metav1.LabelSelector{MatchLabels: check.labels}), + }) if err != nil { - return false, fmt.Errorf("failed to check if pod %q is ready: %w", pod.Name, err) + return false, fmt.Errorf("failed to get %v pods: %w", check.name, err) } - if !isReady { - return false, nil + if len(pods) == 0 { + return false, fmt.Errorf("no %v pods exist on the cluster", check.name) + } + + for _, pod := range pods { + if !podIsReady(pod) { + return false, fmt.Errorf("%v pod %q not ready", check.name, pod.Name) + } } } diff --git a/src/k8s/pkg/k8sd/features/cilium/status.go b/src/k8s/pkg/k8sd/features/cilium/status.go index a879d1719..ba36c51ee 100644 --- a/src/k8s/pkg/k8sd/features/cilium/status.go +++ b/src/k8s/pkg/k8sd/features/cilium/status.go @@ -26,7 +26,7 @@ func CheckNetwork(ctx context.Context, snap snap.Snap) (bool, error) { return false, fmt.Errorf("failed to check if pod %q is ready: %w", ciliumPod, err) } if !isReady { - return false, nil + return false, fmt.Errorf("cilium pod %q is not yet ready", ciliumPod) } } diff --git a/src/k8s/pkg/k8sd/features/coredns/status.go b/src/k8s/pkg/k8sd/features/coredns/status.go index a22fdac9f..2bb260f7a 100644 --- a/src/k8s/pkg/k8sd/features/coredns/status.go +++ b/src/k8s/pkg/k8sd/features/coredns/status.go @@ -20,6 +20,9 @@ func CheckDNS(ctx context.Context, snap snap.Snap) (bool, error) { if err != nil { return false, fmt.Errorf("failed to wait for CoreDNS pod to be ready: %w", err) } + if !isReady { + return false, fmt.Errorf("coredns pod not ready yet") + } return isReady, nil }