# .github/workflows/e2e.yaml

# End-to-end test workflow.
name: e2e
on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Pull requests, except those that only touch the ignored paths below.
  pull_request:
    paths-ignore:
      - 'docs/**'
      - 'hack/**'
      - 'logos/**'
      - 'rfcs/**'
      - 'tools/**'
      - '*.md'
  # Every push to the main branch.
  push:
    branches:
      - main

permissions:
  # Read-only repository access, needed by actions/checkout to fetch code.
  contents: read

jobs:
  # Single job: sets up the toolchain and a KIND cluster, builds and deploys
  # the controller images, then runs the test scenarios step by step.
  e2e:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
      # yq is used by the "Deploy controllers" step to edit
      # config/manager/manager.yaml in place.
      - name: Setup YQ
        uses: frenck/action-setup-yq@c4b5be8b4a215c536a41d436757d9feb92836d4f # v1.0.2
        with:
          version: 4.14.2
      # The Go version is read from go.mod; the cache-dependency-path globs
      # cover every go.sum / go.mod in the repository.
      - name: Setup Go
        uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
        with:
          go-version-file: go.mod
          cache-dependency-path: |
            **/go.sum
            **/go.mod
      # Caches /tmp/.buildx-cache between runs. The primary key embeds the
      # commit SHA; restore-keys supplies the prefix used as a fallback when
      # no cache exists for that exact key.
      - name: Cache Docker layers
        uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
        id: cache
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-ghcache-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-ghcache-
      # Creates a KIND cluster named "kind"; the node image is pinned by
      # digest. The later `kind load docker-image` calls pass no --name, so
      # they presumably rely on this being kind's default cluster name.
      - name: Setup Kubernetes
        uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0
        with:
          version: v0.18.0
          node_image: kindest/node:v1.24.12@sha256:1e12918b8bc3d4253bc08f640a231bb0d3b2c5a9b28aa3f2ca1aee93e1e8db16
          cluster_name: kind
      # kustomize and kubectl are both installed from the same pinned commit
      # of fluxcd/pkg.
      - name: Setup Kustomize
        uses: fluxcd/pkg/actions/kustomize@30c101fc7c9fac4d84937ff4890a3da46a9db2dd # main
      - name: Setup Kubectl
        uses: fluxcd/pkg/actions/kubectl@30c101fc7c9fac4d84937ff4890a3da46a9db2dd # main
      - name: Check if working tree is dirty
        run: |
          # Any output from `git diff --stat` means tracked files were modified;
          # print the full diff and fail the job.
          if [[ -n "$(git diff --stat)" ]]; then
            git --no-pager diff
            echo 'run make test and commit changes'
            exit 1
          fi
      - name: Build the tofu-controller container image
        run: |
          # Images are tagged with the first 8 characters of the commit SHA.
          image_tag="e2e-${GITHUB_SHA::8}"
          docker buildx use default
          make docker-buildx \
            MANAGER_IMG=test/tofu-controller \
            RUNNER_IMG=test/tf-runner \
            TAG="${image_tag}"
      - name: Load test images into KIND
        run: |
          # Same tag as the build step; each step runs in its own shell, so it
          # has to be recomputed here.
          image_tag="e2e-${GITHUB_SHA::8}"

          for image in test/tofu-controller test/tf-runner; do
            kind load docker-image "${image}:${image_tag}"
          done
      - name: Install CRDs
        run: make install
      - name: Deploy controllers
        run: |
          VERSION="e2e-${GITHUB_SHA::8}"

          # Patch env RUNNER_POD_IMAGE to be test/tf-runner:$VERSION
          # (addressed by position: second env entry of the first container).
          yq -i e ".spec.template.spec.containers[0].env[1].value=\"test/tf-runner:$VERSION\"" config/manager/manager.yaml

          # Dev deploy - do it twice to make sure the CRDs get in first.
          # The first attempt is allowed to fail; the second one must succeed.
          make dev-deploy MANAGER_IMG=test/tofu-controller RUNNER_IMG=test/tf-runner TAG="$VERSION" || true
          make dev-deploy MANAGER_IMG=test/tofu-controller RUNNER_IMG=test/tf-runner TAG="$VERSION"

          # All of these old certs should be cleaned up by GC at the start of the test
          kubectl -n tofu-system apply -f config/testdata/gc-old-certs/test.yaml

          # Increase the concurrency of the controller to speed up tests
          # --cert-rotation-check-frequency=6m0s, then GC will run every 1 minute
          # The patch is strict JSON (no trailing comma after the last
          # argument), so it does not depend on lenient patch parsing.
          kubectl patch deployment \
            tofu-controller \
            --namespace tofu-system \
            --type='json' \
            -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args", "value": [
            "--watch-all-namespaces",
            "--log-level=info",
            "--log-encoding=json",
            "--enable-leader-election",
            "--concurrent=10",
            "--cert-rotation-check-frequency=6m0s"
          ]}]'

          # Both controllers must finish rolling out before the tests start.
          kubectl -n tofu-system rollout status deploy/source-controller --timeout=1m
          kubectl -n tofu-system rollout status deploy/tofu-controller --timeout=1m
      - name: Get terraform version
        run: |
          # Terraform binary will be moved from the ToFu-controller image to
          # TF-runner, so the version is queried from the runner image.
          runner_image="test/tf-runner:e2e-${GITHUB_SHA::8}"
          docker run --rm --entrypoint=/usr/local/bin/terraform "${runner_image}" version
      - name: Add git repository source
        run: |
          kubectl -n tofu-system apply -f ./config/testdata/source

          # Both sources must become ready before any Terraform object is applied.
          for source in gitrepository/helloworld ocirepository/helloworld-oci; do
            kubectl -n tofu-system wait "${source}" --for=condition=ready --timeout=4m
          done
      - name: Run approvePlan tests
        run: |
          fixtures=./config/testdata/approve-plan
          kubectl -n tofu-system apply -f "${fixtures}"

          # Auto-approved objects must reach ready.
          for tf in helloworld-auto-approve helloworld-oci-auto-approve; do
            kubectl -n tofu-system wait "terraform/${tf}" --for=condition=ready --timeout=4m
          done
          # The manually approved object is only expected to reach plan=true.
          kubectl -n tofu-system wait terraform/helloworld-manual-approve --for=condition=plan=true --timeout=4m

          # delete after tests
          kubectl -n tofu-system delete -f "${fixtures}"
      - name: Run plan with pod cleanup tests
        run: |
          kubectl -n tofu-system apply -f ./config/testdata/always-clean-pod
          kubectl -n tofu-system wait terraform/helloworld-always-clean-pod-manual-approve --for=condition=plan=true --timeout=4m

          # The runner pod must be gone once the plan is done.
          # The check targets the pod itself and fails explicitly: a bare
          # `! kubectl get ...` never fails the step, because bash's errexit
          # ignores commands whose status is inverted with `!`.
          # Poll for up to ~60s to allow for the pod's termination.
          runner_pod=helloworld-always-clean-pod-manual-approve-tf-runner
          leftover=""
          for _ in $(seq 30); do
            # --ignore-not-found prints nothing and exits 0 when the pod is gone.
            leftover="$(kubectl -n tofu-system get pod "${runner_pod}" --ignore-not-found -o name)"
            if [[ -z "${leftover}" ]]; then
              break
            fi
            sleep 2
          done
          if [[ -n "${leftover}" ]]; then
            echo "runner pod ${runner_pod} was not cleaned up"
            kubectl -n tofu-system get pod "${runner_pod}"
            exit 1
          fi

          # delete after tests
          kubectl -n tofu-system delete -f ./config/testdata/always-clean-pod
      - name: Run drift detection tests
        run: |
          fixtures=./config/testdata/drift-detection
          target=terraform/helloworld-drift-detection

          kubectl -n tofu-system apply -f "${fixtures}"

          # the object has to be applied first
          kubectl -n tofu-system wait "${target}" --for=condition=apply=true --timeout=4m

          # then switch .spec.approvePlan to "disable" and expect it to stay ready
          kubectl -n tofu-system patch "${target}" -p '{"spec":{"approvePlan":"disable"}}' --type=merge
          kubectl -n tofu-system wait "${target}" --for=condition=ready=true --timeout=4m

          # the object with drift detection disabled should work correctly too
          kubectl -n tofu-system wait "${target}-disable" --for=condition=ready --timeout=4m

          # delete after tests
          kubectl -n tofu-system delete -f "${fixtures}"
      - name: Run healthchecks tests
        run: |
          # apply the fixtures, wait for ready, then clean up
          fixtures=./config/testdata/healthchecks
          kubectl -n tofu-system apply -f "${fixtures}"
          kubectl -n tofu-system wait terraform/helloworld-healthchecks --for=condition=ready --timeout=4m
          kubectl -n tofu-system delete -f "${fixtures}"
      - name: Run vars tests
        run: |
          # apply the fixtures, wait for ready, then clean up
          fixtures=./config/testdata/vars
          kubectl -n tofu-system apply -f "${fixtures}"
          kubectl -n tofu-system wait terraform/helloworld-vars --for=condition=ready --timeout=4m
          kubectl -n tofu-system delete -f "${fixtures}"
      - name: Run multi-tenancy test
        run: |
          tenant_namespaces=(tf-tenant01-dev tf-tenant01-prd tf-tenant02-dev tf-tenant02-prd)

          # run three controller replicas for this scenario
          kubectl -n tofu-system scale --replicas=3 deploy/tofu-controller

          for tenant in tenant01 tenant02; do
            kustomize build "./config/testdata/multi-tenancy/${tenant}" | kubectl apply -f -
          done

          # each namespace tf-<tenant>-<env> holds terraform/helloworld-<tenant>-<env>
          for ns in "${tenant_namespaces[@]}"; do
            kubectl -n "${ns}" wait "terraform/helloworld-${ns#tf-}" --for=condition=ready --timeout=4m
          done

          # delete after tests: all terraform objects first, then all
          # gitrepositories, and finally the namespaces themselves
          for kind in terraform gitrepository; do
            for ns in "${tenant_namespaces[@]}"; do
              kubectl -n "${ns}" delete "${kind}" --all
            done
          done

          for ns in "${tenant_namespaces[@]}"; do
            kubectl delete ns "${ns}"
          done

      - name: Set up chaos testing environment
        run: |
          # scale the controller down while the fixtures are created
          kubectl -n tofu-system scale --replicas=0 deploy/tofu-controller
          sleep 3

          # chaos objects, their sources, and namespace-scoped old certs for GC
          for fixture in chaos source gc-old-certs/test.yaml; do
            kubectl -n chaos-testing apply -f "./config/testdata/${fixture}"
          done

          # bring the controller back and give it a moment to start working
          kubectl -n tofu-system scale --replicas=1 deploy/tofu-controller

          sleep 10
      - name: Randomly delete runner pods
        run: |
          # use chaos level 3 at the moment, as we don't have much CPU resources:
          # pick 3 of the 5 runner pods at random; a failed delete is ignored
          for i in $(seq 5 | shuf | head -3); do
            kubectl -n chaos-testing delete pod "helloworld-chaos0${i}-tf-runner" || true
          done
          sleep 10
      - name: Verify chaos testing result
        run: |
          kubectl -n chaos-testing get pods

          # every chaos object must become ready despite the deleted runner pods
          for i in 1 2 3 4 5; do
            kubectl -n chaos-testing wait "terraform/helloworld-chaos0${i}" --for=condition=ready --timeout=30m
          done
      - name: Check that all old certs were GCed
        run: |
          echo "wait 120 seconds for GC to happen"
          sleep 120

          # Secrets terraform-runner.tls-0 .. tls-9 must be gone from both
          # namespaces; the step fails on the first one that still exists.
          for ns in chaos-testing tofu-system; do
            for i in 0 1 2 3 4 5 6 7 8 9; do
              if kubectl get secret "terraform-runner.tls-${i}" -n "${ns}" >/dev/null 2>&1; then
                exit 1
              fi
            done
          done

          echo "All tests are true, all of the old secrets were GCed."
      # Prints the logs of both controller deployments. This step has no `if:`
      # condition, so it only runs when every previous step succeeded.
      - name: Logs
        run: |
          kubectl -n tofu-system logs deploy/source-controller
          kubectl -n tofu-system logs deploy/tofu-controller
      # Runs only after a failed step and dumps as much state as possible.
      - name: Debug failure
        if: failure()
        run: |
          # Best effort: the default shell runs with errexit, which would stop
          # at the first failing command and hide the remaining diagnostics.
          set +e

          which kubectl
          kubectl version
          kustomize version
          kubectl -n tofu-system logs deploy/source-controller
          kubectl -n tofu-system logs deploy/tofu-controller

          # Dump sources, Terraform objects and workloads of every namespace
          # used by the tests.
          ns=(tofu-system tf-tenant01-dev tf-tenant01-prd tf-tenant02-dev tf-tenant02-prd chaos-testing)
          for n in "${ns[@]}"
          do
            kubectl -n "$n" get gitrepositories -oyaml
            kubectl -n "$n" get terraforms -oyaml
            kubectl -n "$n" get all
          done