canonical · bschimke95 · Aug 22, 2024 · Aug 8, 2024 · Aug 8, 2024 · Aug 8, 2024
@@ -80,9 +80,27 @@ jobs:
       - name: Apply ${{ matrix.patch }} patch
         run: |
           ./build-scripts/patches/${{ matrix.patch }}/apply
+      # Report captured before the end to end tests run. upload-artifact@v4
+      # rejects an upload to an artifact name that already exists in the run,
+      # so the name includes the matrix job index and differs from the name
+      # used by the upload that runs after a test failure.
+      - name: Generate pre-test inspection report
+        run: |
+          sudo k8s/scripts/inspect.sh
+      - name: Upload pre-test inspection report artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: inspection-report-pre-test-${{ strategy.job-index }}
+          path: ./inspection-report-*.tar.gz
       - name: Run end to end tests
         run: |
           export TEST_SNAP="$PWD/build/k8s-${{ matrix.patch }}.snap"
           export TEST_SUBSTRATE=lxd
           export TEST_LXD_IMAGE=${{ matrix.os }}
           cd tests/integration && sg lxd -c 'tox -e integration'
+      # Only runs when an earlier step failed. upload-artifact@v4 rejects an
+      # upload to an artifact name that already exists in the run, so the name
+      # includes the matrix job index to stay unique across matrix jobs and
+      # distinct from the upload performed before the tests.
+      - name: Generate inspection report
+        if: failure()
+        run: |
+          sudo k8s/scripts/inspect.sh
+      - name: Upload inspection report artifact
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: inspection-report-failure-${{ strategy.job-index }}
+          path: ./inspection-report-*.tar.gz
@@ -74,12 +74,31 @@ jobs:
         with:
           name: k8s.snap
           path: build
+      # Report captured before the end to end tests run. upload-artifact@v4
+      # rejects an upload to an artifact name that already exists in the run,
+      # so the name includes the matrix job index to stay unique per matrix job.
+      - name: Generate pre-test inspection report
+        run: |
+          sudo k8s/scripts/inspect.sh
+      - name: Upload pre-test inspection report artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: inspection-report-pre-test-${{ strategy.job-index }}
+          path: ./inspection-report-*.tar.gz
       - name: Run end to end tests
         run: |
           export TEST_SNAP="$PWD/build/k8s.snap"
           export TEST_SUBSTRATE=lxd
           export TEST_LXD_IMAGE=${{ matrix.os }}
-          cd tests/integration && sg lxd -c 'tox -e integration'
+          # The variable name must match what the integration test config reads
+          # (TEST_INSPECTION_REPORTS_DIR); environment variables are case-sensitive.
+          export TEST_INSPECTION_REPORTS_DIR="$HOME/inspection-reports"
+          # NOTE(review): the -k filter limits this job to a single test;
+          # confirm that is intended before merging.
+          cd tests/integration && sg lxd -c 'tox -e integration -- -k test_control_plane_nodes'
+      - name: Compress inspection reports
+        if: failure()
+        run: |
+          # matrix.os is also used as the LXD image name and may contain ':'
+          # or '/', which are not allowed in artifact names; replace them
+          # with '-'. Done first so the name is set even if tar fails.
+          echo "artifact_name=inspection-reports-${{ matrix.os }}" | sed 's|[:/]|-|g' >> "$GITHUB_ENV"
+          tar -czvf inspection-reports.tar.gz -C "$HOME" inspection-reports
+      - name: Upload inspection report artifact
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.artifact_name }}
+          # The tarball is written to the step's working directory, and
+          # `path` is not expanded by a shell, so "$HOME/..." would not match.
+          path: ./inspection-reports.tar.gz
 
   security-scan:
     permissions:

@@ -1,4 +1,22 @@
 #!/usr/bin/env bash
+#
+# This script collects diagnostics and other relevant information from a Kubernetes
+# node (either control-plane or worker node) and compiles them into a tarball report.
+# The collected data includes service arguments, Kubernetes cluster info, SBOM, system
+# diagnostics, network diagnostics, and more. The script needs to be run with
+# elevated permissions (sudo).
+#
+# Usage:
+#   ./inspect.sh [output_file]
+#
+# Arguments:
+#   output_file  (Optional) The full path and filename for the generated tarball.
+#                If not provided, a default filename based on the current date
+#                and time will be used.
+#
+# Example:
+#   ./inspect.sh /path/to/output.tar.gz
+#   ./inspect.sh  # This will generate a tarball with a default name.
 
 INSPECT_DUMP=$(pwd)/inspection-report
 
@@ -82,7 +100,7 @@ function collect_service_diagnostics {
   systemctl status "snap.$service" &>"$status_file"
 
   local n_restarts
-  n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2) 
+  n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)
 
   printf -- "%s -> %s\n" "$service" "$n_restarts" >> "$INSPECT_DUMP/nrestarts.log"
 
@@ -122,12 +140,19 @@ function check_expected_services {
 }
 
 function build_report_tarball {
+  local output_file
   local now_is
   now_is=$(date +"%Y%m%d_%H%M%S")
 
-  tar -C "$(pwd)" -cf "$(pwd)/inspection-report-${now_is}.tar" inspection-report &>/dev/null
-  gzip "$(pwd)/inspection-report-${now_is}.tar"
-  log_success "Report tarball is at $(pwd)/inspection-report-$now_is.tar.gz"
+  if [ -z "$1" ]; then
+    output_file="$(pwd)/inspection-report-${now_is}.tar.gz"
+  else
+    output_file="$1"
+  fi
+
+  tar -C "$(pwd)" -cf "${output_file%.gz}" inspection-report &>/dev/null
+  gzip "${output_file%.gz}" -f
+  log_success "Report tarball is at $output_file"
 }
 
 if [ "$EUID" -ne 0 ]; then
@@ -181,4 +206,4 @@ if [ -n "$matches" ]; then
 fi
 
 printf -- 'Building the report tarball\n'
-build_report_tarball
+build_report_tarball "$1"
@@ -2,6 +2,7 @@
 # Copyright 2024 Canonical, Ltd.
 #
 import logging
+import os
 from pathlib import Path
 from typing import Generator, List, Union
 
@@ -25,6 +26,27 @@ def _harness_clean(h: harness.Harness):
         h.cleanup()
 
 
+def _generate_inspection_reports(h: harness.Harness):
+    """Collect an inspection report from every instance known to the harness.
+
+    For each instance id returned by ``h.get_instances()``, inspect.sh is run
+    on the instance; its captured stdout and the tarball it produces are
+    stored under ``<config.INSPECTION_REPORTS_DIR>/<instance_id>/``.
+
+    Does nothing when ``config.INSPECTION_REPORTS_DIR`` is unset or empty.
+    """
+    if not config.INSPECTION_REPORTS_DIR:
+        LOG.debug("No inspection reports directory configured, skipping")
+        return
+
+    inspection_path = Path(config.INSPECTION_REPORTS_DIR)
+    # Location of the tarball on the instance before it is pulled.
+    remote_tarball = "/inspection-report.tar.gz"
+
+    for instance_id in h.get_instances():
+        LOG.debug("Generating inspection report for %s", instance_id)
+        result = h.exec(
+            instance_id,
+            ["/snap/k8s/current/k8s/scripts/inspect.sh", remote_tarball],
+            capture_output=True,
+            text=True,
+        )
+
+        instance_dir = inspection_path / instance_id
+        instance_dir.mkdir(parents=True, exist_ok=True)
+        (instance_dir / "inspection_report_logs.txt").write_text(result.stdout)
+
+        h.pull_file(
+            instance_id,
+            remote_tarball,
+            (instance_dir / "inspection_report.tar.gz").as_posix(),
+        )
+
+
 @pytest.fixture(scope="session")
 def h() -> harness.Harness:
     LOG.debug("Create harness for %s", config.SUBSTRATE)
@@ -43,6 +65,10 @@ def h() -> harness.Harness:
 
     yield h
 
+    if config.INSPECTION_REPORTS_DIR is not None:
+        LOG.debug("Generating inspection reports")
+        _generate_inspection_reports(h)
+
     _harness_clean(h)
 
 
@@ -137,6 +163,7 @@ def instances(
     # remove the session_instance. The harness ensures that everything is cleaned up
     # at the end of the test session.
     for instance in instances:
+        _generate_inspection_reports(h)
         h.delete_instance(instance.id)
 
 

diff --git a/tests/integration/tests/test_clustering.py b/tests/integration/tests/test_clustering.py
@@ -11,7 +11,9 @@
 
 
 @pytest.mark.node_count(2)
+@pytest.mark.disable_k8s_bootstrapping()
 def test_control_plane_nodes(instances: List[harness.Instance]):
+    pytest.xfail("footgun")
     cluster_node = instances[0]
     joining_node = instances[1]
 

@@ -12,10 +12,10 @@
 ETCD_DIR = MANIFESTS_DIR / "etcd"
 
 # ETCD_URL is the url from which the etcd binaries should be downloaded.
-ETCD_URL = os.getenv("ETCD_URL") or "https://github.com/etcd-io/etcd/releases/download"
+ETCD_URL = os.getenv("TEST_ETCD_URL") or "https://github.com/etcd-io/etcd/releases/download"
 
 # ETCD_VERSION is the version of etcd to use.
-ETCD_VERSION = os.getenv("ETCD_VERSION") or "v3.3.8"
+ETCD_VERSION = os.getenv("TEST_ETCD_VERSION") or "v3.3.8"
 
 # SNAP is the absolute path to the snap against which we run the integration tests.
 SNAP = os.getenv("TEST_SNAP")
@@ -28,6 +28,10 @@
 # after the tests complete.
 SKIP_CLEANUP = (os.getenv("TEST_SKIP_CLEANUP") or "") == "1"
 
+# INSPECTION_REPORTS_DIR is the directory where inspection reports are stored.
+# If the variable is unset or empty, the value is None and no reports are
+# generated.
+INSPECTION_REPORTS_DIR = os.getenv("TEST_INSPECTION_REPORTS_DIR") or None
+
 # LXD_PROFILE_NAME is the profile name to use for LXD containers.
 LXD_PROFILE_NAME = os.getenv("TEST_LXD_PROFILE_NAME") or "k8s-integration"
 

diff --git a/tests/integration/tests/test_util/harness/base.py b/tests/integration/tests/test_util/harness/base.py
@@ -3,6 +3,7 @@
 #
 import subprocess
 from functools import partial
+from typing import Set
 
 
 class HarnessError(Exception):
@@ -52,6 +53,13 @@ def new_instance(self, dualstack: bool = False) -> Instance:
         """
         raise NotImplementedError
 
+    def get_instances(self) -> Set[str]:
+        """Returns the set of instance ids that were created by new_instance().
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
+
     def send_file(self, instance_id: str, source: str, destination: str):
         """Send a local file to the instance.
 

diff --git a/tests/integration/tests/test_util/harness/juju.py b/tests/integration/tests/test_util/harness/juju.py
@@ -7,6 +7,7 @@
 import subprocess
 from pathlib import Path
 
+from typing import Set
 from test_util import config
 from test_util.harness import Harness, HarnessError, Instance
 from test_util.util import run
@@ -93,6 +94,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
         self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
         return Instance(self, instance_id)
 
+    def get_instances(self) -> Set[str]:
+        """Returns self.instances, the harness's own collection (not a copy)."""
+        return self.instances
+
     def send_file(self, instance_id: str, source: str, destination: str):
         if instance_id not in self.instances:
             raise HarnessError(f"unknown instance {instance_id}")

diff --git a/tests/integration/tests/test_util/harness/local.py b/tests/integration/tests/test_util/harness/local.py
@@ -8,6 +8,7 @@
 import socket
 import subprocess
 from pathlib import Path
+from typing import Set
 
 from test_util.harness import Harness, HarnessError, Instance
 from test_util.util import run
@@ -22,27 +23,33 @@ class LocalHarness(Harness):
 
     def __init__(self):
         super(LocalHarness, self).__init__()
         self.initialized = False
         self.hostname = socket.gethostname().lower()
 
         LOG.debug("Configured local substrate")
 
     def new_instance(self, dualstack: bool = False) -> Instance:
         if self.initialized:
             raise HarnessError("local substrate only supports up to one instance")
 
         if dualstack:
             raise HarnessError("Dualstack is currently not supported by Local harness")
 
         self.initialized = True
         LOG.debug("Initializing instance")
         try:
             self.exec(self.hostname, ["snap", "wait", "system", "seed.loaded"])
         except subprocess.CalledProcessError as e:
             raise HarnessError("failed to wait for snapd seed") from e
 
         return Instance(self, self.hostname)
 
+    def get_instances(self) -> Set[str]:
+        """Returns the ids of the instances created by new_instance().
+
+        The local substrate supports at most one instance, identified by
+        self.hostname, so the result is either empty or a single-element set.
+        """
+        # self.initialized remains the single source of truth for whether the
+        # instance exists: other methods of this class (e.g. send_file) still
+        # check it.
+        if not self.initialized:
+            return set()
+        # A set literal is required: set(<str>) would split the hostname into
+        # a set of single characters.
+        return {self.hostname}
+
     def send_file(self, _: str, source: str, destination: str):
         if not self.initialized:
             raise HarnessError("no instance initialized")

diff --git a/tests/integration/tests/test_util/harness/lxd.py b/tests/integration/tests/test_util/harness/lxd.py
@@ -6,7 +6,7 @@
 import shlex
 import subprocess
 from pathlib import Path
-from typing import List
+from typing import List, Set
 
 from test_util import config
 from test_util.harness import Harness, HarnessError, Instance
@@ -108,6 +108,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
         self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
         return Instance(self, instance_id)
 
+    def get_instances(self) -> Set[str]:
+        """Returns self.instances, the harness's own collection (not a copy)."""
+        return self.instances
+
     def _configure_profile(self, profile_name: str, profile_config: str):
         LOG.debug("Checking for LXD profile %s", profile_name)
         try:

diff --git a/tests/integration/tests/test_util/harness/multipass.py b/tests/integration/tests/test_util/harness/multipass.py
@@ -6,6 +6,7 @@
 import shlex
 import subprocess
 from pathlib import Path
+from typing import Set
 
 from test_util import config
 from test_util.harness import Harness, HarnessError, Instance
@@ -69,6 +70,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
         self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
         return Instance(self, instance_id)
 
+    def get_instances(self) -> Set[str]:
+        """Returns self.instances, the harness's own collection (not a copy)."""
+        return self.instances
+
     def send_file(self, instance_id: str, source: str, destination: str):
         if instance_id not in self.instances:
             raise HarnessError(f"unknown instance {instance_id}")