Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate inspection report on CI failures #593

Merged
merged 19 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/integration-informing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,27 @@ jobs:
- name: Apply ${{ matrix.patch }} patch
run: |
./build-scripts/patches/${{ matrix.patch }}/apply
- name: Generate inspection report
run: |
sudo k8s/scripts/inspect.sh
- name: Upload inspection report artifact
uses: actions/upload-artifact@v4
with:
name: inspection-report
path: ./inspection-report-*.tar.gz
- name: Run end to end tests
run: |
export TEST_SNAP="$PWD/build/k8s-${{ matrix.patch }}.snap"
export TEST_SUBSTRATE=lxd
export TEST_LXD_IMAGE=${{ matrix.os }}
cd tests/integration && sg lxd -c 'tox -e integration'
- name: Generate inspection report
if: failure()
run: |
sudo k8s/scripts/inspect.sh
- name: Upload inspection report artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: inspection-report
path: ./inspection-report-*.tar.gz
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved
21 changes: 20 additions & 1 deletion .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,31 @@ jobs:
with:
name: k8s.snap
path: build
- name: Generate inspection report
run: |
sudo k8s/scripts/inspect.sh
- name: Upload inspection report artifact
uses: actions/upload-artifact@v4
with:
name: inspection-report
path: ./inspection-report-*.tar.gz
- name: Run end to end tests
  run: |
    export TEST_SNAP="$PWD/build/k8s.snap"
    export TEST_SUBSTRATE=lxd
    export TEST_LXD_IMAGE=${{ matrix.os }}
    # The name must match the variable read by tests/test_util/config.py
    # (TEST_INSPECTION_REPORTS_DIR); environment variables are case-sensitive.
    export TEST_INSPECTION_REPORTS_DIR="$HOME/inspection-reports"
    cd tests/integration && sg lxd -c 'tox -e integration -- -k test_control_plane_nodes'
# Bundle the $HOME/inspection-reports directory into inspection-reports.tar.gz
# in the step's working directory. Only runs after an earlier step has failed.
- name: Compress inspection reports
  if: failure()
  run: |
    tar -czvf inspection-reports.tar.gz -C $HOME inspection-reports
# Publish the compressed inspection reports, only after an earlier step failed.
- name: Upload inspection report artifact
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    # Expressions are only evaluated inside ${{ }}; a bare {{ }} is kept literally.
    # NOTE(review): artifact names may not contain ':' or '/'. Confirm the
    # matrix.os values are safe, or sanitize them before use here.
    name: inspection-reports-${{ matrix.os }}
    # The compress step writes the tarball into the working directory, and
    # action inputs are not shell-expanded, so "$HOME/..." would never match.
    path: inspection-reports.tar.gz

security-scan:
permissions:
Expand Down
35 changes: 30 additions & 5 deletions k8s/scripts/inspect.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
#!/usr/bin/env bash
#
# This script collects diagnostics and other relevant information from a Kubernetes
# node (either control-plane or worker node) and compiles them into a tarball report.
# The collected data includes service arguments, Kubernetes cluster info, SBOM, system
# diagnostics, network diagnostics, and more. The script needs to be run with
# elevated permissions (sudo).
#
# Usage:
# ./inspect.sh [output_file]
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved
#
# Arguments:
# output_file (Optional) The full path and filename for the generated tarball.
# If not provided, a default filename based on the current date
# and time will be used.
#
# Example:
# ./inspect.sh /path/to/output.tar.gz
# ./inspect.sh # This will generate a tarball with a default name.

INSPECT_DUMP=$(pwd)/inspection-report

Expand Down Expand Up @@ -82,7 +100,7 @@ function collect_service_diagnostics {
systemctl status "snap.$service" &>"$status_file"

local n_restarts
n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)
n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)

printf -- "%s -> %s\n" "$service" "$n_restarts" >> "$INSPECT_DUMP/nrestarts.log"

Expand Down Expand Up @@ -122,12 +140,19 @@ function check_expected_services {
}

# Package the "inspection-report" directory found in the current working
# directory into a gzip-compressed tarball.
#
# Arguments:
#   $1  (Optional) Full path of the tarball to create. When empty or omitted,
#       inspection-report-<YYYYmmdd_HHMMSS>.tar.gz in the current working
#       directory is used.
#
# Returns:
#   0 when the tarball was written, 1 when tar failed.
function build_report_tarball {
  local output_file="${1:-}"

  if [ -z "$output_file" ]; then
    local now_is
    now_is=$(date +"%Y%m%d_%H%M%S")
    output_file="$(pwd)/inspection-report-${now_is}.tar.gz"
  fi

  # Write the compressed archive directly to the requested path. Creating a
  # .tar first and gzipping it afterwards produced "<name>.gz" (while
  # reporting "<name>") whenever the requested path did not end in ".gz".
  if ! tar -C "$(pwd)" -czf "$output_file" inspection-report &>/dev/null; then
    printf -- 'Failed to build the report tarball at %s\n' "$output_file" >&2
    return 1
  fi

  log_success "Report tarball is at $output_file"
}

if [ "$EUID" -ne 0 ]; then
Expand Down Expand Up @@ -181,4 +206,4 @@ if [ -n "$matches" ]; then
fi

printf -- 'Building the report tarball\n'
build_report_tarball
build_report_tarball "$1"
27 changes: 27 additions & 0 deletions tests/integration/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright 2024 Canonical, Ltd.
#
import logging
import os
from pathlib import Path
from typing import Generator, List, Union

Expand All @@ -25,6 +26,27 @@ def _harness_clean(h: harness.Harness):
h.cleanup()


def _generate_inspection_reports(h: harness.Harness):
    """Generate an inspection report on every harness instance and collect it.

    For each instance id returned by ``h.get_instances()``, run the snap's
    ``inspect.sh`` inside the instance, save the captured stdout of the script
    to ``<reports dir>/<instance id>/inspection_report_logs.txt`` and pull the
    resulting tarball next to it as ``inspection_report.tar.gz``.

    Does nothing when ``config.INSPECTION_REPORTS_DIR`` is unset or empty.
    """
    if not config.INSPECTION_REPORTS_DIR:
        # Path(None) raises TypeError, and not every caller checks the
        # setting before calling this helper.
        LOG.debug("No inspection reports directory configured, skipping")
        return

    inspection_path = Path(config.INSPECTION_REPORTS_DIR)
    remote_tarball = "/inspection-report.tar.gz"

    for instance_id in h.get_instances():
        LOG.debug("Generating inspection report for %s", instance_id)
        result = h.exec(
            instance_id,
            ["/snap/k8s/current/k8s/scripts/inspect.sh", remote_tarball],
            capture_output=True,
            text=True,
        )

        instance_dir = inspection_path / instance_id
        instance_dir.mkdir(parents=True, exist_ok=True)
        with open(
            instance_dir / "inspection_report_logs.txt", "w", encoding="utf-8"
        ) as f:
            f.write(result.stdout)

        h.pull_file(
            instance_id,
            remote_tarball,
            (instance_dir / "inspection_report.tar.gz").as_posix(),
        )




@pytest.fixture(scope="session")
def h() -> harness.Harness:
LOG.debug("Create harness for %s", config.SUBSTRATE)
Expand All @@ -43,6 +65,10 @@ def h() -> harness.Harness:

yield h

if config.INSPECTION_REPORTS_DIR is not None:
LOG.debug("Generating inspection reports")
_generate_inspection_reports(h)

_harness_clean(h)


Expand Down Expand Up @@ -137,6 +163,7 @@ def instances(
# remove the session_instance. The harness ensures that everything is cleaned up
# at the end of the test session.
for instance in instances:
_generate_inspection_reports(h)
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved
h.delete_instance(instance.id)


Expand Down
2 changes: 2 additions & 0 deletions tests/integration/tests/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@


@pytest.mark.node_count(2)
@pytest.mark.disable_k8s_bootstrapping()
def test_control_plane_nodes(instances: List[harness.Instance]):
pytest.xfail("footgun")
cluster_node = instances[0]
joining_node = instances[1]

Expand Down
8 changes: 6 additions & 2 deletions tests/integration/tests/test_util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
ETCD_DIR = MANIFESTS_DIR / "etcd"

# ETCD_URL is the url from which the etcd binaries should be downloaded.
ETCD_URL = os.getenv("ETCD_URL") or "https://github.com/etcd-io/etcd/releases/download"
ETCD_URL = os.getenv("TEST_ETCD_URL") or "https://github.com/etcd-io/etcd/releases/download"

# ETCD_VERSION is the version of etcd to use.
ETCD_VERSION = os.getenv("ETCD_VERSION") or "v3.3.8"
ETCD_VERSION = os.getenv("TEST_ETCD_VERSION") or "v3.3.8"

bschimke95 marked this conversation as resolved.
Show resolved Hide resolved
# SNAP is the absolute path to the snap against which we run the integration tests.
SNAP = os.getenv("TEST_SNAP")
Expand All @@ -28,6 +28,10 @@
# after the tests complete.
SKIP_CLEANUP = (os.getenv("TEST_SKIP_CLEANUP") or "") == "1"

# INSPECTION_REPORTS_DIR is the directory where inspection reports are stored.
# If unset, no reports are generated.
INSPECTION_REPORTS_DIR = os.getenv("TEST_INSPECTION_REPORTS_DIR")

# LXD_PROFILE_NAME is the profile name to use for LXD containers.
LXD_PROFILE_NAME = os.getenv("TEST_LXD_PROFILE_NAME") or "k8s-integration"

Expand Down
8 changes: 8 additions & 0 deletions tests/integration/tests/test_util/harness/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
import subprocess
from functools import partial
from typing import Set


class HarnessError(Exception):
Expand Down Expand Up @@ -52,6 +53,13 @@ def new_instance(self, dualstack: bool = False) -> Instance:
"""
raise NotImplementedError

def get_instances(self) -> Set[str]:
    """Returns the set of instance ids that were created by new_instance().

    If the operation fails, a HarnessError is raised.
    """
    raise NotImplementedError

def send_file(self, instance_id: str, source: str, destination: str):
"""Send a local file to the instance.

Expand Down
4 changes: 4 additions & 0 deletions tests/integration/tests/test_util/harness/juju.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
from pathlib import Path

from typing import Set
from test_util import config
from test_util.harness import Harness, HarnessError, Instance
from test_util.util import run
Expand Down Expand Up @@ -93,6 +94,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
return Instance(self, instance_id)

def get_instances(self) -> Set[str]:
    """Return the instance ids tracked in ``self.instances``.

    The tracked object itself is returned, not a copy.
    """
    return self.instances
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved

def send_file(self, instance_id: str, source: str, destination: str):
if instance_id not in self.instances:
raise HarnessError(f"unknown instance {instance_id}")
Expand Down
13 changes: 10 additions & 3 deletions tests/integration/tests/test_util/harness/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import socket
import subprocess
from pathlib import Path
from typing import Set

from test_util.harness import Harness, HarnessError, Instance
from test_util.util import run
Expand All @@ -22,27 +23,33 @@ class LocalHarness(Harness):

def __init__(self):
super(LocalHarness, self).__init__()
self.initialized = False
self.instance = None
self.hostname = socket.gethostname().lower()

LOG.debug("Configured local substrate")

def new_instance(self, dualstack: bool = False) -> Instance:
if self.initialized:
if self.instance is not None:
raise HarnessError("local substrate only supports up to one instance")

if dualstack:
raise HarnessError("Dualstack is currently not supported by Local harness")

self.initialized = True
LOG.debug("Initializing instance")
try:
self.exec(self.hostname, ["snap", "wait", "system", "seed.loaded"])
except subprocess.CalledProcessError as e:
raise HarnessError("failed to wait for snapd seed") from e

self.instance = self.hostname
return Instance(self, self.hostname)


def get_instances(self) -> Set[str]:
    """Return the ids of the instances managed by this harness.

    The local substrate has at most one instance, identified by the value
    stored in ``self.instance``. The result is an empty set while
    ``self.instance`` is ``None``.
    """
    if self.instance is None:
        return set()
    # A set literal keeps the id whole; set(<str>) would split the string
    # into a set of its individual characters.
    return {self.instance}
bschimke95 marked this conversation as resolved.
Show resolved Hide resolved

def send_file(self, _: str, source: str, destination: str):
if not self.initialized:
raise HarnessError("no instance initialized")
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/tests/test_util/harness/lxd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shlex
import subprocess
from pathlib import Path
from typing import List
from typing import List, Set

from test_util import config
from test_util.harness import Harness, HarnessError, Instance
Expand Down Expand Up @@ -108,6 +108,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
return Instance(self, instance_id)

def get_instances(self) -> Set[str]:
    """Return the instance ids tracked in ``self.instances``.

    The tracked object itself is returned, not a copy.
    """
    return self.instances

def _configure_profile(self, profile_name: str, profile_config: str):
LOG.debug("Checking for LXD profile %s", profile_name)
try:
Expand Down
4 changes: 4 additions & 0 deletions tests/integration/tests/test_util/harness/multipass.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import shlex
import subprocess
from pathlib import Path
from typing import Set

from test_util import config
from test_util.harness import Harness, HarnessError, Instance
Expand Down Expand Up @@ -69,6 +70,9 @@ def new_instance(self, dualstack: bool = False) -> Instance:
self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
return Instance(self, instance_id)

def get_instances(self) -> Set[str]:
    """Return the instance ids tracked in ``self.instances``.

    The tracked object itself is returned, not a copy.
    """
    return self.instances

def send_file(self, instance_id: str, source: str, destination: str):
if instance_id not in self.instances:
raise HarnessError(f"unknown instance {instance_id}")
Expand Down
Loading