Skip to content

Commit

Permalink
Generate inspection report on CI failures (#593)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Angelos Kolaitis <angelos.kolaitis@canonical.com>
  • Loading branch information
bschimke95 and neoaggelos authored Aug 22, 2024
1 parent 4c9cdca commit 0d57dc7
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 7 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/integration-informing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,16 @@ jobs:
export TEST_SNAP="$PWD/build/k8s-${{ matrix.patch }}.snap"
export TEST_SUBSTRATE=lxd
export TEST_LXD_IMAGE=${{ matrix.os }}
export TEST_INSPECTION_REPORTS_DIR="$HOME/inspection-reports"
cd tests/integration && sg lxd -c 'tox -e integration'
# Runs only when an earlier step of this job has failed (`if: failure()`).
# Packs $HOME/inspection-reports -- the directory the test step exports as
# TEST_INSPECTION_REPORTS_DIR -- into inspection-reports.tar.gz and stores the
# artifact name in GITHUB_ENV; sed rewrites every ':' in that name to '-'.
# NOTE(review): presumably matrix.os values contain ':' and artifact names
# must not -- confirm against the upload-artifact naming rules.
- name: Prepare inspection reports
if: failure()
run: |
tar -czvf inspection-reports.tar.gz -C $HOME inspection-reports
echo "artifact_name=inspection-reports-${{ matrix.os }}-${{ matrix.patch }}" | sed 's/:/-/g' >> $GITHUB_ENV
# Also failure-only: uploads the tarball under the name the previous step
# wrote to GITHUB_ENV (read back here as env.artifact_name).
- name: Upload inspection report artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: ${{ env.artifact_name }}
path: inspection-reports.tar.gz
14 changes: 13 additions & 1 deletion .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,19 @@ jobs:
export TEST_SNAP="$PWD/build/k8s.snap"
export TEST_SUBSTRATE=lxd
export TEST_LXD_IMAGE=${{ matrix.os }}
export TEST_INSPECTION_REPORTS_DIR="$HOME/inspection-reports"
# Run the whole integration suite, as the informing workflow does; a
# `-- -k <test>` filter here would silently limit CI to a single test.
cd tests/integration && sg lxd -c 'tox -e integration'
# Runs only when an earlier step of this job has failed (`if: failure()`).
# Packs $HOME/inspection-reports -- the directory the test step exports as
# TEST_INSPECTION_REPORTS_DIR -- into inspection-reports.tar.gz and stores the
# artifact name in GITHUB_ENV; sed rewrites every ':' in that name to '-'.
# NOTE(review): presumably matrix.os values contain ':' and artifact names
# must not -- confirm against the upload-artifact naming rules.
- name: Prepare inspection reports
if: failure()
run: |
tar -czvf inspection-reports.tar.gz -C $HOME inspection-reports
echo "artifact_name=inspection-reports-${{ matrix.os }}" | sed 's/:/-/g' >> $GITHUB_ENV
# Also failure-only: uploads the tarball under the name the previous step
# wrote to GITHUB_ENV (read back here as env.artifact_name).
- name: Upload inspection report artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: ${{ env.artifact_name }}
path: inspection-reports.tar.gz

security-scan:
permissions:
Expand Down
37 changes: 31 additions & 6 deletions k8s/scripts/inspect.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
#!/usr/bin/env bash
#
# This script collects diagnostics and other relevant information from a Kubernetes
# node (either control-plane or worker node) and compiles them into a tarball report.
# The collected data includes service arguments, Kubernetes cluster info, SBOM, system
# diagnostics, network diagnostics, and more. The script needs to be run with
# elevated permissions (sudo).
#
# Usage:
# ./inspect.sh [output_file]
#
# Arguments:
# output_file (Optional) The full path and filename for the generated tarball.
# If not provided, a default filename based on the current date
# and time will be used.
#
# Example:
# ./inspect.sh /path/to/output.tar.gz
# ./inspect.sh # This will generate a tarball with a default name.

INSPECT_DUMP=$(pwd)/inspection-report

Expand Down Expand Up @@ -82,7 +100,7 @@ function collect_service_diagnostics {
systemctl status "snap.$service" &>"$status_file"

local n_restarts
n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)
n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)

printf -- "%s -> %s\n" "$service" "$n_restarts" >> "$INSPECT_DUMP/nrestarts.log"

Expand Down Expand Up @@ -122,12 +140,19 @@ function check_expected_services {
}

function build_report_tarball {
  # Package the ./inspection-report directory into a gzip-compressed tarball.
  #
  # Arguments:
  #   $1  (optional) full path of the tarball to create. When empty, the
  #       tarball is written to the current directory under a name derived
  #       from the current date and time.
  local output_file
  local now_is
  now_is="$(date +'%Y%m%d_%H%M%S')"

  if [ -z "$1" ]; then
    output_file="$(pwd)/inspection-report-${now_is}.tar.gz"
  else
    output_file="$1"
  fi

  # Archive and compress in one step so the file lands exactly at
  # $output_file. Stripping ".gz" and running gzip afterwards produced
  # "<path>.gz" for paths without that suffix while still reporting "<path>".
  tar -C "$(pwd)" -czf "$output_file" inspection-report &>/dev/null
  log_success "Report tarball is at $output_file"
}

if [ "$EUID" -ne 0 ]; then
Expand Down Expand Up @@ -181,4 +206,4 @@ if [ -n "$matches" ]; then
fi

printf -- 'Building the report tarball\n'
build_report_tarball
build_report_tarball "$1"
36 changes: 36 additions & 0 deletions tests/integration/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,33 @@ def _harness_clean(h: harness.Harness):
h.cleanup()


def _generate_inspection_report(h: harness.Harness, instance_id: str):
    """Run inspect.sh on an instance and save its output and tarball locally.

    The script is executed on ``instance_id`` through the harness and asked to
    write ``/inspection-report.tar.gz``. The captured script output is stored
    in ``<INSPECTION_REPORTS_DIR>/<instance_id>/inspection_report_logs.txt``
    and the tarball is pulled next to it as ``inspection_report.tar.gz``.

    Args:
        h: Harness used to execute commands on, and pull files from, the
            instance.
        instance_id: Identifier of the instance to inspect.

    A failure to pull the tarball is logged as a warning and not re-raised.
    """
    LOG.debug("Generating inspection report for %s", instance_id)

    report_dir = Path(config.INSPECTION_REPORTS_DIR) / instance_id
    result = h.exec(
        instance_id,
        ["/snap/k8s/current/k8s/scripts/inspect.sh", "/inspection-report.tar.gz"],
        capture_output=True,
        text=True,
        check=False,
    )

    report_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): assumes h.exec forwards these keyword arguments to
    # subprocess.run, in which case stderr is captured separately from stdout.
    # Keep it in the log as well; the error output is what explains a failed
    # inspection run.
    log_text = getattr(result, "stdout", None) or ""
    err_text = getattr(result, "stderr", None)
    if err_text:
        log_text += "\n--- stderr ---\n" + err_text
    (report_dir / "inspection_report_logs.txt").write_text(log_text)

    try:
        h.pull_file(
            instance_id,
            "/inspection-report.tar.gz",
            (report_dir / "inspection_report.tar.gz").as_posix(),
        )
    except harness.HarnessError as e:
        LOG.warning("Failed to pull inspection report: %s", e)


@pytest.fixture(scope="session")
def h() -> harness.Harness:
LOG.debug("Create harness for %s", config.SUBSTRATE)
Expand All @@ -43,6 +70,11 @@ def h() -> harness.Harness:

yield h

if config.INSPECTION_REPORTS_DIR is not None:
for instance_id in h.instances:
LOG.debug("Generating inspection reports for session instances")
_generate_inspection_report(h, instance_id)

_harness_clean(h)


Expand Down Expand Up @@ -137,6 +169,10 @@ def instances(
# remove the session_instance. The harness ensures that everything is cleaned up
# at the end of the test session.
for instance in instances:
if config.INSPECTION_REPORTS_DIR is not None:
LOG.debug("Generating inspection reports for test instances")
_generate_inspection_report(h, instance.id)

h.delete_instance(instance.id)


Expand Down
4 changes: 4 additions & 0 deletions tests/integration/tests/test_util/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
# after the tests complete.
SKIP_CLEANUP = (os.getenv("TEST_SKIP_CLEANUP") or "") == "1"

# INSPECTION_REPORTS_DIR is the directory where inspection reports are stored.
# If unset or empty, no reports are generated. An empty value is normalised to
# None because consumers check `is not None`; without that, an empty variable
# would enable reports and write them relative to the current directory.
INSPECTION_REPORTS_DIR = os.getenv("TEST_INSPECTION_REPORTS_DIR") or None

# LXD_PROFILE_NAME is the profile name to use for LXD containers.
LXD_PROFILE_NAME = os.getenv("TEST_LXD_PROFILE_NAME") or "k8s-integration"

Expand Down

0 comments on commit 0d57dc7

Please sign in to comment.