Slim down docker cache size (#3190)
* slim down docker cache size

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* remove old centos images

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* troubleshoot test failure

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix wget version ref

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refactor caching mechanisms

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add cache cleanup steps

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* simplify deleting cache

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix first clone issue

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add tool dep

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
wagoodman committed Sep 9, 2024
1 parent deabd41 commit 0a3f513
Showing 75 changed files with 1,305 additions and 385 deletions.
16 changes: 16 additions & 0 deletions .binny.yaml
@@ -115,3 +115,19 @@ tools:
method: github-release
with:
repo: cli/cli

# used to upload test fixture cache
- name: oras
version:
want: v1.2.0
method: github-release
with:
repo: oras-project/oras

# used to upload test fixture cache
- name: yq
version:
want: v4.44.3
method: github-release
with:
repo: mikefarah/yq
23 changes: 14 additions & 9 deletions .github/actions/bootstrap/action.yaml
@@ -13,16 +13,15 @@ inputs:
cache-key-prefix:
description: "Prefix all cache keys with this value"
required: true
default: "1ac8281053"
compute-fingerprints:
description: "Compute test fixture fingerprints"
default: "181053ac82"
download-test-fixture-cache:
description: "Download test fixture cache from OCI and github actions"
required: true
default: "true"
default: "false"
bootstrap-apt-packages:
description: "Space delimited list of tools to install via apt"
default: "libxml2-utils"


runs:
using: "composite"
steps:
@@ -54,8 +53,14 @@ runs:
run: |
DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ inputs.bootstrap-apt-packages }}
- name: Create all cache fingerprints
if: inputs.compute-fingerprints == 'true'
shell: bash
run: make fingerprints
- name: Restore ORAS cache from github actions
if: inputs.download-test-fixture-cache == 'true'
uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2
with:
path: ${{ github.workspace }}/.tmp/oras-cache
key: ${{ inputs.cache-key-prefix }}-oras-cache

- name: Download test fixture cache
if: inputs.download-test-fixture-cache == 'true'
shell: bash
run: make download-test-fixture-cache
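
The bootstrap action now restores two layers when download-test-fixture-cache is enabled: the ORAS blob cache from GitHub Actions (keyed by cache-key-prefix) and the fixture content itself via make download-test-fixture-cache. That make target is outside this excerpt; below is a minimal sketch, assuming a hypothetical OCI reference and output directory, of what the pull side could look like with the pinned oras binary.

#!/usr/bin/env python3
# Minimal sketch (not part of this diff) of the pull side: fetch a previously
# published fixture-cache artifact with the pinned oras binary.
# The OCI reference and output directory below are assumptions.
import subprocess

REF = "ghcr.io/OWNER/REPO/test-fixture-cache:latest"  # hypothetical reference
subprocess.run([".tool/oras", "pull", REF, "--output", ".tmp/oras-cache"], check=True)
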
11 changes: 0 additions & 11 deletions .github/scripts/ci-check.sh

This file was deleted.

135 changes: 135 additions & 0 deletions .github/scripts/find_cache_paths.py
@@ -0,0 +1,135 @@
#!/usr/bin/env python3
from __future__ import annotations

import os
import glob
import sys
import json
import hashlib


IGNORED_PREFIXES = []


def find_fingerprints_and_check_dirs(base_dir):
all_fingerprints = set(glob.glob(os.path.join(base_dir, '**', 'test*', '**', '*.fingerprint'), recursive=True))

all_fingerprints = {os.path.relpath(fp) for fp in all_fingerprints
if not any(fp.startswith(prefix) for prefix in IGNORED_PREFIXES)}

if not all_fingerprints:
show("No .fingerprint files or cache directories found.")
exit(1)

missing_content = []
valid_paths = set()
fingerprint_contents = []

for fingerprint in all_fingerprints:
path = fingerprint.replace('.fingerprint', '')

if not os.path.exists(path):
missing_content.append(path)
continue

if not os.path.isdir(path):
valid_paths.add(path)
continue

if os.listdir(path):
valid_paths.add(path)
else:
missing_content.append(path)

with open(fingerprint, 'r') as f:
content = f.read().strip()
fingerprint_contents.append((fingerprint, content))

return sorted(valid_paths), missing_content, fingerprint_contents


def parse_fingerprint_contents(fingerprint_content):
input_map = {}
for line in fingerprint_content.splitlines():
digest, path = line.split()
input_map[path] = digest
return input_map


def calculate_sha256(fingerprint_contents):
sorted_fingerprint_contents = sorted(fingerprint_contents, key=lambda x: x[0])

concatenated_contents = ''.join(content for _, content in sorted_fingerprint_contents)

sha256_hash = hashlib.sha256(concatenated_contents.encode()).hexdigest()

return sha256_hash


def calculate_file_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()


def show(*s: str):
print(*s, file=sys.stderr)


def main(file_path: str | None):
base_dir = '.'
valid_paths, missing_content, fingerprint_contents = find_fingerprints_and_check_dirs(base_dir)

if missing_content:
show("The following paths are missing or have no content, but have corresponding .fingerprint files:")
for path in sorted(missing_content):
show(f"- {path}")
show("Please ensure these paths exist and have content if they are directories.")
exit(1)

sha256_hash = calculate_sha256(fingerprint_contents)

paths_with_digests = []
for path in sorted(valid_paths):
fingerprint_file = f"{path}.fingerprint"
try:
if os.path.exists(fingerprint_file):
file_digest = calculate_file_sha256(fingerprint_file)

# Parse the fingerprint file to get the digest/path tuples
with open(fingerprint_file, 'r') as f:
fingerprint_content = f.read().strip()
input_map = parse_fingerprint_contents(fingerprint_content)

paths_with_digests.append({
"path": path,
"digest": file_digest,
"input": input_map
})

except Exception as e:
show(f"Error processing {fingerprint_file}: {e}")
raise e


output = {
"digest": sha256_hash,
"paths": paths_with_digests
}

content = json.dumps(output, indent=2, sort_keys=True)

if file_path:
with open(file_path, 'w') as f:
f.write(content)

print(content)


if __name__ == "__main__":
file_path = None
if len(sys.argv) > 1:
file_path = sys.argv[1]
main(file_path)
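
find_cache_paths.py walks the repository for *.fingerprint files, checks that the fixture paths they describe exist and are non-empty, and prints a JSON summary: one aggregate digest over all fingerprint contents plus, per path, the fingerprint file's digest and its map of input path to digest. A short sketch of how a caller could consume that JSON follows; the output filename is an assumption (the script writes to whatever path it is given as its first argument).

#!/usr/bin/env python3
# Sketch of consuming find_cache_paths.py output; the filename is an assumption.
import json

with open("fixture-cache.json") as f:
    summary = json.load(f)

# the aggregate digest is a natural cache-key suffix for the whole fixture set
print(f"cache key: fixture-cache-{summary['digest'][:12]}")

for entry in summary["paths"]:
    print(f"{entry['path']}: {entry['digest'][:12]} ({len(entry['input'])} inputs)")
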
70 changes: 70 additions & 0 deletions .github/scripts/fingerprint_docker_fixtures.py
@@ -0,0 +1,70 @@
#!/usr/bin/env python3

import os
import subprocess
import hashlib

BOLD = '\033[1m'
YELLOW = '\033[0;33m'
RESET = '\033[0m'


def print_message(message):
print(f"{YELLOW}{message}{RESET}")


def sha256sum(filepath):
h = hashlib.sha256()
with open(filepath, 'rb') as f:
for chunk in iter(lambda: f.read(4096), b""):
h.update(chunk)
return h.hexdigest()


def is_git_tracked_or_untracked(directory):
"""Returns a sorted list of files in the directory that are tracked or not ignored by Git."""
result = subprocess.run(
["git", "ls-files", "--cached", "--others", "--exclude-standard"],
cwd=directory,
stdout=subprocess.PIPE,
text=True
)
return sorted(result.stdout.strip().splitlines())


def find_test_fixture_dirs_with_images(base_dir):
"""Find directories that contain 'test-fixtures' and at least one 'image-*' directory."""
for root, dirs, files in os.walk(base_dir):
if 'test-fixtures' in root:
image_dirs = [d for d in dirs if d.startswith('image-')]
if image_dirs:
yield os.path.realpath(root)


def generate_fingerprints():
print_message("creating fingerprint files for docker fixtures...")

for test_fixture_dir in find_test_fixture_dirs_with_images('.'):
cache_fingerprint_path = os.path.join(test_fixture_dir, 'cache.fingerprint')

with open(cache_fingerprint_path, 'w') as fingerprint_file:
for image_dir in find_image_dirs(test_fixture_dir):
for file in is_git_tracked_or_untracked(image_dir):
file_path = os.path.join(image_dir, file)
checksum = sha256sum(file_path)
path_from_fixture_dir = os.path.relpath(file_path, test_fixture_dir)
fingerprint_file.write(f"{checksum} {path_from_fixture_dir}\n")


def find_image_dirs(test_fixture_dir):
"""Find all 'image-*' directories inside a given test-fixture directory."""
result = []
for root, dirs, files in os.walk(test_fixture_dir):
for dir_name in dirs:
if dir_name.startswith('image-'):
result.append(os.path.join(root, dir_name))
return sorted(result)


if __name__ == "__main__":
generate_fingerprints()
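
Each cache.fingerprint file written by fingerprint_docker_fixtures.py is a plain-text list of "<sha256> <path relative to the fixture dir>" lines, one per git-tracked (or untracked-but-not-ignored) file under an image-* directory. A small sketch of re-checking such a file against the working tree; this verifier is not part of the commit, and the example fixture path is hypothetical.

#!/usr/bin/env python3
# Sketch (not part of this diff): re-verify a cache.fingerprint against disk.
import hashlib
import os
import sys


def sha256sum(filepath):
    h = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            h.update(chunk)
    return h.hexdigest()


fixture_dir = sys.argv[1]  # e.g. some test-fixtures directory (hypothetical)
with open(os.path.join(fixture_dir, 'cache.fingerprint')) as f:
    for line in f:
        digest, rel_path = line.split()
        actual = sha256sum(os.path.join(fixture_dir, rel_path))
        print("ok" if actual == digest else "STALE", rel_path)
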
2 changes: 2 additions & 0 deletions .github/scripts/labeler.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

from __future__ import annotations

import sys
2 changes: 2 additions & 0 deletions .github/scripts/labeler_test.py
100644 → 100755
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import unittest
from unittest.mock import patch
import subprocess
2 changes: 1 addition & 1 deletion .github/workflows/release-version-file.yaml
@@ -1,4 +1,4 @@
name: "Release"
name: "Release: version file"

on:

39 changes: 39 additions & 0 deletions .github/workflows/test-fixture-cache-publish.yaml
@@ -0,0 +1,39 @@
name: "Test fixture cache: publish"

on:
workflow_dispatch:
schedule:
# run nightly at 4AM UTC
- cron: "0 4 * * *"

permissions:
contents: read

jobs:

Publish:
name: "Publish test fixture image cache"
# we use this runner to get enough storage space for docker images and fixture cache
runs-on: ubuntu-22.04-4core-16gb
permissions:
packages: write
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 #v4.1.7

- name: Bootstrap environment
uses: ./.github/actions/bootstrap
with:
# we want to rebuild the cache with no previous state
download-test-fixture-cache: false

- name: Run all tests
run: make test
env:
# we want to rebuild the cache with no previous state
DOWNLOAD_TEST_FIXTURE_CACHE: "false"

- name: Login to GitHub Container Registry (ORAS)
run: echo "${{ secrets.GITHUB_TOKEN }}" | .tool/oras login ghcr.io -u ${{ github.actor }} --password-stdin

- name: Publish test fixture cache
run: make upload-test-fixture-cache
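
The publish workflow rebuilds every fixture with no prior cache, authenticates oras against ghcr.io, and hands off to make upload-test-fixture-cache. That make target is also outside this excerpt; a rough sketch of what the push could look like, assuming a hypothetical archive name and OCI reference:

#!/usr/bin/env python3
# Rough sketch (not part of this diff): publish a fixture-cache archive to GHCR
# with the pinned oras binary. Archive name and reference are assumptions.
import subprocess

ARCHIVE = "fixture-cache.tar.gz"  # hypothetical archive built from the fixture dirs
REF = "ghcr.io/OWNER/REPO/test-fixture-cache:latest"  # hypothetical reference

subprocess.run([".tool/oras", "push", REF, ARCHIVE], check=True)
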
1 change: 0 additions & 1 deletion .github/workflows/update-bootstrap-tools.yml
@@ -19,7 +19,6 @@ jobs:
uses: ./.github/actions/bootstrap
with:
bootstrap-apt-packages: ""
compute-fingerprints: "false"
go-dependencies: false

- name: "Update tool versions"
