Slim down docker cache size #3190

Merged · 9 commits · Sep 9, 2024

Changes from all commits
16 changes: 16 additions & 0 deletions .binny.yaml

@@ -115,3 +115,19 @@ tools:
    method: github-release
    with:
      repo: cli/cli

  # used to upload test fixture cache
  - name: oras
    version:
      want: v1.2.0
    method: github-release
    with:
      repo: oras-project/oras

  # used to upload test fixture cache
  - name: yq
    version:
      want: v4.44.3
    method: github-release
    with:
      repo: mikefarah/yq
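Both new tools back the OCI-based fixture cache this PR introduces: oras pushes and pulls arbitrary artifacts against a container registry (here, ghcr.io), and yq is, per the comments, also used by the cache upload tooling. binny installs the pinned versions under `.tool/`, which is how the publish workflow below invokes oras. A quick sanity check after bootstrap, for illustration only:

```bash
# Illustration only: binny places the pinned binaries under .tool/
# (the publish workflow below calls .tool/oras the same way).
.tool/oras version
.tool/yq --version
```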
23 changes: 14 additions & 9 deletions .github/actions/bootstrap/action.yaml

@@ -13,16 +13,15 @@ inputs:
   cache-key-prefix:
     description: "Prefix all cache keys with this value"
     required: true
-    default: "1ac8281053"
-  compute-fingerprints:
-    description: "Compute test fixture fingerprints"
+    default: "181053ac82"
+  download-test-fixture-cache:
+    description: "Download test fixture cache from OCI and github actions"
     required: true
-    default: "true"
+    default: "false"
   bootstrap-apt-packages:
     description: "Space delimited list of tools to install via apt"
     default: "libxml2-utils"

 runs:
   using: "composite"
   steps:

@@ -54,8 +53,14 @@ runs:
       run: |
         DEBIAN_FRONTEND=noninteractive sudo apt update && sudo -E apt install -y ${{ inputs.bootstrap-apt-packages }}

-    - name: Create all cache fingerprints
-      if: inputs.compute-fingerprints == 'true'
-      shell: bash
-      run: make fingerprints
+    - name: Restore ORAS cache from github actions
+      if: inputs.download-test-fixture-cache == 'true'
+      uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 # v3.3.2
+      with:
+        path: ${{ github.workspace }}/.tmp/oras-cache
+        key: ${{ inputs.cache-key-prefix }}-oras-cache
+
+    - name: Download test fixture cache
+      if: inputs.download-test-fixture-cache == 'true'
+      shell: bash
+      run: make download-test-fixture-cache
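The bootstrap action now hangs both steps off a single `download-test-fixture-cache` switch: it first restores previously pulled ORAS blobs from the GitHub Actions cache, then runs `make download-test-fixture-cache`. That Makefile target is not part of this diff; a minimal sketch of what it plausibly does with the oras binary added above (the registry reference and output path are assumptions):

```bash
# Sketch only — not the actual Makefile target from this PR.
.tool/oras pull ghcr.io/example/test-fixture-cache:latest -o .tmp/oras-cache
```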
11 changes: 0 additions & 11 deletions .github/scripts/ci-check.sh

This file was deleted.

135 changes: 135 additions & 0 deletions .github/scripts/find_cache_paths.py

@@ -0,0 +1,135 @@
#!/usr/bin/env python3
from __future__ import annotations

import os
import glob
import sys
import json
import hashlib


IGNORED_PREFIXES = []


def find_fingerprints_and_check_dirs(base_dir):
    all_fingerprints = set(glob.glob(os.path.join(base_dir, '**', 'test*', '**', '*.fingerprint'), recursive=True))

    all_fingerprints = {os.path.relpath(fp) for fp in all_fingerprints
                        if not any(fp.startswith(prefix) for prefix in IGNORED_PREFIXES)}

    if not all_fingerprints:
        show("No .fingerprint files or cache directories found.")
        exit(1)

    missing_content = []
    valid_paths = set()
    fingerprint_contents = []

    for fingerprint in all_fingerprints:
        path = fingerprint.replace('.fingerprint', '')

        if not os.path.exists(path):
            missing_content.append(path)
            continue

        if not os.path.isdir(path):
            valid_paths.add(path)
            continue

        if os.listdir(path):
            valid_paths.add(path)
        else:
            missing_content.append(path)

        with open(fingerprint, 'r') as f:
            content = f.read().strip()
            fingerprint_contents.append((fingerprint, content))

    return sorted(valid_paths), missing_content, fingerprint_contents


def parse_fingerprint_contents(fingerprint_content):
    input_map = {}
    for line in fingerprint_content.splitlines():
        digest, path = line.split()
        input_map[path] = digest
    return input_map


def calculate_sha256(fingerprint_contents):
    sorted_fingerprint_contents = sorted(fingerprint_contents, key=lambda x: x[0])

    concatenated_contents = ''.join(content for _, content in sorted_fingerprint_contents)

    sha256_hash = hashlib.sha256(concatenated_contents.encode()).hexdigest()

    return sha256_hash


def calculate_file_sha256(file_path):
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()


def show(*s: str):
    print(*s, file=sys.stderr)


def main(file_path: str | None):
    base_dir = '.'
    valid_paths, missing_content, fingerprint_contents = find_fingerprints_and_check_dirs(base_dir)

    if missing_content:
        show("The following paths are missing or have no content, but have corresponding .fingerprint files:")
        for path in sorted(missing_content):
            show(f"- {path}")
        show("Please ensure these paths exist and have content if they are directories.")
        exit(1)

    sha256_hash = calculate_sha256(fingerprint_contents)

    paths_with_digests = []
    for path in sorted(valid_paths):
        fingerprint_file = f"{path}.fingerprint"
        try:
            if os.path.exists(fingerprint_file):
                file_digest = calculate_file_sha256(fingerprint_file)

                # Parse the fingerprint file to get the digest/path tuples
                with open(fingerprint_file, 'r') as f:
                    fingerprint_content = f.read().strip()
                input_map = parse_fingerprint_contents(fingerprint_content)

                paths_with_digests.append({
                    "path": path,
                    "digest": file_digest,
                    "input": input_map
                })

        except Exception as e:
            show(f"Error processing {fingerprint_file}: {e}")
            raise e

    output = {
        "digest": sha256_hash,
        "paths": paths_with_digests
    }

    content = json.dumps(output, indent=2, sort_keys=True)

    if file_path:
        with open(file_path, 'w') as f:
            f.write(content)

    print(content)


if __name__ == "__main__":
    file_path = None
    if len(sys.argv) > 1:
        file_path = sys.argv[1]
    main(file_path)
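The script walks the repository for `*.fingerprint` files under test directories, verifies the fixture content they describe actually exists, and emits a JSON manifest with one overall digest plus a per-path digest and input map. Since it takes an optional output file as its first argument, a typical invocation might look like this (the output path is a hypothetical choice, not from this PR):

```bash
# Writes the manifest to the given file and also echoes it to stdout.
python3 .github/scripts/find_cache_paths.py .tmp/fixture-cache-paths.json
```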
70 changes: 70 additions & 0 deletions .github/scripts/fingerprint_docker_fixtures.py

@@ -0,0 +1,70 @@
#!/usr/bin/env python3

import os
import subprocess
import hashlib

BOLD = '\033[1m'
YELLOW = '\033[0;33m'
RESET = '\033[0m'


def print_message(message):
    print(f"{YELLOW}{message}{RESET}")


def sha256sum(filepath):
    h = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            h.update(chunk)
    return h.hexdigest()


def is_git_tracked_or_untracked(directory):
    """Returns a sorted list of files in the directory that are tracked or not ignored by Git."""
    result = subprocess.run(
        ["git", "ls-files", "--cached", "--others", "--exclude-standard"],
        cwd=directory,
        stdout=subprocess.PIPE,
        text=True
    )
    return sorted(result.stdout.strip().splitlines())


def find_test_fixture_dirs_with_images(base_dir):
    """Find directories that contain 'test-fixtures' and at least one 'image-*' directory."""
    for root, dirs, files in os.walk(base_dir):
        if 'test-fixtures' in root:
            image_dirs = [d for d in dirs if d.startswith('image-')]
            if image_dirs:
                yield os.path.realpath(root)


def generate_fingerprints():
    print_message("creating fingerprint files for docker fixtures...")

    for test_fixture_dir in find_test_fixture_dirs_with_images('.'):
        cache_fingerprint_path = os.path.join(test_fixture_dir, 'cache.fingerprint')

        with open(cache_fingerprint_path, 'w') as fingerprint_file:
            for image_dir in find_image_dirs(test_fixture_dir):
                for file in is_git_tracked_or_untracked(image_dir):
                    file_path = os.path.join(image_dir, file)
                    checksum = sha256sum(file_path)
                    path_from_fixture_dir = os.path.relpath(file_path, test_fixture_dir)
                    fingerprint_file.write(f"{checksum} {path_from_fixture_dir}\n")


def find_image_dirs(test_fixture_dir):
    """Find all 'image-*' directories inside a given test-fixture directory."""
    result = []
    for root, dirs, files in os.walk(test_fixture_dir):
        for dir_name in dirs:
            if dir_name.startswith('image-'):
                result.append(os.path.join(root, dir_name))
    return sorted(result)


if __name__ == "__main__":
    generate_fingerprints()
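Each `test-fixtures` directory containing `image-*` inputs gets a `cache.fingerprint` file with one `<sha256> <path relative to the fixture dir>` line per tracked (or untracked-but-not-ignored) file. Regenerating and inspecting one locally might look like this (the fixture path below is illustrative, not taken from this PR):

```bash
python3 .github/scripts/fingerprint_docker_fixtures.py
# a hypothetical fixture directory — any test-fixtures dir with image-* inputs
cat test/integration/test-fixtures/cache.fingerprint
```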
2 changes: 2 additions & 0 deletions .github/scripts/labeler.py
100644 → 100755

@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 from __future__ import annotations

 import sys
2 changes: 2 additions & 0 deletions .github/scripts/labeler_test.py
100644 → 100755

@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import unittest
 from unittest.mock import patch
 import subprocess
2 changes: 1 addition & 1 deletion .github/workflows/release-version-file.yaml

@@ -1,4 +1,4 @@
-name: "Release"
+name: "Release: version file"

 on:
39 changes: 39 additions & 0 deletions .github/workflows/test-fixture-cache-publish.yaml

@@ -0,0 +1,39 @@
name: "Test fixture cache: publish"

on:
workflow_dispatch:
schedule:
# run nightly at 4AM UTC
- cron: "0 4 * * *"

permissions:
contents: read

jobs:

Publish:
name: "Publish test fixture image cache"
# we use this runner to get enough storage space for docker images and fixture cache
runs-on: ubuntu-22.04-4core-16gb
permissions:
packages: write
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 #v4.1.7

- name: Bootstrap environment
uses: ./.github/actions/bootstrap
with:
# we want to rebuild the cache with no previous state
download-test-fixture-cache: false

- name: Run all tests
run: make test
env:
# we want to rebuild the cache with no previous state
DOWNLOAD_TEST_FIXTURE_CACHE: "false"

- name: Login to GitHub Container Registry (ORAS)
run: echo "${{ secrets.GITHUB_TOKEN }}" | .tool/oras login ghcr.io -u ${{ github.actor }} --password-stdin

- name: Publish test fixture cache
run: make upload-test-fixture-cache
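The nightly job rebuilds every fixture from a clean slate, authenticates oras against ghcr.io, then runs `make upload-test-fixture-cache`. That target is also not shown in this diff; one plausible shape for it, with hypothetical archive and registry names:

```bash
# Sketch only — not the actual Makefile target from this PR.
tar -czf .tmp/fixture-cache.tar.gz -T .tmp/fixture-cache-paths.txt
.tool/oras push ghcr.io/example/test-fixture-cache:latest .tmp/fixture-cache.tar.gz
```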
1 change: 0 additions & 1 deletion .github/workflows/update-bootstrap-tools.yml

@@ -19,7 +19,6 @@ jobs:
         uses: ./.github/actions/bootstrap
         with:
           bootstrap-apt-packages: ""
-          compute-fingerprints: "false"
           go-dependencies: false

     - name: "Update tool versions"