Skip to content

Pin nvidia-driver and cuda packages to working packages (#496) #9

Pin nvidia-driver and cuda packages to working packages (#496)

Pin nvidia-driver and cuda packages to working packages (#496) #9

Workflow file for this run

name: Test extra build
on:
workflow_dispatch:
push:
branches:
- main
paths:
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
- 'ansible/roles/doca/**'
- 'ansible/roles/cuda/**'
- '.github/workflows/extra.yml'
pull_request:
paths:
- 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json'
- 'ansible/roles/doca/**'
- 'ansible/roles/cuda/**'
- '.github/workflows/extra.yml'
jobs:
doca:
name: extra-build
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build.image_name }} # to branch/PR + OS
cancel-in-progress: true
runs-on: ubuntu-22.04
strategy:
fail-fast: false # allow other matrix jobs to continue even if one fails
matrix: # build RL8, RL9
build:
- image_name: openhpc-extra-RL8
source_image_name_key: RL8 # key into environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
inventory_groups: doca,cuda
volume_size: 30 # needed for cuda
- image_name: openhpc-extra-RL9
source_image_name_key: RL9
inventory_groups: doca,cuda
volume_size: 30 # needed for cuda
env:
ANSIBLE_FORCE_COLOR: True
OS_CLOUD: openstack
CI_CLOUD: ${{ vars.CI_CLOUD }} # default from repo settings
ARK_PASSWORD: ${{ secrets.ARK_PASSWORD }}
steps:
- uses: actions/checkout@v2
- name: Load current fat images into GITHUB_ENV
# see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-of-a-multiline-string
run: |
{
echo 'FAT_IMAGES<<EOF'
cat environments/.stackhpc/terraform/cluster_image.auto.tfvars.json
echo EOF
} >> "$GITHUB_ENV"
- name: Record settings
run: |
echo CI_CLOUD: ${{ env.CI_CLOUD }}
echo FAT_IMAGES: ${FAT_IMAGES}
- name: Setup ssh
run: |
set -x
mkdir ~/.ssh
echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa
chmod 0600 ~/.ssh/id_rsa
shell: bash
- name: Add bastion's ssh key to known_hosts
run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts
shell: bash
- name: Install ansible etc
run: dev/setup-env.sh
- name: Write clouds.yaml
run: |
mkdir -p ~/.config/openstack/
echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml
shell: bash
- name: Setup environment
run: |
. venv/bin/activate
. environments/.stackhpc/activate
- name: Build fat image with packer
id: packer_build
run: |
set -x
. venv/bin/activate
. environments/.stackhpc/activate
cd packer/
packer init .
PACKER_LOG=1 packer build \
-on-error=${{ vars.PACKER_ON_ERROR }} \
-var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \
-var "source_image_name=${{ fromJSON(env.FAT_IMAGES)['cluster_image'][matrix.build.source_image_name_key] }}" \
-var "image_name=${{ matrix.build.image_name }}" \
-var "inventory_groups=${{ matrix.build.inventory_groups }}" \
-var "volume_size=${{ matrix.build.volume_size }}" \
openstack.pkr.hcl
- name: Get created image names from manifest
id: manifest
run: |
. venv/bin/activate
IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json)
while ! openstack image show -f value -c name $IMAGE_ID; do
sleep 5
done
IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID)
echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT"
echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT"
echo $IMAGE_ID > image-id.txt
echo $IMAGE_NAME > image-name.txt
- name: Make image usable for further builds
run: |
. venv/bin/activate
openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}"
- name: Delete image for automatically-run workflows
run: |
. venv/bin/activate
openstack image delete "${{ steps.manifest.outputs.image-id }}"
if: ${{ github.event_name != 'workflow_dispatch' }}
- name: Upload manifest artifact
uses: actions/upload-artifact@v4
with:
name: image-details-${{ matrix.build.image_name }}
path: |
./image-id.txt
./image-name.txt
overwrite: true