From c702688435d66436d0ee0d66c32aba99392d719d Mon Sep 17 00:00:00 2001 From: ludamad Date: Tue, 30 Apr 2024 18:18:51 -0500 Subject: [PATCH] chore: ebs attach robustness (#6108) --- .github/workflows/setup-runner.yml | 4 ++-- scripts/attach_ebs_cache.sh | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/setup-runner.yml b/.github/workflows/setup-runner.yml index a29ccc6d5e5..dff8f10cff3 100644 --- a/.github/workflows/setup-runner.yml +++ b/.github/workflows/setup-runner.yml @@ -78,11 +78,11 @@ jobs: run: | # Compare the checked-out CI configuration files with the reference files if ! git diff --no-index .github/workflows/ci.yml merge-commit-pipeline-files/.github/workflows/ci.yml; then - echo "Error: ci.yml changes in master (or PR base). Please merge these changes." + echo "Error: ci.yml changes in master (or PR base). Please merge these changes. This is to prevent surprises from Github Action's merge behavior." exit 1 fi if ! git diff --no-index .github/workflows/setup-runner.yml merge-commit-pipeline-files/.github/workflows/setup-runner.yml; then - echo "Error: setup-runner.yml changes in master (or PR base). Please merge these changes." + echo "Error: setup-runner.yml changes in master (or PR base). Please merge these changes. This is to prevent surprises from Github Action's merge behavior." exit 1 fi diff --git a/scripts/attach_ebs_cache.sh b/scripts/attach_ebs_cache.sh index 94d5838ca39..76b348fe6af 100755 --- a/scripts/attach_ebs_cache.sh +++ b/scripts/attach_ebs_cache.sh @@ -14,8 +14,20 @@ if [ -f /run/.ebs-cache-mounted ] ; then WAIT_INTERVAL=10 # Interval between checks in seconds elapsed_time=0 # Check for existing mount, assume we can continue if existing + while ! mount | grep -q "/var/lib/docker type ext4"; do - echo "Someone already marked as mounting, waiting for them..." + echo "Someone already marked as mounting, terminating any stopped instances and waiting..." + # Identify and terminate instances in 'STOPPED' state that are using this volume + STOPPED_INSTANCES=$(aws ec2 describe-instances \ + --region $REGION \ + --filters "Name=instance-state-name,Values=stopped" "Name=block-device-mapping.volume-id,Values=$VOLUME_ID" \ + --query "Reservations[*].Instances[*].InstanceId" \ + --output text) + + for instance in $STOPPED_INSTANCES; do + echo "Terminating instance $instance" + aws ec2 terminate-instances --instance-ids $instance + done if [ $elapsed_time -ge $MAX_WAIT_TIME ]; then echo "Cache mount did not become available within $MAX_WAIT_TIME seconds... race condition?" exit 1