From eefc00e59f688bfed98dd81abbb4cede6076f151 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 17 Apr 2024 13:44:17 +0000 Subject: [PATCH 1/5] chore: workaround earthly flake --- .github/workflows/ci.yml | 33 ++++++++++++++++++++++++++------- scripts/earthly-ci | 36 ++++++++++++++++++++++++++++++++++++ scripts/setup_env.sh | 2 +- 3 files changed, 63 insertions(+), 8 deletions(-) create mode 100755 scripts/earthly-ci diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72b97bb2e1d..bf3d37588c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: ebs_cache_size_gb: 256 runner_concurrency: 50 subaction: ${{ github.event.inputs.runner_action || 'start' }} - ec2_instance_type: m6a.32xlarge + ec2_instance_type: m6a.48xlarge ec2_ami_id: ami-04d8422a9ba4de80f ec2_instance_ttl: 40 # refreshed by jobs secrets: inherit @@ -44,8 +44,9 @@ jobs: # prepare images locally, tagged by commit hash - name: "Build E2E Image" timeout-minutes: 40 - run: earthly ./yarn-project+export-end-to-end + run: earthly-ci ./yarn-project+export-end-to-end # We base our e2e list used in e2e-x86 off the targets in ./yarn-project/end-to-end + # (Note ARM uses just 2 tests as a smoketest) - name: Create list of end-to-end jobs id: e2e_list run: echo "list=$(earthly ls ./yarn-project/end-to-end | grep -v '+base' | sed 's/+//' | jq -R . | jq -cs .)" >> $GITHUB_OUTPUT @@ -68,7 +69,7 @@ jobs: - name: Test working-directory: ./yarn-project/end-to-end/ timeout-minutes: 25 - run: earthly -P --no-output +${{ matrix.test }} --e2e_mode=cache + run: earthly-ci -P --no-output +${{ matrix.test }} --e2e_mode=cache # TODO # - name: Upload logs # run: BRANCH=${{ github.ref_name }} PULL_REQUEST=${{ github.event.number }} scripts/ci/upload_logs_to_s3 ./yarn-project/end-to-end/log @@ -92,7 +93,7 @@ jobs: working-directory: ./barretenberg/cpp/ timeout-minutes: 25 # limit our parallelism to half our cores - run: earthly --no-output +test --hardware_concurrency=64 + run: earthly-ci --no-output +test --hardware_concurrency=64 # push benchmarking binaries to dockerhub registry bb-bench-binaries: @@ -108,7 +109,7 @@ jobs: if: ${{ github.event.inputs.just_start_spot != 'true' }} timeout-minutes: 15 working-directory: ./barretenberg/cpp/ - run: earthly --push +bench-binaries + run: earthly-ci --push +bench-binaries setup-bench: uses: ./.github/workflows/setup-runner.yml @@ -136,12 +137,30 @@ jobs: - name: Client IVC Bench working-directory: ./barretenberg/cpp/ timeout-minutes: 15 - run: earthly --no-output +bench-client-ivc --bench_mode=cache + run: earthly-ci --no-output +bench-client-ivc --bench_mode=cache - name: Ultrahonk Bench working-directory: ./barretenberg/cpp/ timeout-minutes: 15 - run: earthly --no-output +bench-ultra-honk --bench_mode=cache + run: earthly-ci --artifact +bench/bench.json --bench_mode=cache + + # Utilize github-action-benchmark to automatically update the plots at + # https://aztecprotocol.github.io/aztec-packages/dev/bench/ with new benchmark data. + # This also creates an alert if benchmarks exceed the threshold specified below. + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: C++ Benchmark + tool: 'googlecpp' + output-file-path: barretenberg/cpp/bench.json + github-token: ${{ secrets.GITHUB_TOKEN }} + # Push and deploy GitHub pages branch on master only + auto-push: ${{ github.ref_name == 'master' }} + # Alert if a bench result is 1.05x worse + alert-threshold: '105%' + comment-on-alert: true + fail-on-alert: false + alert-comment-cc-users: '@ludamad @codygunton' merge-check: runs-on: ${{ github.actor }}-x86 diff --git a/scripts/earthly-ci b/scripts/earthly-ci new file mode 100755 index 00000000000..07be9b64979 --- /dev/null +++ b/scripts/earthly-ci @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# A wrapper for Earthly that is meant to caught signs of known intermittent failures and continue. +# The silver lining is if Earthly does crash, the cache can pick up the buil. +set -eu -o pipefail + +# Flag to determine if -i is present +INTERACTIVE=false +# Check for -i flag in the arguments +for arg in "$@"; do + if [ "$arg" == "-i" ] || [ "$arg" == "--interactive" ]; then + INTERACTIVE=true + break + fi +done + +OUTPUT_FILE=$(mktemp) +# capture output to handle earthly edge cases +if $INTERACTIVE ; then + # don't play nice with tee if interactive + earthly $@ +elif ! earthly $@ 2>&1 | tee $OUTPUT_FILE >&2 ; then + # we try earthly once, capturing output + # if we get one of our (unfortunate) known failures, handle retries + # TODO potentially handle other intermittent errors here + if grep 'failed to get edge: inconsistent graph state' $OUTPUT_FILE >/dev/null ; then + # TODO when earthly is overloaded we sometimes get + # 'failed to solve: failed to get edge: inconsistent graph state' + echo "Got 'inconsistent graph state'. Restarting earthly. See https://github.com/earthly/earthly/issues/2454'" + earthly $@ + # TODO handle + # could not configure satellite: failed getting org: unable to authenticate: failed to execute login request: Post + else + # otherwise, propagate error + exit 1 + fi +fi diff --git a/scripts/setup_env.sh b/scripts/setup_env.sh index 97fb8fd68a1..70ecf9d7654 100755 --- a/scripts/setup_env.sh +++ b/scripts/setup_env.sh @@ -8,6 +8,6 @@ echo FORCE_COLOR=1 >> $GITHUB_ENV echo "Logging in to Docker..." echo $1 | docker login -u aztecprotocolci --password-stdin -# Make earthly-cloud and earthly-cloud-bench scripts available +# Make earthly-ci script available echo "PATH=$(dirname $(realpath $0)):$PATH" >> $GITHUB_ENV echo "EARTHLY_CONFIG=$(git rev-parse --show-toplevel)/.github/earthly-ci-config.yml" >> $GITHUB_ENV \ No newline at end of file From a0f9c1b534f9576375ecbd06666a7fad7781fc3e Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 17 Apr 2024 09:44:55 -0400 Subject: [PATCH 2/5] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf3d37588c8..9730ff5f908 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: ebs_cache_size_gb: 256 runner_concurrency: 50 subaction: ${{ github.event.inputs.runner_action || 'start' }} - ec2_instance_type: m6a.48xlarge + ec2_instance_type: m6a.32xlarge ec2_ami_id: ami-04d8422a9ba4de80f ec2_instance_ttl: 40 # refreshed by jobs secrets: inherit From 4bbb2407cfd9bcb76690c1c3842bb503e67cf819 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 17 Apr 2024 09:45:31 -0400 Subject: [PATCH 3/5] Update ci.yml --- .github/workflows/ci.yml | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9730ff5f908..44a736a22c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -142,25 +142,7 @@ jobs: - name: Ultrahonk Bench working-directory: ./barretenberg/cpp/ timeout-minutes: 15 - run: earthly-ci --artifact +bench/bench.json --bench_mode=cache - - # Utilize github-action-benchmark to automatically update the plots at - # https://aztecprotocol.github.io/aztec-packages/dev/bench/ with new benchmark data. - # This also creates an alert if benchmarks exceed the threshold specified below. - - name: Store benchmark result - uses: benchmark-action/github-action-benchmark@v1 - with: - name: C++ Benchmark - tool: 'googlecpp' - output-file-path: barretenberg/cpp/bench.json - github-token: ${{ secrets.GITHUB_TOKEN }} - # Push and deploy GitHub pages branch on master only - auto-push: ${{ github.ref_name == 'master' }} - # Alert if a bench result is 1.05x worse - alert-threshold: '105%' - comment-on-alert: true - fail-on-alert: false - alert-comment-cc-users: '@ludamad @codygunton' + run: earthly-ci --no-output +bench-ultra-honk --bench_mode=cache merge-check: runs-on: ${{ github.actor }}-x86 From 6088449caceab747f6f2d49cbd16f398603d285a Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 17 Apr 2024 13:45:58 +0000 Subject: [PATCH 4/5] cleanup --- scripts/earthly-cloud | 84 ------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100755 scripts/earthly-cloud diff --git a/scripts/earthly-cloud b/scripts/earthly-cloud deleted file mode 100755 index d2a0e39bc2c..00000000000 --- a/scripts/earthly-cloud +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env bash - -# This script uses Earthly cloud satellites based on a runner type and hash of the GITHUB_ACTOR environment variable. -# ARM or x86 can be specified. -# Usage: earthly-cloud [options] -# Arguments: -# runner type: The type of runner, e.g., 'build' or 'bench'. -# architecture: The target architecture, e.g., 'arm' or 'x86'. -set -eu -o pipefail - -# Check if at least two arguments are passed -if [ $# -lt 2 ]; then - echo "Error: Insufficient arguments provided." - echo "Usage: $0 [options]" - exit 1 -fi - -RUNNER_TYPE=$1 -ARCH=$2 -shift 2 - -if [ "$ARCH" == "arm" ]; then - PLATFORM=linux/arm64 -elif [ "$ARCH" == "x86" ]; then - PLATFORM=linux/amd64 -fi - -# default sizes for build type -if [ "$RUNNER_TYPE" == "build" ] ; then - SIZE=4xlarge - NUMBER_OF_RUNNERS=2 - if [ "$ARCH" = arm ] ; then - NUMBER_OF_RUNNERS=1 - fi - # TODO why cant we set this?? - # MAX_PARALLELISM=8 -elif [ "$RUNNER_TYPE" == "bench" ] ; then - SIZE=2xlarge - NUMBER_OF_RUNNERS=1 - # MAX_PARALLELISM=1 -elif [ "$RUNNER_TYPE" == "test" ] ; then - SIZE=4xlarge - NUMBER_OF_RUNNERS=1 -fi - -# Flag to determine if -i is present -INTERACTIVE=false -# Check for -i flag in the arguments -for arg in "$@"; do - if [ "$arg" == "-i" ] || [ "$arg" == "--interactive" ]; then - INTERACTIVE=true - break - fi -done - -# we hash our GITHUB_ACTOR to pick from 1 to NUMBER_RUNNERS (inclusive) as RUNNER_ID -# this means everyone gets assigned to runners based on their user group -NAME_HASH=$(cksum <<< "$GITHUB_ACTOR" | cut -f 1 -d ' ') -RUNNER_ID=$(($NAME_HASH % $NUMBER_OF_RUNNERS + 1)) -RUNNER=$RUNNER_TYPE-$RUNNER_ID-$ARCH -earthly sat --org aztec launch --size $SIZE --platform $PLATFORM $RUNNER || true -# --remote-cache=aztecprotocol/cache:bb-native-tests -EARTHLY_FLAGS="-P --no-output --org aztec --sat $RUNNER" -OUTPUT_FILE=$(mktemp) -# capture output to handle earthly edge cases -if $INTERACTIVE ; then - # don't play nice with tee if interactive - earthly $EARTHLY_FLAGS $@ -elif ! earthly $EARTHLY_FLAGS $@ 2>&1 | tee $OUTPUT_FILE >&2 ; then - # we try earthly once, capturing output - # if we get one of our (unfortunate) known failures, handle retries - # TODO potentially handle other intermittent errors here - if grep 'failed to get edge: inconsistent graph state' $OUTPUT_FILE >/dev/null ; then - # TODO when earthly is overloaded we sometimes get - # 'failed to solve: failed to get edge: inconsistent graph state' - echo "Got 'inconsistent graph state'. Restarting earthly. See https://github.com/earthly/earthly/issues/2454'" - earthly $EARTHLY_FLAGS $@ - # TODO handle - # could not configure satellite: failed getting org: unable to authenticate: failed to execute login request: Post - else - # otherwise, propagate error - exit 1 - fi -fi From 0641d0796506b87bed9b2349e2c2205441b5e9b5 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 17 Apr 2024 10:07:08 -0400 Subject: [PATCH 5/5] Update earthly-ci --- scripts/earthly-ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/earthly-ci b/scripts/earthly-ci index 07be9b64979..43eeb9b17aa 100755 --- a/scripts/earthly-ci +++ b/scripts/earthly-ci @@ -1,6 +1,6 @@ #!/usr/bin/env bash # A wrapper for Earthly that is meant to caught signs of known intermittent failures and continue. -# The silver lining is if Earthly does crash, the cache can pick up the buil. +# The silver lining is if Earthly does crash, the cache can pick up the build. set -eu -o pipefail # Flag to determine if -i is present