From 7685a021247acf05ac38af981b1d03bc8d30d9fb Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 22 Dec 2023 13:57:29 -0600 Subject: [PATCH] Clean up nightly build/deploy w/o nightly branch update. (#3188) * Fix syntax error in build-deploy-pudl workflow. * Rename GITHUB_REF to BUILD_REF * set the nightly tag, so we have something to merge * Allow write permissions for git tagging in nightly builds workflow * Functionalize udpate_nightly_branch() * Temporarily disable nightly branch updates to fix nightly builds. --- .github/workflows/build-deploy-pudl.yml | 29 ++++++++------- docker/gcp_pudl_etl.sh | 48 +++++++++++++------------ 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/.github/workflows/build-deploy-pudl.yml b/.github/workflows/build-deploy-pudl.yml index e5196392cc..57ea623ddf 100644 --- a/.github/workflows/build-deploy-pudl.yml +++ b/.github/workflows/build-deploy-pudl.yml @@ -9,7 +9,7 @@ on: env: GCP_BILLING_PROJECT: ${{ secrets.GCP_BILLING_PROJECT }} - GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule + BUILD_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule GCE_INSTANCE: pudl-deployment-tag # This is changed to pudl-deployment-dev if running on a schedule GCE_INSTANCE_ZONE: ${{ secrets.GCE_INSTANCE_ZONE }} GCS_OUTPUT_BUCKET: gs://nightly-build-outputs.catalyst.coop @@ -19,7 +19,7 @@ jobs: name: Build Docker image, push to Docker Hub and deploy to a GCE VM runs-on: ubuntu-latest permissions: - contents: read + contents: write id-token: write steps: - name: Use pudl-deployment-dev vm and dev branch if running on a schedule @@ -27,24 +27,23 @@ jobs: run: | echo "This action was triggered by a schedule." echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV - echo "GCE_INSTANCE: $GCE_INSTANCE" - echo "GITHUB_REF=dev" >> $GITHUB_ENV - echo "GITHUB_REF: $GITHUB_REF" + echo "BUILD_REF=dev" >> $GITHUB_ENV - name: Checkout Repository uses: actions/checkout@v4 with: - ref: ${{ env.GITHUB_REF }} + ref: ${{ env.BUILD_REF }} - name: Set action environment variables run: | echo "NIGHTLY_TAG=nightly-$(date +%Y-%m-%d)" >> $GITHUB_ENV + echo "BUILD_ID=$(date +%Y-%m-%d-%H%M)-$(git rev-parse --short HEAD)-${BUILD_REF}" >> $GITHUB_ENV + + - name: Show freshly set envvars + run: | + echo "GCE_INSTANCE: $GCE_INSTANCE" + echo "BUILD_REF: $BUILD_REF" echo "NIGHTLY_TAG: $NIGHTLY_TAG" - echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV - echo "SHORT_SHA: $SHORT_SHA" - echo "BUILD_TIMESTAMP=$(date +%Y-%m-%d-%H%M)" >> $GITHUB_ENV - echo "BUILD_TIMESTAMP: $BUILD_TIMESTAMP" - echo "BUILD_ID=${BUILD_TIMESTAMP}-${SHORT_SHA}-${GITHUB_REF} echo "BUILD_ID: $BUILD_ID" - name: Tag nightly build @@ -52,7 +51,7 @@ jobs: run: | git config user.email "pudl@catalyst.coop" git config user.name "pudlbot" - git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG $GITHUB_REF + git tag -a -m "$NIGHTLY_TAG" $NIGHTLY_TAG $BUILD_REF git push origin $NIGHTLY_TAG - name: Docker Metadata @@ -63,7 +62,7 @@ jobs: flavor: | latest=auto tags: | - type=raw,value=${{ env.GITHUB_REF }} + type=raw,value=${{ env.BUILD_REF}} type=ref,event=tag - name: Set up Docker Buildx @@ -108,7 +107,7 @@ jobs: --metadata-from-file startup-script=./docker/vm_startup_script.sh gcloud compute instances update-container "$GCE_INSTANCE" \ --zone "$GCE_INSTANCE_ZONE" \ - --container-image "docker.io/catalystcoop/pudl-etl:${{ env.GITHUB_REF }}" \ + --container-image "docker.io/catalystcoop/pudl-etl:${{ env.BUILD_REF}}" \ --container-command "micromamba" \ --container-arg="run" \ --container-arg="--prefix" \ @@ -118,7 +117,7 @@ jobs: --container-arg="bash" \ --container-arg="./docker/gcp_pudl_etl.sh" \ --container-env-file="./docker/.env" \ - --container-env GITHUB_REF=${{ env.GITHUB_REF }} \ + --container-env BUILD_REF=${{ env.BUILD_REF}} \ --container-env BUILD_ID=${{ env.BUILD_ID }} \ --container-env NIGHTLY_TAG=${{ env.NIGHTLY_TAG }} \ --container-env GITHUB_ACTION_TRIGGER=${{ github.event_name }} \ diff --git a/docker/gcp_pudl_etl.sh b/docker/gcp_pudl_etl.sh index 1f0d0c77ac..5b6553abef 100644 --- a/docker/gcp_pudl_etl.sh +++ b/docker/gcp_pudl_etl.sh @@ -64,17 +64,17 @@ function copy_outputs_to_gcs() { } function copy_outputs_to_distribution_bucket() { - # Only attempt to update outputs if we have a real value of GITHUB_REF - if [ -n "$GITHUB_REF" ]; then - echo "Removing old $GITHUB_REF outputs from GCP distributon bucket." - gsutil -m -u "$GCP_BILLING_PROJECT" rm -r "gs://pudl.catalyst.coop/$GITHUB_REF" + # Only attempt to update outputs if we have a real value of BUILD_REF + if [ -n "$BUILD_REF" ]; then + echo "Removing old $BUILD_REF outputs from GCP distributon bucket." + gsutil -m -u "$GCP_BILLING_PROJECT" rm -r "gs://pudl.catalyst.coop/$BUILD_REF" echo "Copying outputs to GCP distribution bucket" - gsutil -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" + gsutil -m -u "$GCP_BILLING_PROJECT" cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$BUILD_REF" - echo "Removing old $GITHUB_REF outputs from AWS distributon bucket." - aws s3 rm "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive + echo "Removing old $BUILD_REF outputs from AWS distributon bucket." + aws s3 rm "s3://pudl.catalyst.coop/$BUILD_REF" --recursive echo "Copying outputs to AWS distribution bucket" - aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$GITHUB_REF" --recursive + aws s3 cp "$PUDL_OUTPUT/" "s3://pudl.catalyst.coop/$BUILD_REF" --recursive fi } @@ -87,7 +87,7 @@ function notify_slack() { # Notify pudl-builds slack channel of deployment status if [ "$1" = "success" ]; then message=":large_green_circle: :sunglasses: :unicorn_face: :rainbow: The deployment succeeded!! :partygritty: :database_parrot: :blob-dance: :large_green_circle:\n\n " - message+="\n\n" + message+="\n\n" elif [ "$1" = "failure" ]; then message=":large_red_square: Oh bummer the deployment failed ::fiiiiine: :sob: :cry_spin:\n\n " else @@ -99,29 +99,30 @@ function notify_slack() { send_slack_msg "$message" } +function update_nightly_branch() { + git config --unset http.https://github.com/.extraheader + git config user.email "pudl@catalyst.coop" + git config user.name "pudlbot" + git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" + echo "BOGUS: Updating nightly branch to point at $NIGHTLY_TAG." + git fetch origin nightly:nightly + git checkout nightly + git merge --ff-only "$NIGHTLY_TAG" + ETL_SUCCESS=${PIPESTATUS[0]} + git push -u origin +} + # # Run ETL. Copy outputs to GCS and shutdown VM if ETL succeeds or fails # 2>&1 redirects stderr to stdout. run_pudl_etl 2>&1 | tee "$LOGFILE" - ETL_SUCCESS=${PIPESTATUS[0]} copy_outputs_to_gcs # if pipeline is successful, distribute + publish datasette if [[ $ETL_SUCCESS == 0 ]]; then - if [ "$GITHUB_ACTION_TRIGGER" = "schedule" ]; then - # Remove read-only authentication header added by git checkout - git config --unset http.https://github.com/.extraheader - git config user.email "pudl@catalyst.coop" - git config user.name "pudlbot" - git remote set-url origin "https://pudlbot:$PUDL_BOT_PAT@github.com/catalyst-cooperative/pudl.git" - # Update the nightly branch to point at newly successful nightly build tag - git checkout nightly - git merge --ff-only "$NIGHTLY_TAG" - git push - fi # Deploy the updated data to datasette - if [ "$GITHUB_REF" = "dev" ]; then + if [ "$BUILD_REF" = "dev" ]; then python ~/devtools/datasette/publish.py 2>&1 | tee -a "$LOGFILE" ETL_SUCCESS=${PIPESTATUS[0]} fi @@ -137,9 +138,10 @@ if [[ $ETL_SUCCESS == 0 ]]; then # TODO: this behavior should be controlled by on/off switch here and this logic # should be moved to the triggering github action. Having it here feels # fragmented. - if [ "$GITHUB_ACTION_TRIGGER" = "push" ] || [ "$GITHUB_REF" = "dev" ]; then + if [ "$GITHUB_ACTION_TRIGGER" = "push" ] || [ "$BUILD_REF" = "dev" ]; then copy_outputs_to_distribution_bucket ETL_SUCCESS=${PIPESTATUS[0]} + # TEMPORARY: this currently just makes a sandbox release, for testing: zenodo_data_release 2>&1 | tee -a "$LOGFILE" ETL_SUCCESS=${PIPESTATUS[0]} fi