From 73bbe5c2ba0bbbb1018210c8ee42fd22efc9350c Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 30 Oct 2023 12:25:47 -0700 Subject: [PATCH 1/4] Remove set-branch-ingest-config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently learned that Snakemake no longer overwrites the entire config when providing nested config values via the `--config` option.¹ This allows us to remove the set-branch-ingest-config script and directly set branch config values via the command line option in our GitHub Action workflows. I've opted to use `jq` to interpolate the branch name into the S3_DST because I didn't want to fiddle with escaping quotes in the JSON string. ¹ https://github.com/nextstrain/dengue/pull/13#discussion_r1375081279 --- .../workflows/fetch-and-ingest-branch.yaml | 14 ++++----- phylogenetic/bin/set-branch-ingest-config | 29 ------------------- 2 files changed, 5 insertions(+), 38 deletions(-) delete mode 100755 phylogenetic/bin/set-branch-ingest-config diff --git a/.github/workflows/fetch-and-ingest-branch.yaml b/.github/workflows/fetch-and-ingest-branch.yaml index ce834c4f..310895b7 100644 --- a/.github/workflows/fetch-and-ingest-branch.yaml +++ b/.github/workflows/fetch-and-ingest-branch.yaml @@ -21,16 +21,12 @@ jobs: cli-version: ">=7.1.0" python-version: "3.10" - - name: install-pyyaml - run: python3 -m pip install pyyaml - - name: run_pipeline run: | + # Create JSON string for the nested upload config GITHUB_BRANCH=${GITHUB_REF#refs/heads/} - ./phylogenetic/bin/set-branch-ingest-config \ - --config-yaml ingest/config/optional.yaml \ - --s3-dst s3://nextstrain-data/files/workflows/monkeypox/branch/"${GITHUB_BRANCH}" \ - > ingest/config/optional-branch.yaml + S3_DST="s3://nextstrain-data/files/workflows/monkeypox/branch/${GITHUB_BRANCH}" + UPLOAD_CONFIG=$(jq -cn --arg S3_DST "$S3_DST" '{"s3": {"dst": $S3_DST }}') nextstrain build \ --aws-batch \ @@ -43,8 +39,8 @@ jobs: --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ ingest \ - --configfiles config/config.yaml config/optional-branch.yaml \ - --config trigger_rebuild=False + --configfiles config/config.yaml config/optional.yaml \ + --config trigger_rebuild=False send_slack_notifications=False upload="$UPLOAD_CONFIG" env: AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} diff --git a/phylogenetic/bin/set-branch-ingest-config b/phylogenetic/bin/set-branch-ingest-config deleted file mode 100755 index d1a2d627..00000000 --- a/phylogenetic/bin/set-branch-ingest-config +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 -""" -Edits the config YAML file and outputs the new config to stdout. -""" -import argparse -import yaml - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--config-yaml", default="ingest/config/optional.yaml", - help="Path to config YAML file to edit.") - parser.add_argument("--s3-dst", required=True, - help="The S3 destination to add to the config YAML file") - parser.add_argument("--send-slack-notifications", action="store_true", - help="Set the `send_slack_notifications` config to True") - - args = parser.parse_args() - - with open(args.config_yaml, 'r') as config_fh: - config = yaml.safe_load(config_fh) - - config['upload']['s3']['dst'] = args.s3_dst - config['send_slack_notifications'] = args.send_slack_notifications - - print(yaml.dump(config, default_flow_style=False, sort_keys=False)) From fc32c861f7798d0fc3ab5044b2fb2a7cfe67d22e Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 30 Oct 2023 13:07:40 -0700 Subject: [PATCH 2/4] fetch-and-ingest-branch: Refactor to pathogen-repo-build workflow Since we've removed the need to run set-branch-ingest-config in the GitHub Action workflow in the previous commit, we can refactor the fetch-and-ingest-branch workflow to use the shared pathogen-repo-build workflow. --- .../workflows/fetch-and-ingest-branch.yaml | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/.github/workflows/fetch-and-ingest-branch.yaml b/.github/workflows/fetch-and-ingest-branch.yaml index 310895b7..dcee0126 100644 --- a/.github/workflows/fetch-and-ingest-branch.yaml +++ b/.github/workflows/fetch-and-ingest-branch.yaml @@ -8,41 +8,45 @@ on: description: 'Specific container image to use for build (will override the default of "nextstrain build")' required: false -env: - NEXTSTRAIN_DOCKER_IMAGE: ${{ github.event.inputs.image }} jobs: - fetch-and-ingest: + set_config_overrides: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: nextstrain/.github/actions/setup-nextstrain-cli@master - with: - cli-version: ">=7.1.0" - python-version: "3.10" - - - name: run_pipeline + - id: upload_config run: | # Create JSON string for the nested upload config GITHUB_BRANCH=${GITHUB_REF#refs/heads/} S3_DST="s3://nextstrain-data/files/workflows/monkeypox/branch/${GITHUB_BRANCH}" UPLOAD_CONFIG=$(jq -cn --arg S3_DST "$S3_DST" '{"s3": {"dst": $S3_DST }}') - nextstrain build \ - --aws-batch \ - --detach \ - --no-download \ - --cpus 32 \ - --memory 64gib \ - --env AWS_DEFAULT_REGION \ - --env AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY \ - --env GITHUB_RUN_ID \ - ingest \ - --configfiles config/config.yaml config/optional.yaml \ - --config trigger_rebuild=False send_slack_notifications=False upload="$UPLOAD_CONFIG" - env: - AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - GITHUB_RUN_ID: ${{ github.run_id }} + echo "upload_config=$UPLOAD_CONFIG" >> "$GITHUB_OUTPUT" + outputs: + upload_config: ${{ steps.upload_config.outputs.upload_config }} + + fetch-and-ingest: + needs: [set_config_overrides] + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit + with: + runtime: aws-batch + env: | + NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} + GITHUB_RUN_ID: ${{ github.run_id }} + UPLOAD_CONFIG: ${{ needs.set_config_overrides.outputs.upload_config }} + run: | + nextstrain build \ + --aws-batch \ + --detach \ + --no-download \ + --cpus 32 \ + --memory 64gib \ + --env AWS_DEFAULT_REGION \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env GITHUB_RUN_ID \ + ingest \ + --configfiles config/config.yaml config/optional.yaml \ + --config trigger_rebuild=False send_slack_notifications=False upload="$UPLOAD_CONFIG" From a20d021ad123bd7061c5eace554fd0fdf7b4160a Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 30 Oct 2023 13:45:34 -0700 Subject: [PATCH 3/4] Remove AWS_DEFAULT_REGION references The AWS_DEFAULT_REGION envvar is not required for AWS S3 interactions, so remove it from our workflows and documentation. This will allow us to remove AWS_DEFAULT_REGION from the repo secrets so that our pathogen-repo-build summary no longer points to a masked URL for the AWS Batch console (e.g. https://console.aws.amazon.com/batch/home?region=***#jobs/detail/0d66a150-a010-4761-9561-8085ef1d7ea3). --- .github/workflows/fetch-and-ingest-branch.yaml | 1 - .github/workflows/fetch-and-ingest.yaml | 1 - .github/workflows/rebuild-hmpxv1-big.yaml | 1 - .github/workflows/rebuild-hmpxv1.yaml | 1 - .github/workflows/rebuild-mpxv.yaml | 1 - ingest/README.md | 1 - 6 files changed, 6 deletions(-) diff --git a/.github/workflows/fetch-and-ingest-branch.yaml b/.github/workflows/fetch-and-ingest-branch.yaml index dcee0126..dbadeaeb 100644 --- a/.github/workflows/fetch-and-ingest-branch.yaml +++ b/.github/workflows/fetch-and-ingest-branch.yaml @@ -43,7 +43,6 @@ jobs: --no-download \ --cpus 32 \ --memory 64gib \ - --env AWS_DEFAULT_REGION \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ diff --git a/.github/workflows/fetch-and-ingest.yaml b/.github/workflows/fetch-and-ingest.yaml index 2b5856aa..c2a8e27b 100644 --- a/.github/workflows/fetch-and-ingest.yaml +++ b/.github/workflows/fetch-and-ingest.yaml @@ -49,7 +49,6 @@ jobs: --no-download \ --cpus 32 \ --memory 64gib \ - --env AWS_DEFAULT_REGION \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ diff --git a/.github/workflows/rebuild-hmpxv1-big.yaml b/.github/workflows/rebuild-hmpxv1-big.yaml index b65aa363..947186ce 100644 --- a/.github/workflows/rebuild-hmpxv1-big.yaml +++ b/.github/workflows/rebuild-hmpxv1-big.yaml @@ -39,7 +39,6 @@ jobs: --no-download \ --cpus 8 \ --memory 68gib \ - --env AWS_DEFAULT_REGION \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ diff --git a/.github/workflows/rebuild-hmpxv1.yaml b/.github/workflows/rebuild-hmpxv1.yaml index 4cf2b68d..a2d6e199 100644 --- a/.github/workflows/rebuild-hmpxv1.yaml +++ b/.github/workflows/rebuild-hmpxv1.yaml @@ -39,7 +39,6 @@ jobs: --no-download \ --cpus 8 \ --memory 32gib \ - --env AWS_DEFAULT_REGION \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ diff --git a/.github/workflows/rebuild-mpxv.yaml b/.github/workflows/rebuild-mpxv.yaml index 1522e77b..53bbfcb5 100644 --- a/.github/workflows/rebuild-mpxv.yaml +++ b/.github/workflows/rebuild-mpxv.yaml @@ -39,7 +39,6 @@ jobs: --no-download \ --cpus 8 \ --memory 32gib \ - --env AWS_DEFAULT_REGION \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ --env GITHUB_RUN_ID \ diff --git a/ingest/README.md b/ingest/README.md index a94ff53b..cc88f291 100644 --- a/ingest/README.md +++ b/ingest/README.md @@ -70,7 +70,6 @@ The complete ingest pipeline with AWS S3 uploads and Slack notifications uses th #### Required -- `AWS_DEFAULT_REGION` - `AWS_ACCESS_KEY_ID` - `AWS_SECRET_ACCESS_KEY` - `SLACK_TOKEN` From 0f0ba056a6fa64f4271a8251f7b41f2c765d7fd5 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Mon, 30 Oct 2023 15:40:07 -0700 Subject: [PATCH 4/4] fetch-and-ingest-branch: Remove GITHUB_RUN_ID envvar The GITHUB_RUN_ID environment variable is only used for Slack notification messages, so it is not needed for the branch workflows that run without Slack notifications. --- .github/workflows/fetch-and-ingest-branch.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/fetch-and-ingest-branch.yaml b/.github/workflows/fetch-and-ingest-branch.yaml index dbadeaeb..61e0c8c5 100644 --- a/.github/workflows/fetch-and-ingest-branch.yaml +++ b/.github/workflows/fetch-and-ingest-branch.yaml @@ -34,7 +34,6 @@ jobs: runtime: aws-batch env: | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }} - GITHUB_RUN_ID: ${{ github.run_id }} UPLOAD_CONFIG: ${{ needs.set_config_overrides.outputs.upload_config }} run: | nextstrain build \ @@ -45,7 +44,6 @@ jobs: --memory 64gib \ --env AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY \ - --env GITHUB_RUN_ID \ ingest \ --configfiles config/config.yaml config/optional.yaml \ --config trigger_rebuild=False send_slack_notifications=False upload="$UPLOAD_CONFIG"