From 4c0264e4a8b3095b3b40c5fe4820a472e3c84474 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 6 Jul 2023 13:06:01 -0700 Subject: [PATCH 01/26] Initial (empty) commit From 71fbe29287a77d29de3d32dcd35020e18edc5181 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 6 Jul 2023 13:06:05 -0700 Subject: [PATCH 02/26] README: Set the scene --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..2e179ccb --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# ingest + +Shared internal tooling for pathogen data ingest. Used by our individual +pathogen repos which produce Nextstrain builds. Expected to be vendored by +each pathogen repo using `git subtree` (or `git subrepo`). + +Some tools may only live here temporarily before finding a permanent home in +`augur curate` or Nextstrain CLI. Others may happily live out their days here. + +## History + +Much of this tooling originated in +[ncov-ingest](https://github.com/nextstrain/ncov-ingest) and was passaged thru +[monkeypox's ingest/](https://github.com/nextstrain/monkeypox/tree/@/ingest/). +It subsequently proliferated from [monkeypox][] to other pathogen repos +([rsv][], [zika][], [dengue][], [hepatitisB][], [forecasts-ncov][]) primarily +thru copying. To [counter that +proliferation](https://bedfordlab.slack.com/archives/C7SDVPBLZ/p1688577879947079), +this repo was made. + +[monkeypox]: https://github.com/nextstrain/monkeypox +[rsv]: https://github.com/nextstrain/rsv +[zika]: https://github.com/nextstrain/zika/pull/24 +[dengue]: https://github.com/nextstrain/dengue/pull/10 +[hepatitisB]: https://github.com/nextstrain/hepatitisB +[forecasts-ncov]: https://github.com/nextstrain/forecasts-ncov + +## Elsewhere + +The creation of this repo, in both the abstract and concrete, and the general +approach to "ingest" has been discussed in various internal places, including: + +- https://github.com/nextstrain/private/issues/59 +- [5 July 2023 Slack thread](https://bedfordlab.slack.com/archives/C7SDVPBLZ/p1688577879947079) +- [6 July 2023 team meeting](https://docs.google.com/document/d/1FPfx-ON5RdqL2wyvODhkrCcjgOVX3nlXgBwCPhIEsco/edit) +- _…many others_ From 92a8868208cf783435bb901424f8976931b0cd0f Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 6 Jul 2023 14:24:35 -0700 Subject: [PATCH 03/26] README: Link to @joverlee521's workflows document --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2e179ccb..db9ff4c5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ The creation of this repo, in both the abstract and concrete, and the general approach to "ingest" has been discussed in various internal places, including: - https://github.com/nextstrain/private/issues/59 +- @joverlee521's [workflows document](https://docs.google.com/document/d/1rLWPvEuj0Ayc8MR0O1lfRJZfj9av53xU38f20g8nU_E/edit#heading=h.4g0d3mjvb89i) - [5 July 2023 Slack thread](https://bedfordlab.slack.com/archives/C7SDVPBLZ/p1688577879947079) - [6 July 2023 team meeting](https://docs.google.com/document/d/1FPfx-ON5RdqL2wyvODhkrCcjgOVX3nlXgBwCPhIEsco/edit) - _…many others_ From f27560e8a1501091603f7808ff31aa0f504c66c7 Mon Sep 17 00:00:00 2001 From: Jover Date: Thu, 13 Jul 2023 15:27:59 -0700 Subject: [PATCH 04/26] Copy s3-object-exists from ncov-ingest Copied from https://github.com/nextstrain/ncov-ingest/blob/8442ba80bc2ab16c345db1ad53233542fca343fe/bin/s3-object-exists Subsequent copies of this script 
that I checked for differences: - https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/s3-object-exists - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/s3-object-exists - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/s3-object-exists - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/s3-object-exists They have the following one line difference that will be omitted in this repo: ``` @@ -1,4 +1,5 @@ #!/bin/bash +# Originally copied from nextstrain/ncov-ingest set -euo pipefail url="${1#s3://}" ``` --- README.md | 6 ++++++ s3-object-exists | 8 ++++++++ 2 files changed, 14 insertions(+) create mode 100755 s3-object-exists diff --git a/README.md b/README.md index db9ff4c5..ce570aab 100644 --- a/README.md +++ b/README.md @@ -35,3 +35,9 @@ approach to "ingest" has been discussed in various internal places, including: - [5 July 2023 Slack thread](https://bedfordlab.slack.com/archives/C7SDVPBLZ/p1688577879947079) - [6 July 2023 team meeting](https://docs.google.com/document/d/1FPfx-ON5RdqL2wyvODhkrCcjgOVX3nlXgBwCPhIEsco/edit) - _…many others_ + +## Scripts + +Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. + +- [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts diff --git a/s3-object-exists b/s3-object-exists new file mode 100755 index 00000000..faac4219 --- /dev/null +++ b/s3-object-exists @@ -0,0 +1,8 @@ +#!/bin/bash +set -euo pipefail + +url="${1#s3://}" +bucket="${url%%/*}" +key="${url#*/}" + +aws s3api head-object --bucket "$bucket" --key "$key" &>/dev/null From a70ac51519734e25b1954ebec0e1f7fe12004aac Mon Sep 17 00:00:00 2001 From: Jover Date: Thu, 13 Jul 2023 15:45:55 -0700 Subject: [PATCH 05/26] Copy trigger from ncov-ingest Copied from https://github.com/nextstrain/ncov-ingest/blob/8442ba80bc2ab16c345db1ad53233542fca343fe/bin/trigger Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/trigger - https://github.com/nextstrain/forecasts-ncov/blob/b7229bad08b047d480a2b40c4bafb1e33b7fe84b/bin/trigger - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/trigger - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/trigger They are completely identical to this copy of the script. --- README.md | 1 + trigger | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 trigger diff --git a/README.md b/README.md index ce570aab..80e6f4e7 100644 --- a/README.md +++ b/README.md @@ -41,3 +41,4 @@ approach to "ingest" has been discussed in various internal places, including: Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. - [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts +- [trigger](trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. 
diff --git a/trigger b/trigger
new file mode 100755
index 00000000..d40553b6
--- /dev/null
+++ b/trigger
@@ -0,0 +1,56 @@
+#!/bin/bash
+set -euo pipefail
+
+: "${PAT_GITHUB_DISPATCH:=}"
+
+repo="${1:?A repository name is required as the first argument.}"
+event_type="${2:?An event type is required as the second argument.}"
+shift 2
+
+if [[ $# -eq 0 && -z $PAT_GITHUB_DISPATCH ]]; then
+ cat >&2 <<.
+You must specify options to curl for your GitHub credentials. For example, you
+can specify your GitHub username, and will be prompted for your password:
+
+ $0 $repo $event_type --user
+
+Be sure to enter a personal access token¹ as your password since GitHub has
+discontinued password authentication to the API starting on November 13, 2020².
+
+You can also store your credentials or a personal access token in a netrc
+file³:
+
+ machine api.github.com
+ login
+ password
+
+and then tell curl to use it:
+
+ $0 $repo $event_type --netrc
+
+which will then not require you to type your password every time.
+
+¹ https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line
+² https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password
+³ https://ec.haxx.se/usingcurl/usingcurl-netrc
+.
+ exit 1
+fi
+
+auth=':'
+if [[ -n $PAT_GITHUB_DISPATCH ]]; then
+ auth="Authorization: Bearer ${PAT_GITHUB_DISPATCH}"
+fi
+
+if curl -fsS "https://api.github.com/repos/nextstrain/${repo}/dispatches" \
+ -H 'Accept: application/vnd.github.v3+json' \
+ -H 'Content-Type: application/json' \
+ -H "$auth" \
+ -d '{"event_type":"'"$event_type"'"}' \
+ "$@"
+then
+ echo "Successfully triggered $event_type"
+else
+ echo "Request failed" >&2
+ exit 1
+fi

From 51970b7acec8d849a0b69fb4c294eae6de9c0d58 Mon Sep 17 00:00:00 2001
From: Jover
Date: Thu, 13 Jul 2023 15:58:00 -0700
Subject: [PATCH 06/26] Copy sha256sum from ncov-ingest

Copied from https://github.com/nextstrain/ncov-ingest/blob/8442ba80bc2ab16c345db1ad53233542fca343fe/bin/sha256sum

Subsequent copies of this script that I checked for differences:
- https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/sha256sum
- https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/sha256sum
- https://github.com/nextstrain/forecasts-ncov/blob/b7229bad08b047d480a2b40c4bafb1e33b7fe84b/ingest/bin/sha256sum
- https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/sha256sum
- https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/sha256sum
- https://github.com/nextstrain/ncov/blob/ba9953f8a61d239176e960b9b2efb925b5dff84b/scripts/sha256sum
- https://github.com/nextstrain/seasonal-flu/blob/c1421ff32755a0daa24c247a8e97734a01565473/scripts/sha256sum

The copies in forecasts-ncov and ncov are completely identical to this copy of
the script. The other copies have the following one line difference that will
be omitted in this repo:

```
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+# Originally copied from nextstrain/ncov-ingest repo
"""
Portable sha256sum utility.
""" ``` --- README.md | 4 ++++ sha256sum | 15 +++++++++++++++ 2 files changed, 19 insertions(+) create mode 100755 sha256sum diff --git a/README.md b/README.md index 80e6f4e7..9dcee206 100644 --- a/README.md +++ b/README.md @@ -42,3 +42,7 @@ Scripts for supporting ingest workflow automation that don’t really belong in - [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts - [trigger](trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. + +Potential Nextstrain CLI scripts + +- [sha256sum](sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. diff --git a/sha256sum b/sha256sum new file mode 100755 index 00000000..32d7ef82 --- /dev/null +++ b/sha256sum @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +""" +Portable sha256sum utility. +""" +from hashlib import sha256 +from sys import stdin + +chunk_size = 5 * 1024**2 # 5 MiB + +h = sha256() + +for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""): + h.update(chunk) + +print(h.hexdigest()) From 6d39c87991b5369b847340f63b6431361d206110 Mon Sep 17 00:00:00 2001 From: Jover Date: Thu, 13 Jul 2023 16:12:57 -0700 Subject: [PATCH 07/26] Copy cloudfront-invalidate from ncov-ingest Copied from https://github.com/nextstrain/ncov-ingest/blob/8442ba80bc2ab16c345db1ad53233542fca343fe/bin/cloudfront-invalidate Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/cloudfront-invalidate - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/cloudfront-invalidate - https://github.com/nextstrain/forecasts-ncov/blob/b7229bad08b047d480a2b40c4bafb1e33b7fe84b/ingest/bin/cloudfront-invalidate - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/cloudfront-invalidate - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/cloudfront-invalidate They are completely identical to this copy of the script. --- README.md | 4 +++- cloudfront-invalidate | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 cloudfront-invalidate diff --git a/README.md b/README.md index 9dcee206..07cb54e8 100644 --- a/README.md +++ b/README.md @@ -45,4 +45,6 @@ Scripts for supporting ingest workflow automation that don’t really belong in Potential Nextstrain CLI scripts -- [sha256sum](sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. +- [sha256sum](sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. +- [cloudfront-invalidate](cloudfront-invalidate) - CloudFront invalidation is already supported in the [nextstrain remote command for S3 files](https://github.com/nextstrain/cli/blob/a5dda9c0579ece7acbd8e2c32a4bbe95df7c0bce/nextstrain/cli/remote/s3.py#L104). + This exists as a separate script to support CloudFront invalidation when using the upload-to-s3 script. 
diff --git a/cloudfront-invalidate b/cloudfront-invalidate new file mode 100755 index 00000000..dec48529 --- /dev/null +++ b/cloudfront-invalidate @@ -0,0 +1,42 @@ +#!/bin/bash +# Originally from @tsibley's gist: https://gist.github.com/tsibley/a66262d341dedbea39b02f27e2837ea8 +set -euo pipefail + +main() { + local domain="$1" + shift + local paths=("$@") + local distribution invalidation + + echo "-> Finding CloudFront distribution" + distribution=$( + aws cloudfront list-distributions \ + --query "DistributionList.Items[?contains(Aliases.Items, \`$domain\`)] | [0].Id" \ + --output text + ) + + if [[ -z $distribution || $distribution == None ]]; then + exec >&2 + echo "Unable to find CloudFront distribution id for $domain" + echo + echo "Are your AWS CLI credentials for the right account?" + exit 1 + fi + + echo "-> Creating CloudFront invalidation for distribution $distribution" + invalidation=$( + aws cloudfront create-invalidation \ + --distribution-id "$distribution" \ + --paths "${paths[@]}" \ + --query Invalidation.Id \ + --output text + ) + + echo "-> Waiting for CloudFront invalidation $invalidation to complete" + echo " Ctrl-C to stop waiting." + aws cloudfront wait invalidation-completed \ + --distribution-id "$distribution" \ + --id "$invalidation" +} + +main "$@" From 154d16a0676fc06292aedaf3c72fa692f4e62770 Mon Sep 17 00:00:00 2001 From: Jover Date: Thu, 13 Jul 2023 17:08:53 -0700 Subject: [PATCH 08/26] Copy merge-user-metadata from monkeypox Copied from https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/merge-user-metadata Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/merge-user-metadata - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/merge-user-metadata - https://github.com/nextstrain/zika/blob/ingest/ingest/bin/merge-user-metadata They are completely identical to this copy of the script. --- README.md | 4 ++++ merge-user-metadata | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100755 merge-user-metadata diff --git a/README.md b/README.md index 07cb54e8..78804ef7 100644 --- a/README.md +++ b/README.md @@ -48,3 +48,7 @@ Potential Nextstrain CLI scripts - [sha256sum](sha256sum) - Used to check if files are identical in upload-to-s3 and download-from-s3 scripts. - [cloudfront-invalidate](cloudfront-invalidate) - CloudFront invalidation is already supported in the [nextstrain remote command for S3 files](https://github.com/nextstrain/cli/blob/a5dda9c0579ece7acbd8e2c32a4bbe95df7c0bce/nextstrain/cli/remote/s3.py#L104). This exists as a separate script to support CloudFront invalidation when using the upload-to-s3 script. + +Potential augur curate scripts + +- [merge-user-metadata](merge-user-metadata) - Merges user annotations with NDJSON records diff --git a/merge-user-metadata b/merge-user-metadata new file mode 100755 index 00000000..341c2dfa --- /dev/null +++ b/merge-user-metadata @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +""" +Merges user curated annotations with the NDJSON records from stdin, with the user +curations overwriting the existing fields. The modified records are output +to stdout. This does not do any additional transformations on top of the user +curations. 
+""" +import argparse +import csv +import json +from collections import defaultdict +from sys import exit, stdin, stderr, stdout + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("--annotations", metavar="TSV", required=True, + help="Manually curated annotations TSV file. " + + "The TSV should not have a header and should have exactly three columns: " + + "id to match existing metadata, field name, and field value. " + + "If there are multiple annotations for the same id and field, then the last value is used. " + + "Lines starting with '#' are treated as comments. " + + "Any '#' after the field value are treated as comments.") + parser.add_argument("--id-field", default="accession", + help="The ID field in the metadata to use to merge with the annotations.") + + args = parser.parse_args() + + annotations = defaultdict(dict) + with open(args.annotations, 'r') as annotations_fh: + csv_reader = csv.reader(annotations_fh, delimiter='\t') + for row in csv_reader: + if not row or row[0].lstrip()[0] == '#': + continue + elif len(row) != 3: + print("WARNING: Could not decode annotation line " + "\t".join(row), file=stderr) + continue + id, field, value = row + annotations[id][field] = value.partition('#')[0].rstrip() + + for record in stdin: + record = json.loads(record) + + record_id = record.get(args.id_field) + if record_id is None: + print(f"ERROR: ID field {args.id_field!r} does not exist in record", file=stderr) + exit(1) + + record.update(annotations.get(record_id, {})) + + json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') + print() From 608a3e0fa1755fe6506b666b5d6c1bb0faacdfae Mon Sep 17 00:00:00 2001 From: Jover Date: Thu, 13 Jul 2023 17:16:19 -0700 Subject: [PATCH 09/26] Copy transform-authors from monkeypox Copied from https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/transform-authors Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/transform-authors - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/transform-authors - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/transform-authors They are completely identical to this copy of the script. --- README.md | 1 + transform-authors | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100755 transform-authors diff --git a/README.md b/README.md index 78804ef7..1832ad1b 100644 --- a/README.md +++ b/README.md @@ -52,3 +52,4 @@ Potential Nextstrain CLI scripts Potential augur curate scripts - [merge-user-metadata](merge-user-metadata) - Merges user annotations with NDJSON records +- [transform-authors](transform-authors) - Abbreviates full author lists to ' et al.' diff --git a/transform-authors b/transform-authors new file mode 100755 index 00000000..0bade20e --- /dev/null +++ b/transform-authors @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +""" +Abbreviates a full list of authors to be ' et al.' of the NDJSON +record from stdin and outputs modified records to stdout. + +Note: This is a "best effort" approach and can potentially mangle the author name. 
+""" +import argparse +import json +import re +from sys import stderr, stdin, stdout + + +def parse_authors(record: dict, authors_field: str, default_value: str, + index: int, abbr_authors_field: str = None) -> dict: + # Strip and normalize whitespace + new_authors = re.sub(r'\s+', ' ', record[authors_field]) + + if new_authors == "": + new_authors = default_value + else: + # Split authors list on comma/semicolon + # OR "and"/"&" with at least one space before and after + new_authors = re.split(r'(?:\s*[,,;;]\s*|\s+(?:and|&)\s+)', new_authors)[0] + + # if it does not already end with " et al.", add it + if not new_authors.strip('. ').endswith(" et al"): + new_authors += ' et al' + + if abbr_authors_field: + if record.get(abbr_authors_field): + print( + f"WARNING: the {abbr_authors_field!r} field already exists", + f"in record {index} and will be overwritten!", + file=stderr + ) + + record[abbr_authors_field] = new_authors + else: + record[authors_field] = new_authors + + return record + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("--authors-field", default="authors", + help="The field containing list of authors.") + parser.add_argument("--default-value", default="?", + help="Default value to use if authors list is empty.") + parser.add_argument("--abbr-authors-field", + help="The field for the generated abbreviated authors. " + + "If not provided, the original authors field will be modified.") + + args = parser.parse_args() + + for index, record in enumerate(stdin): + record = json.loads(record) + + parse_authors(record, args.authors_field, args.default_value, index, args.abbr_authors_field) + + json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') + print() From b4034d6a6591d7a7140e7a302bad54ade95cd524 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 14 Jul 2023 12:20:59 -0700 Subject: [PATCH 10/26] Copy transform-field-names from monkeypox Copied from https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/transform-field-names Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/transform-field-names - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/transform-field-names - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/transform-field-names They are completely identical to this copy of the script. --- README.md | 1 + transform-field-names | 48 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100755 transform-field-names diff --git a/README.md b/README.md index 1832ad1b..d8f9db82 100644 --- a/README.md +++ b/README.md @@ -53,3 +53,4 @@ Potential augur curate scripts - [merge-user-metadata](merge-user-metadata) - Merges user annotations with NDJSON records - [transform-authors](transform-authors) - Abbreviates full author lists to ' et al.' +- [transform-field-names](transform-field-names) - Rename fields of NDJSON records diff --git a/transform-field-names b/transform-field-names new file mode 100755 index 00000000..fde223fc --- /dev/null +++ b/transform-field-names @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Renames fields of the NDJSON record from stdin and outputs modified records +to stdout. 
+""" +import argparse +import json +from sys import stderr, stdin, stdout + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("--field-map", nargs="+", + help="Fields names in the NDJSON record mapped to new field names, " + + "formatted as '{old_field_name}={new_field_name}'. " + + "If the old field does not exist in record, the new field will be added with an empty string value." + + "If the new field already exists in record, then the renaming of the old field will be skipped.") + parser.add_argument("--force", action="store_true", + help="Force renaming of old field even if the new field already exists. " + + "Please keep in mind this will overwrite the value of the new field.") + + args = parser.parse_args() + + field_map = {} + for field in args.field_map: + old_name, new_name = field.split('=') + field_map[old_name] = new_name + + for record in stdin: + record = json.loads(record) + + for old_field, new_field in field_map.items(): + + if record.get(new_field) and not args.force: + print( + f"WARNING: skipping rename of {old_field} because record", + f"already has a field named {new_field}.", + file=stderr + ) + continue + + record[new_field] = record.pop(old_field, '') + + json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') + print() From 41f137cf6a6ca20ad7aef650ccf04f4416e9e717 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 14 Jul 2023 12:07:28 -0700 Subject: [PATCH 11/26] Copy tranform-genbank-location from monkeypox Copied from https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/transform-genbank-location Subsequent copies of this script that I checked for differences: - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/transform-genbank-location - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/transform-genbank-location - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/transform-genbank-location They are completely identical to this copy of the script. --- README.md | 1 + transform-genbank-location | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100755 transform-genbank-location diff --git a/README.md b/README.md index d8f9db82..c7d1a39a 100644 --- a/README.md +++ b/README.md @@ -54,3 +54,4 @@ Potential augur curate scripts - [merge-user-metadata](merge-user-metadata) - Merges user annotations with NDJSON records - [transform-authors](transform-authors) - Abbreviates full author lists to ' et al.' - [transform-field-names](transform-field-names) - Rename fields of NDJSON records +- [transform-genbank-location](transform-genbank-location) - Parses `location` field with the expected pattern `"[:][, ]"` based on [GenBank's country field](https://www.ncbi.nlm.nih.gov/genbank/collab/country/) diff --git a/transform-genbank-location b/transform-genbank-location new file mode 100755 index 00000000..70ba56fb --- /dev/null +++ b/transform-genbank-location @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +""" +Parses GenBank's 'location' field of the NDJSON record from stdin to 3 separate +fields: 'country', 'division', and 'location'. Checks that a record is from +GenBank by verifying that the 'database' field has a value of "GenBank" or "RefSeq". + +Outputs the modified record to stdout. 
+""" +import json +from sys import stdin, stdout + + +def parse_location(record: dict) -> dict: + # Expected pattern for the location field is "[:][, ]" + # See GenBank docs for their "country" field: + # https://www.ncbi.nlm.nih.gov/genbank/collab/country/ + geographic_data = record['location'].split(':') + + country = geographic_data[0] + division = '' + location = '' + + if len(geographic_data) == 2: + division , _ , location = geographic_data[1].partition(',') + + record['country'] = country.strip() + record['division'] = division.strip() + record['location'] = location.strip() + + return record + + +if __name__ == '__main__': + + for record in stdin: + record = json.loads(record) + + database = record.get('database', '') + if database in {'GenBank', 'RefSeq'}: + parse_location(record) + + json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') + print() From 84047cc68bd2094254b70b09fc1bf1957b276440 Mon Sep 17 00:00:00 2001 From: Jover Date: Tue, 18 Jul 2023 12:24:22 -0700 Subject: [PATCH 12/26] Add CI workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add minimum CI workflow that currently only runs a shellcheck job like the shellcheck workflow in the ncov-ingest repo¹. We may choose to add more tests in the future, but shellcheck is enough for now to catch errors in bash scripts. This will be helpful as I centralize the scripts that have diverged in the various ingest workflows. ¹ https://github.com/nextstrain/ncov-ingest/blob/6fd5a9b1d87e59fab35173dbedf376632154943b/.github/workflows/shellcheck.yml --- .github/workflows/ci.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..dcb3b898 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,13 @@ +name: CI + +on: + - push + - pull_request + - workflow_dispatch + +jobs: + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: nextstrain/.github/actions/shellcheck@master From 193c311cee05d4ca212dfc388cb0e2b4f313120d Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 14 Jul 2023 16:10:15 -0700 Subject: [PATCH 13/26] Copy notify-slack from ncov-ingest Copied from https://github.com/nextstrain/ncov-ingest/blob/8442ba80bc2ab16c345db1ad53233542fca343fe/bin/notify-slack Subsequent copies of this script that are functionally identical: - https://github.com/nextstrain/forecasts-ncov/blob/b7229bad08b047d480a2b40c4bafb1e33b7fe84b/ingest/bin/notify-slack - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/notify-slack There was a one line difference in the rsv script that will be omitted in this repo: ``` @@ -1,4 +1,5 @@ #!/bin/bash +# Originally copied from nextstrain/ncov-ingest repo set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" ``` --- README.md | 1 + notify-slack | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100755 notify-slack diff --git a/README.md b/README.md index c7d1a39a..af09812a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ approach to "ingest" has been discussed in various internal places, including: Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. 
+- [notify-slack](notify-slack) - Send message or file to Slack - [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts - [trigger](trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. diff --git a/notify-slack b/notify-slack new file mode 100755 index 00000000..6695d83f --- /dev/null +++ b/notify-slack @@ -0,0 +1,44 @@ +#!/bin/bash +set -euo pipefail + +: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" +: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" + +upload=0 +args=() + +for arg; do + case "$arg" in + --upload) + upload=1;; + *) + args+=("$arg");; + esac +done + +set -- "${args[@]}" + +text="${1:?Some message text is required.}" + +if [[ "$upload" == 1 ]]; then + echo "Uploading data to Slack with the message: $text" + curl https://slack.com/api/files.upload \ + --header "Authorization: Bearer $SLACK_TOKEN" \ + --form-string channels="$SLACK_CHANNELS" \ + --form-string title="$text" \ + --form-string filename="$text" \ + --form file=@/dev/stdin \ + --form filetype=text \ + --fail --silent --show-error \ + --http1.1 \ + --output /dev/null +else + echo "Posting Slack message: $text" + curl https://slack.com/api/chat.postMessage \ + --header "Authorization: Bearer $SLACK_TOKEN" \ + --form-string channel="$SLACK_CHANNELS" \ + --form-string text="$text" \ + --fail --silent --show-error \ + --http1.1 \ + --output /dev/null +fi From ba0769b1da6d9124c4ffbd93181cc128a59fa19d Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 29 Jul 2022 14:42:50 -0700 Subject: [PATCH 14/26] notify-slack: support threaded messages Changes copied from https://github.com/nextstrain/monkeypox/commit/875789012f09ae7c07db3648b5c8fd5b7d5af3ed Subsequent copies of this script that contain identical changes: - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/notify-slack - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/notify-slack These changes use optional args so they will not require changes to calls of the previous version of the script in ncov-ingest, forecasts-ncov, and rsv. 
--- notify-slack | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/notify-slack b/notify-slack index 6695d83f..7fcb4f7f 100755 --- a/notify-slack +++ b/notify-slack @@ -5,12 +5,21 @@ set -euo pipefail : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" upload=0 +output=/dev/null +thread_ts="" +broadcast=0 args=() for arg; do case "$arg" in --upload) upload=1;; + --output=*) + output="${arg#*=}";; + --thread-ts=*) + thread_ts="${arg#*=}";; + --broadcast) + broadcast=1;; *) args+=("$arg");; esac @@ -27,18 +36,22 @@ if [[ "$upload" == 1 ]]; then --form-string channels="$SLACK_CHANNELS" \ --form-string title="$text" \ --form-string filename="$text" \ + --form-string thread_ts="$thread_ts" \ + --form-string reply_broadcast="$broadcast" \ --form file=@/dev/stdin \ --form filetype=text \ --fail --silent --show-error \ --http1.1 \ - --output /dev/null + --output "$output" else echo "Posting Slack message: $text" curl https://slack.com/api/chat.postMessage \ --header "Authorization: Bearer $SLACK_TOKEN" \ --form-string channel="$SLACK_CHANNELS" \ --form-string text="$text" \ + --form-string thread_ts="$thread_ts" \ + --form-string reply_broadcast="$broadcast" \ --fail --silent --show-error \ --http1.1 \ - --output /dev/null + --output "$output" fi From 83e871d6558c5c6e2ea9dfbdbd14eba28698967b Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 14 Jul 2023 17:14:06 -0700 Subject: [PATCH 15/26] Copy notify-on-job-start from monkeypox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copied from https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/notify-on-job-start I decided to copy the version in monkeypox because it is easier to update to be a generalized script than the ncov-ingest version which has unique changes to support the run-nextclade-full scripts.¹ ¹ https://github.com/nextstrain/ncov-ingest/compare/88be153e02acc5f318c970dcf44e8745128d469f...e898400a0ebf599dcb9e69f1f93dc8c108c97796 --- README.md | 1 + notify-on-job-start | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100755 notify-on-job-start diff --git a/README.md b/README.md index af09812a..51638c50 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ approach to "ingest" has been discussed in various internal places, including: Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. +- [notify-on-job-start](notify-on-job-start) - Send Slack message with details about workflow job on GitHub Actions and/or AWS Batch - [notify-slack](notify-slack) - Send message or file to Slack - [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts - [trigger](trigger) - Triggers downstream GitHub Actions via the GitHub API using repository_dispatch events. diff --git a/notify-on-job-start b/notify-on-job-start new file mode 100755 index 00000000..9410fa38 --- /dev/null +++ b/notify-on-job-start @@ -0,0 +1,24 @@ +#!/bin/bash +set -euo pipefail + +: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" +: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" + +: "${AWS_BATCH_JOB_ID:=}" +: "${GITHUB_RUN_ID:=}" + +bin="$(dirname "$0")" + +echo "Notifying Slack about started ingest job." +message="🐵 Monkeypox ingest job has started." + +if [[ -n "${GITHUB_RUN_ID}" ]]; then + message+=" The job was submitted by GitHub Action ." 
+fi + +if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then + message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." + message+=" Follow along in your local \`monkeypox\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ingest/"'```' +fi + +"$bin"/notify-slack "$message" From 98b235a70254db8f0a22ccfc0bbada8aff5bd86b Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 14 Jul 2023 17:49:04 -0700 Subject: [PATCH 16/26] notify-on-job-start: Add job_name and repo_name args Generalize the script by using new args to customize the Slack message and point to the appropriate GitHub Action URL. These changes were made based on diffs with subsequent copies of the script that edited the Slack message: - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/notify-on-job-start - https://github.com/nextstrain/forecasts-ncov/blob/70bf78f459a3706dd817ae5f711af3b74887d7b1/bin/notify-on-job-start - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/notify-on-job-start This will require all calls of the script to be updated since job_name and repo_name are required args. --- notify-on-job-start | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/notify-on-job-start b/notify-on-job-start index 9410fa38..8404311c 100755 --- a/notify-on-job-start +++ b/notify-on-job-start @@ -8,17 +8,19 @@ set -euo pipefail : "${GITHUB_RUN_ID:=}" bin="$(dirname "$0")" +job_name="${1:?A job name is required as the first argument}" +repo_name="${2:?A repository name is required as the second argument}" -echo "Notifying Slack about started ingest job." -message="🐵 Monkeypox ingest job has started." +echo "Notifying Slack about started ${job_name} job." +message="${job_name} job has started." if [[ -n "${GITHUB_RUN_ID}" ]]; then - message+=" The job was submitted by GitHub Action ." + message+=" The job was submitted by GitHub Action ." fi if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." - message+=" Follow along in your local \`monkeypox\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ingest/"'```' + message+=" Follow along in your local \`${repo_name}\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ingest"'```' fi "$bin"/notify-slack "$message" From 00472555fb969899d4a8b4d5c2c5c32cdb06fe57 Mon Sep 17 00:00:00 2001 From: Jover Date: Tue, 18 Jul 2023 11:30:02 -0700 Subject: [PATCH 17/26] notify-on-job-start: Add optional build_dir arg The default value is "ingest" which is the expected ingest directory for standard pathogen repos. Adding the optional arg to support customizations for historical repos such as ncov-ingest that do not follow the new standardized repo structure. --- notify-on-job-start | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/notify-on-job-start b/notify-on-job-start index 8404311c..3b8ae65a 100755 --- a/notify-on-job-start +++ b/notify-on-job-start @@ -10,6 +10,7 @@ set -euo pipefail bin="$(dirname "$0")" job_name="${1:?A job name is required as the first argument}" repo_name="${2:?A repository name is required as the second argument}" +build_dir="${3:-ingest}" echo "Notifying Slack about started ${job_name} job." message="${job_name} job has started." @@ -20,7 +21,7 @@ fi if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." 
- message+=" Follow along in your local \`${repo_name}\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ingest"'```' + message+=" Follow along in your local \`${repo_name}\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ${build_dir}"'```' fi "$bin"/notify-slack "$message" From 192600841852273bd595c69775b075ce105dc5d7 Mon Sep 17 00:00:00 2001 From: Jover Date: Mon, 17 Jul 2023 16:33:14 -0700 Subject: [PATCH 18/26] Copy notify-on-job-fail from ncov-ingest Copied from https://github.com/nextstrain/ncov-ingest/blob/6fd5a9b1d87e59fab35173dbedf376632154943b/bin/notify-on-job-fail The following commits will update and generalize the script to incorporate changes made subsequent copies. --- README.md | 1 + notify-on-job-fail | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100755 notify-on-job-fail diff --git a/README.md b/README.md index 51638c50..84b855c1 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ approach to "ingest" has been discussed in various internal places, including: Scripts for supporting ingest workflow automation that don’t really belong in any of our existing tools. +- [notify-on-job-fail](notify-on-job-fail) - Send Slack message with details about failed workflow job on GitHub Actions and/or AWS Batch - [notify-on-job-start](notify-on-job-start) - Send Slack message with details about workflow job on GitHub Actions and/or AWS Batch - [notify-slack](notify-slack) - Send message or file to Slack - [s3-object-exists](s3-object-exists) - Used to prevent 404 errors during S3 file comparisons in the notify-* scripts diff --git a/notify-on-job-fail b/notify-on-job-fail new file mode 100755 index 00000000..3d49b934 --- /dev/null +++ b/notify-on-job-fail @@ -0,0 +1,21 @@ +#!/bin/bash +set -euo pipefail + +: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" +: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" + +bin="$(dirname "$0")" + +aws_batch_job_id="${AWS_BATCH_JOB_ID:-}" +github_run_id="${GITHUB_RUN_ID:-}" + +echo "Notifying Slack about failed ingest job." +message="❌ Ingest job has FAILED 😞 " + +if [ -n "${aws_batch_job_id}" ]; then + message+="See AWS Batch job \`${aws_batch_job_id}\` () for error details. " +elif [ -n "${github_run_id}" ]; then + message+="See GitHub Action for error details. " +fi + +"$bin"/notify-slack "$message" From 309ebbf4688ed933d226db0f0803b38c0b0a6ae0 Mon Sep 17 00:00:00 2001 From: Jover Date: Mon, 17 Jul 2023 18:03:07 -0700 Subject: [PATCH 19/26] notify-on-job-fail: stylistic updates Follow patterns set in notify-on-job-start where the environment variables `AWS_BATCH_JOB_ID` and `GITHUB_RUN_ID` are used directly instead of reassigning them to local variables and use double square brackets. --- notify-on-job-fail | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/notify-on-job-fail b/notify-on-job-fail index 3d49b934..af3bd760 100755 --- a/notify-on-job-fail +++ b/notify-on-job-fail @@ -4,18 +4,18 @@ set -euo pipefail : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" -bin="$(dirname "$0")" +: "${AWS_BATCH_JOB_ID:=}" +: "${GITHUB_RUN_ID:=}" -aws_batch_job_id="${AWS_BATCH_JOB_ID:-}" -github_run_id="${GITHUB_RUN_ID:-}" +bin="$(dirname "$0")" echo "Notifying Slack about failed ingest job." 
message="❌ Ingest job has FAILED 😞 " -if [ -n "${aws_batch_job_id}" ]; then - message+="See AWS Batch job \`${aws_batch_job_id}\` () for error details. " -elif [ -n "${github_run_id}" ]; then - message+="See GitHub Action for error details. " +if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then + message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` () for error details. " +elif [[ -n "${GITHUB_RUN_ID}" ]]; then + message+="See GitHub Action for error details. " fi "$bin"/notify-slack "$message" From b4b406fd6f96dfc15404721cf3dd1e6af9079248 Mon Sep 17 00:00:00 2001 From: Jover Date: Mon, 17 Jul 2023 18:12:57 -0700 Subject: [PATCH 20/26] notify-on-job-fail: Add job_name and repo_name args Generalize the script by using new args to customize the Slack message and point to the appropriate GitHub Action URL. The repo_name arg was added based on diffs with subsequent copies of the script that edited the Slack message: - https://github.com/nextstrain/monkeypox/blob/5c461dc7e90cd70c1f16b193f82fd1666d4c95e2/ingest/bin/notify-on-job-fail - https://github.com/nextstrain/forecasts-ncov/blob/70bf78f459a3706dd817ae5f711af3b74887d7b1/ingest/bin/notify-on-job-fail - https://github.com/nextstrain/rsv/blob/ba171f4a43110382c38b6154be3febd50408d7bf/ingest/bin/notify-on-job-fail - https://github.com/nextstrain/dengue/blob/247b2fd897361f2548627de1d97d45fae4115c5c/ingest/bin/notify-on-job-fail - https://github.com/nextstrain/zika/blob/4ac8d526f9f14be10b7e8858ad469a40b72a505e/ingest/bin/notify-on-job-fail Although the job_name arg is not necessary based on diffs with subsequent copies of the script, it's nice to be able to customize the Slack message for different ingest jobs. It's also a plus to mirror the args for notify-on-job-start. This will require all calls of the script to be update since job_name and repo_name are required args. --- notify-on-job-fail | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/notify-on-job-fail b/notify-on-job-fail index af3bd760..65aab6ed 100755 --- a/notify-on-job-fail +++ b/notify-on-job-fail @@ -8,14 +8,16 @@ set -euo pipefail : "${GITHUB_RUN_ID:=}" bin="$(dirname "$0")" +job_name="${1:?A job name is required as the first argument}" +repo_name="${2:?A repository name is required as the second argument}" -echo "Notifying Slack about failed ingest job." -message="❌ Ingest job has FAILED 😞 " +echo "Notifying Slack about failed ${job_name} job." +message="❌ ${job_name} job has FAILED 😞 " if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` () for error details. " elif [[ -n "${GITHUB_RUN_ID}" ]]; then - message+="See GitHub Action for error details. " + message+="See GitHub Action for error details. " fi "$bin"/notify-slack "$message" From b663e17211949c375ad2675abc50a9d5c24a715f Mon Sep 17 00:00:00 2001 From: Jover Date: Tue, 18 Jul 2023 14:56:04 -0700 Subject: [PATCH 21/26] notify-slack: remove reply_broadcast option for uploads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I realized in my testing of the script that broadcasting does not work for file uploads. Confirmed that this is not an argument for the files.upload API.¹ Including the argument did not cause an error, but better to remove it so it doesn't confuse us in the future. 
¹ https://api.slack.com/methods/files.upload --- notify-slack | 1 - 1 file changed, 1 deletion(-) diff --git a/notify-slack b/notify-slack index 7fcb4f7f..db98bfb8 100755 --- a/notify-slack +++ b/notify-slack @@ -37,7 +37,6 @@ if [[ "$upload" == 1 ]]; then --form-string title="$text" \ --form-string filename="$text" \ --form-string thread_ts="$thread_ts" \ - --form-string reply_broadcast="$broadcast" \ --form file=@/dev/stdin \ --form filetype=text \ --fail --silent --show-error \ From b2a0de70342cef1b2b27f3f139af262910466b24 Mon Sep 17 00:00:00 2001 From: Jover Date: Wed, 26 Jul 2023 15:31:15 -0700 Subject: [PATCH 22/26] notify-on-job-start/fail: replace repo_name with github_repo Using GitHub repos by org/name pair to make scripts more generalizable. This will also work really well with the `GITHUB_REPOSITORY` variable available for GitHub Action workflows. Co-authored-by: Victor Lin <13424970+victorlin@users.noreply.github.com> --- notify-on-job-fail | 4 ++-- notify-on-job-start | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/notify-on-job-fail b/notify-on-job-fail index 65aab6ed..02cb6bad 100755 --- a/notify-on-job-fail +++ b/notify-on-job-fail @@ -9,7 +9,7 @@ set -euo pipefail bin="$(dirname "$0")" job_name="${1:?A job name is required as the first argument}" -repo_name="${2:?A repository name is required as the second argument}" +github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}" echo "Notifying Slack about failed ${job_name} job." message="❌ ${job_name} job has FAILED 😞 " @@ -17,7 +17,7 @@ message="❌ ${job_name} job has FAILED 😞 " if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` () for error details. " elif [[ -n "${GITHUB_RUN_ID}" ]]; then - message+="See GitHub Action for error details. " + message+="See GitHub Action for error details. " fi "$bin"/notify-slack "$message" diff --git a/notify-on-job-start b/notify-on-job-start index 3b8ae65a..3e44bb09 100755 --- a/notify-on-job-start +++ b/notify-on-job-start @@ -9,19 +9,19 @@ set -euo pipefail bin="$(dirname "$0")" job_name="${1:?A job name is required as the first argument}" -repo_name="${2:?A repository name is required as the second argument}" +github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}" build_dir="${3:-ingest}" echo "Notifying Slack about started ${job_name} job." message="${job_name} job has started." if [[ -n "${GITHUB_RUN_ID}" ]]; then - message+=" The job was submitted by GitHub Action ." + message+=" The job was submitted by GitHub Action ." fi if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." 
- message+=" Follow along in your local \`${repo_name}\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ${build_dir}"'```' + message+=" Follow along in your local clone of ${github_repo} with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ${build_dir}"'```' fi "$bin"/notify-slack "$message" From 7d261c70370c2c129391d7fb398b579aeea920fe Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 28 Jul 2023 13:39:39 -0700 Subject: [PATCH 23/26] Describe subtree setup The previous merge commit and its attached commits were created by the following command: git subtree add --prefix ingest/vendored https://github.com/nextstrain/ingest HEAD Add a section in the README on how to use this directory in the future. --- ingest/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ingest/README.md b/ingest/README.md index a8911985..d4e7d00f 100644 --- a/ingest/README.md +++ b/ingest/README.md @@ -85,3 +85,16 @@ These are optional environment variables used in our automated pipeline for prov GenBank sequences and metadata are fetched via NCBI Virus. The exact URL used to fetch data is constructed in `bin/genbank-url`. + +## `ingest/vendored` + +This repository uses `git subtree` to manage copies of ingest scripts in `ingest/vendored`, from [nextstrain/ingest](https://github.com/nextstrain/ingest). To pull new changes from the central ingest repository, run: + +```sh +git subtree pull --prefix ingest/vendored https://github.com/nextstrain/ingest HEAD +``` + +Changes should not be pushed using `git subtree push`. + +1. For pathogen-specific changes, make them in this repository via a pull request. +2. For pathogen-agnostic changes, make them on [nextstrain/ingest](https://github.com/nextstrain/ingest) via pull request there, then use `git subtree pull` to add those changes to this repository. From e7736352d62f2a8078c9c50a4c5d5aa11790b49f Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 28 Jul 2023 13:42:48 -0700 Subject: [PATCH 24/26] Use centralized scripts that are functionally identical Remove the copies in this repo and update references. 
--- .github/workflows/rebuild-all.yaml | 2 +- ingest/bin/cloudfront-invalidate | 42 ------------ ingest/bin/download-from-s3 | 4 +- ingest/bin/merge-user-metadata | 55 ---------------- ingest/bin/notify-on-diff | 3 +- ingest/bin/notify-on-record-change | 3 +- ingest/bin/s3-object-exists | 9 --- ingest/bin/sha256sum | 16 ----- ingest/bin/transform-authors | 66 ------------------- ingest/bin/transform-field-names | 48 -------------- ingest/bin/transform-genbank-location | 43 ------------ ingest/bin/trigger | 56 ---------------- ingest/bin/trigger-on-new-data | 3 +- ingest/bin/upload-to-s3 | 5 +- ingest/workflow/snakemake_rules/transform.smk | 8 +-- 15 files changed, 16 insertions(+), 347 deletions(-) delete mode 100755 ingest/bin/cloudfront-invalidate delete mode 100755 ingest/bin/merge-user-metadata delete mode 100755 ingest/bin/s3-object-exists delete mode 100755 ingest/bin/sha256sum delete mode 100755 ingest/bin/transform-authors delete mode 100755 ingest/bin/transform-field-names delete mode 100755 ingest/bin/transform-genbank-location delete mode 100755 ingest/bin/trigger diff --git a/.github/workflows/rebuild-all.yaml b/.github/workflows/rebuild-all.yaml index cf7984b8..b687e153 100644 --- a/.github/workflows/rebuild-all.yaml +++ b/.github/workflows/rebuild-all.yaml @@ -9,6 +9,6 @@ jobs: steps: - uses: actions/checkout@v3 - name: Repository Dispatch - run: ./ingest/bin/trigger monkeypox rebuild + run: ./ingest/vendored/trigger monkeypox rebuild env: PAT_GITHUB_DISPATCH: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH }} diff --git a/ingest/bin/cloudfront-invalidate b/ingest/bin/cloudfront-invalidate deleted file mode 100755 index dec48529..00000000 --- a/ingest/bin/cloudfront-invalidate +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Originally from @tsibley's gist: https://gist.github.com/tsibley/a66262d341dedbea39b02f27e2837ea8 -set -euo pipefail - -main() { - local domain="$1" - shift - local paths=("$@") - local distribution invalidation - - echo "-> Finding CloudFront distribution" - distribution=$( - aws cloudfront list-distributions \ - --query "DistributionList.Items[?contains(Aliases.Items, \`$domain\`)] | [0].Id" \ - --output text - ) - - if [[ -z $distribution || $distribution == None ]]; then - exec >&2 - echo "Unable to find CloudFront distribution id for $domain" - echo - echo "Are your AWS CLI credentials for the right account?" - exit 1 - fi - - echo "-> Creating CloudFront invalidation for distribution $distribution" - invalidation=$( - aws cloudfront create-invalidation \ - --distribution-id "$distribution" \ - --paths "${paths[@]}" \ - --query Invalidation.Id \ - --output text - ) - - echo "-> Waiting for CloudFront invalidation $invalidation to complete" - echo " Ctrl-C to stop waiting." 
- aws cloudfront wait invalidation-completed \ - --distribution-id "$distribution" \ - --id "$invalidation" -} - -main "$@" diff --git a/ingest/bin/download-from-s3 b/ingest/bin/download-from-s3 index 762fe581..99424c77 100755 --- a/ingest/bin/download-from-s3 +++ b/ingest/bin/download-from-s3 @@ -2,7 +2,7 @@ # Originally copied from nextstrain/ncov-ingest repo set -euo pipefail -bin="$(dirname "$0")" +vendored="$(dirname "$0")"/../vendored main() { local src="${1:?A source s3:// URL is required as the first argument.}" @@ -13,7 +13,7 @@ main() { local key="${s3path#*/}" local src_hash dst_hash no_hash=0000000000000000000000000000000000000000000000000000000000000000 - dst_hash="$("$bin/sha256sum" < "$dst" || true)" + dst_hash="$("$vendored/sha256sum" < "$dst" || true)" src_hash="$(aws s3api head-object --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")" echo "[ INFO] Downloading $src → $dst" diff --git a/ingest/bin/merge-user-metadata b/ingest/bin/merge-user-metadata deleted file mode 100755 index 341c2dfa..00000000 --- a/ingest/bin/merge-user-metadata +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -""" -Merges user curated annotations with the NDJSON records from stdin, with the user -curations overwriting the existing fields. The modified records are output -to stdout. This does not do any additional transformations on top of the user -curations. -""" -import argparse -import csv -import json -from collections import defaultdict -from sys import exit, stdin, stderr, stdout - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--annotations", metavar="TSV", required=True, - help="Manually curated annotations TSV file. " + - "The TSV should not have a header and should have exactly three columns: " + - "id to match existing metadata, field name, and field value. " + - "If there are multiple annotations for the same id and field, then the last value is used. " + - "Lines starting with '#' are treated as comments. 
" + - "Any '#' after the field value are treated as comments.") - parser.add_argument("--id-field", default="accession", - help="The ID field in the metadata to use to merge with the annotations.") - - args = parser.parse_args() - - annotations = defaultdict(dict) - with open(args.annotations, 'r') as annotations_fh: - csv_reader = csv.reader(annotations_fh, delimiter='\t') - for row in csv_reader: - if not row or row[0].lstrip()[0] == '#': - continue - elif len(row) != 3: - print("WARNING: Could not decode annotation line " + "\t".join(row), file=stderr) - continue - id, field, value = row - annotations[id][field] = value.partition('#')[0].rstrip() - - for record in stdin: - record = json.loads(record) - - record_id = record.get(args.id_field) - if record_id is None: - print(f"ERROR: ID field {args.id_field!r} does not exist in record", file=stderr) - exit(1) - - record.update(annotations.get(record_id, {})) - - json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') - print() diff --git a/ingest/bin/notify-on-diff b/ingest/bin/notify-on-diff index c304d6b5..32464801 100755 --- a/ingest/bin/notify-on-diff +++ b/ingest/bin/notify-on-diff @@ -6,6 +6,7 @@ set -euo pipefail : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" bin="$(dirname "$0")" +vendored="$(dirname "$0")"/../vendored src="${1:?A source file is required as the first argument.}" dst="${2:?A destination s3:// URL is required as the second argument.}" @@ -16,7 +17,7 @@ diff="$(mktemp -t diff-XXXXXX)" trap "rm -f '$dst_local' '$diff'" EXIT # if the file is not already present, just exit -"$bin"/s3-object-exists "$dst" || exit 0 +"$vendored"/s3-object-exists "$dst" || exit 0 "$bin"/download-from-s3 "$dst" "$dst_local" diff --git a/ingest/bin/notify-on-record-change b/ingest/bin/notify-on-record-change index 595835b5..6487fbcb 100755 --- a/ingest/bin/notify-on-record-change +++ b/ingest/bin/notify-on-record-change @@ -6,13 +6,14 @@ set -euo pipefail : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" bin="$(dirname "$0")" +vendored="$(dirname "$0")"/../vendored src="${1:?A source ndjson file is required as the first argument.}" dst="${2:?A destination ndjson s3:// URL is required as the second argument.}" source_name=${3:?A record source name is required as the third argument.} # if the file is not already present, just exit -"$bin"/s3-object-exists "$dst" || exit 0 +"$vendored"/s3-object-exists "$dst" || exit 0 s3path="${dst#s3://}" bucket="${s3path%%/*}" diff --git a/ingest/bin/s3-object-exists b/ingest/bin/s3-object-exists deleted file mode 100755 index d586d0b8..00000000 --- a/ingest/bin/s3-object-exists +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# Originally copied from nextstrain/ncov-ingest -set -euo pipefail - -url="${1#s3://}" -bucket="${url%%/*}" -key="${url#*/}" - -aws s3api head-object --bucket "$bucket" --key "$key" &>/dev/null diff --git a/ingest/bin/sha256sum b/ingest/bin/sha256sum deleted file mode 100755 index aa05af00..00000000 --- a/ingest/bin/sha256sum +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python3 -# Originally copied from nextstrain/ncov-ingest repo -""" -Portable sha256sum utility. 
-""" -from hashlib import sha256 -from sys import stdin - -chunk_size = 5 * 1024**2 # 5 MiB - -h = sha256() - -for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""): - h.update(chunk) - -print(h.hexdigest()) diff --git a/ingest/bin/transform-authors b/ingest/bin/transform-authors deleted file mode 100755 index 0bade20e..00000000 --- a/ingest/bin/transform-authors +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 -""" -Abbreviates a full list of authors to be ' et al.' of the NDJSON -record from stdin and outputs modified records to stdout. - -Note: This is a "best effort" approach and can potentially mangle the author name. -""" -import argparse -import json -import re -from sys import stderr, stdin, stdout - - -def parse_authors(record: dict, authors_field: str, default_value: str, - index: int, abbr_authors_field: str = None) -> dict: - # Strip and normalize whitespace - new_authors = re.sub(r'\s+', ' ', record[authors_field]) - - if new_authors == "": - new_authors = default_value - else: - # Split authors list on comma/semicolon - # OR "and"/"&" with at least one space before and after - new_authors = re.split(r'(?:\s*[,,;;]\s*|\s+(?:and|&)\s+)', new_authors)[0] - - # if it does not already end with " et al.", add it - if not new_authors.strip('. ').endswith(" et al"): - new_authors += ' et al' - - if abbr_authors_field: - if record.get(abbr_authors_field): - print( - f"WARNING: the {abbr_authors_field!r} field already exists", - f"in record {index} and will be overwritten!", - file=stderr - ) - - record[abbr_authors_field] = new_authors - else: - record[authors_field] = new_authors - - return record - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--authors-field", default="authors", - help="The field containing list of authors.") - parser.add_argument("--default-value", default="?", - help="Default value to use if authors list is empty.") - parser.add_argument("--abbr-authors-field", - help="The field for the generated abbreviated authors. " + - "If not provided, the original authors field will be modified.") - - args = parser.parse_args() - - for index, record in enumerate(stdin): - record = json.loads(record) - - parse_authors(record, args.authors_field, args.default_value, index, args.abbr_authors_field) - - json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') - print() diff --git a/ingest/bin/transform-field-names b/ingest/bin/transform-field-names deleted file mode 100755 index fde223fc..00000000 --- a/ingest/bin/transform-field-names +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -""" -Renames fields of the NDJSON record from stdin and outputs modified records -to stdout. -""" -import argparse -import json -from sys import stderr, stdin, stdout - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--field-map", nargs="+", - help="Fields names in the NDJSON record mapped to new field names, " + - "formatted as '{old_field_name}={new_field_name}'. " + - "If the old field does not exist in record, the new field will be added with an empty string value." + - "If the new field already exists in record, then the renaming of the old field will be skipped.") - parser.add_argument("--force", action="store_true", - help="Force renaming of old field even if the new field already exists. 
" + - "Please keep in mind this will overwrite the value of the new field.") - - args = parser.parse_args() - - field_map = {} - for field in args.field_map: - old_name, new_name = field.split('=') - field_map[old_name] = new_name - - for record in stdin: - record = json.loads(record) - - for old_field, new_field in field_map.items(): - - if record.get(new_field) and not args.force: - print( - f"WARNING: skipping rename of {old_field} because record", - f"already has a field named {new_field}.", - file=stderr - ) - continue - - record[new_field] = record.pop(old_field, '') - - json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') - print() diff --git a/ingest/bin/transform-genbank-location b/ingest/bin/transform-genbank-location deleted file mode 100755 index 70ba56fb..00000000 --- a/ingest/bin/transform-genbank-location +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -""" -Parses GenBank's 'location' field of the NDJSON record from stdin to 3 separate -fields: 'country', 'division', and 'location'. Checks that a record is from -GenBank by verifying that the 'database' field has a value of "GenBank" or "RefSeq". - -Outputs the modified record to stdout. -""" -import json -from sys import stdin, stdout - - -def parse_location(record: dict) -> dict: - # Expected pattern for the location field is "[:][, ]" - # See GenBank docs for their "country" field: - # https://www.ncbi.nlm.nih.gov/genbank/collab/country/ - geographic_data = record['location'].split(':') - - country = geographic_data[0] - division = '' - location = '' - - if len(geographic_data) == 2: - division , _ , location = geographic_data[1].partition(',') - - record['country'] = country.strip() - record['division'] = division.strip() - record['location'] = location.strip() - - return record - - -if __name__ == '__main__': - - for record in stdin: - record = json.loads(record) - - database = record.get('database', '') - if database in {'GenBank', 'RefSeq'}: - parse_location(record) - - json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') - print() diff --git a/ingest/bin/trigger b/ingest/bin/trigger deleted file mode 100755 index d40553b6..00000000 --- a/ingest/bin/trigger +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -set -euo pipefail - -: "${PAT_GITHUB_DISPATCH:=}" - -repo="${1:?A repository name is required as the first argument.}" -event_type="${2:?An event type is required as the second argument.}" -shift 2 - -if [[ $# -eq 0 && -z $PAT_GITHUB_DISPATCH ]]; then - cat >&2 <<. -You must specify options to curl for your GitHub credentials. For example, you -can specify your GitHub username, and will be prompted for your password: - - $0 $repo $event_type --user - -Be sure to enter a personal access token¹ as your password since GitHub has -discontinued password authentication to the API starting on November 13, 2020². - -You can also store your credentials or a personal access token in a netrc -file³: - - machine api.github.com - login - password - -and then tell curl to use it: - - $0 $repo $event_type --netrc - -which will then not require you to type your password every time. - -¹ https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line -² https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password -³ https://ec.haxx.se/usingcurl/usingcurl-netrc -. 
- exit 1 -fi - -auth=':' -if [[ -n $PAT_GITHUB_DISPATCH ]]; then - auth="Authorization: Bearer ${PAT_GITHUB_DISPATCH}" -fi - -if curl -fsS "https://api.github.com/repos/nextstrain/${repo}/dispatches" \ - -H 'Accept: application/vnd.github.v3+json' \ - -H 'Content-Type: application/json' \ - -H "$auth" \ - -d '{"event_type":"'"$event_type"'"}' \ - "$@" -then - echo "Successfully triggered $event_type" -else - echo "Request failed" >&2 - exit 1 -fi diff --git a/ingest/bin/trigger-on-new-data b/ingest/bin/trigger-on-new-data index 760a0187..86f40e2f 100755 --- a/ingest/bin/trigger-on-new-data +++ b/ingest/bin/trigger-on-new-data @@ -4,6 +4,7 @@ set -euo pipefail : "${PAT_GITHUB_DISPATCH:?The PAT_GITHUB_DISPATCH environment variable is required.}" bin="$(dirname "$0")" +vendored="$(dirname "$0")"/../vendored metadata="${1:?A metadata upload output file is required as the first argument.}" sequences="${2:?An sequence FASTA upload output file is required as the second argument.}" @@ -17,7 +18,7 @@ slack_message="" # grep exit status 0 for found match, 1 for no match, 2 if an error occurred if [[ $new_metadata -eq 1 || $new_sequences -eq 1 ]]; then slack_message="Triggering new builds due to updated metadata and/or sequences" - "$bin"/trigger "monkeypox" "rebuild" + "$vendored"/trigger "monkeypox" "rebuild" elif [[ $new_metadata -eq 0 && $new_sequences -eq 0 ]]; then slack_message="Skipping trigger of rebuild: Both metadata TSV and sequences FASTA are identical to S3 files." else diff --git a/ingest/bin/upload-to-s3 b/ingest/bin/upload-to-s3 index 78b35edd..2c0394f8 100755 --- a/ingest/bin/upload-to-s3 +++ b/ingest/bin/upload-to-s3 @@ -3,6 +3,7 @@ set -euo pipefail bin="$(dirname "$0")" +vendored="$(dirname "$0")"/../vendored main() { local quiet=0 @@ -26,7 +27,7 @@ main() { local key="${s3path#*/}" local src_hash dst_hash no_hash=0000000000000000000000000000000000000000000000000000000000000000 - src_hash="$("$bin/sha256sum" < "$src")" + src_hash="$("$vendored/sha256sum" < "$src")" dst_hash="$(aws s3api head-object --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")" if [[ $src_hash != "$dst_hash" ]]; then @@ -46,7 +47,7 @@ main() { if [[ -n $cloudfront_domain ]]; then echo "Creating CloudFront invalidation for $cloudfront_domain/$key" - if ! "$bin"/cloudfront-invalidate "$cloudfront_domain" "/$key"; then + if ! "$vendored"/cloudfront-invalidate "$cloudfront_domain" "/$key"; then echo "CloudFront invalidation failed, but exiting with success anyway." 
fi fi diff --git a/ingest/workflow/snakemake_rules/transform.smk b/ingest/workflow/snakemake_rules/transform.smk index b08fc816..1a1726e2 100644 --- a/ingest/workflow/snakemake_rules/transform.smk +++ b/ingest/workflow/snakemake_rules/transform.smk @@ -65,7 +65,7 @@ rule transform: shell: """ (cat {input.sequences_ndjson} \ - | ./bin/transform-field-names \ + | ./vendored/transform-field-names \ --field-map {params.field_map} \ | augur curate normalize-strings \ | ./bin/transform-strain-names \ @@ -74,18 +74,18 @@ rule transform: | ./bin/transform-date-fields \ --date-fields {params.date_fields} \ --expected-date-formats {params.expected_date_formats} \ - | ./bin/transform-genbank-location \ + | ./vendored/transform-genbank-location \ | ./bin/transform-string-fields \ --titlecase-fields {params.titlecase_fields} \ --articles {params.articles} \ --abbreviations {params.abbreviations} \ - | ./bin/transform-authors \ + | ./vendored/transform-authors \ --authors-field {params.authors_field} \ --default-value {params.authors_default_value} \ --abbr-authors-field {params.abbr_authors_field} \ | ./bin/apply-geolocation-rules \ --geolocation-rules {input.all_geolocation_rules} \ - | ./bin/merge-user-metadata \ + | ./vendored/merge-user-metadata \ --annotations {input.annotations} \ --id-field {params.annotations_id} \ | ./bin/ndjson-to-tsv-and-fasta \ From 30279f9020d2ce8ae98e4321fc4f8688da1bc413 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 28 Jul 2023 13:50:15 -0700 Subject: [PATCH 25/26] Use centralized Slack notification scripts Remove the copies in this repo and update references. Add new positional arguments required by the centralized scripts. --- .github/workflows/fetch-and-ingest.yaml | 2 +- bin/notify-on-deploy | 4 +- bin/notify-on-error | 4 +- bin/notify-on-start | 4 +- bin/notify-on-success | 4 +- ingest/bin/notify-on-diff | 4 +- ingest/bin/notify-on-job-fail | 21 ------- ingest/bin/notify-on-job-start | 24 -------- ingest/bin/notify-on-record-change | 2 +- ingest/bin/notify-slack | 58 ------------------- ingest/bin/trigger-on-new-data | 2 +- ingest/bin/upload-to-s3 | 2 +- .../snakemake_rules/slack_notifications.smk | 4 +- 13 files changed, 16 insertions(+), 119 deletions(-) delete mode 100755 ingest/bin/notify-on-job-fail delete mode 100755 ingest/bin/notify-on-job-start delete mode 100755 ingest/bin/notify-slack diff --git a/.github/workflows/fetch-and-ingest.yaml b/.github/workflows/fetch-and-ingest.yaml index 22cbf667..5c789677 100644 --- a/.github/workflows/fetch-and-ingest.yaml +++ b/.github/workflows/fetch-and-ingest.yaml @@ -73,4 +73,4 @@ jobs: - name: notify_pipeline_failed if: ${{ failure() }} - run: ./ingest/bin/notify-on-job-fail + run: ./ingest/vendored/notify-on-job-fail Ingest nextstrain/monkeypox diff --git a/bin/notify-on-deploy b/bin/notify-on-deploy index 77763e74..e279a340 100755 --- a/bin/notify-on-deploy +++ b/bin/notify-on-deploy @@ -5,11 +5,11 @@ set -euo pipefail : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" base="$(realpath "$(dirname "$0")/..")" -ingest_bin="$base/ingest/bin" +ingest_vendored="$base/ingest/vendored" deployment_url="${1:?A deployment url is required as the first argument.}" slack_ts_file="${2:?A Slack thread timestamp file is required as the second argument.}" echo "Notifying Slack about deployed builds." 
-"$ingest_bin"/notify-slack "Deployed this build to $deployment_url" \ +"$ingest_vendored"/notify-slack "Deployed this build to $deployment_url" \ --thread-ts="$(cat "$slack_ts_file")" diff --git a/bin/notify-on-error b/bin/notify-on-error index bbf82a91..810ce8b1 100755 --- a/bin/notify-on-error +++ b/bin/notify-on-error @@ -8,7 +8,7 @@ set -euo pipefail : "${GITHUB_RUN_ID:=}" base="$(realpath "$(dirname "$0")/..")" -ingest_bin="$base/ingest/bin" +ingest_vendored="$base/ingest/vendored" slack_ts_file="${1:-}" @@ -26,6 +26,6 @@ elif [[ -n "${GITHUB_RUN_ID}" ]]; then message+="See GitHub Action for error details." fi -"$ingest_bin"/notify-slack "$message" \ +"$ingest_vendored"/notify-slack "$message" \ --thread-ts="$thread_ts" \ --broadcast diff --git a/bin/notify-on-start b/bin/notify-on-start index 6042e060..c0695ea4 100755 --- a/bin/notify-on-start +++ b/bin/notify-on-start @@ -8,7 +8,7 @@ set -euo pipefail : "${GITHUB_RUN_ID:=}" base="$(realpath "$(dirname "$0")/..")" -ingest_bin="$base/ingest/bin" +ingest_vendored="$base/ingest/vendored" build_name="${1:?A build name is required as the first argument.}" slack_ts_output="${2:?A Slack thread timestamp file is required as the second argument}" @@ -29,7 +29,7 @@ if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then message+=" Follow along in your local \`monkeypox\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} . "'```' fi -"$ingest_bin"/notify-slack "$message" --output="$slack_response" +"$ingest_vendored"/notify-slack "$message" --output="$slack_response" echo "Saving Slack thread timestamp to '$slack_ts_output'." diff --git a/bin/notify-on-success b/bin/notify-on-success index f4b21c37..69b85c8f 100755 --- a/bin/notify-on-success +++ b/bin/notify-on-success @@ -5,10 +5,10 @@ set -euo pipefail : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" base="$(realpath "$(dirname "$0")/..")" -ingest_bin="$base/ingest/bin" +ingest_vendored="$base/ingest/vendored" slack_ts_file="${1:?A Slack thread timestamp file is required as the first argument.}" echo "Notifying Slack about successful build." -"$ingest_bin"/notify-slack "✅ This pipeline has successfully finished 🎉" \ +"$ingest_vendored"/notify-slack "✅ This pipeline has successfully finished 🎉" \ --thread-ts="$(cat "$slack_ts_file")" diff --git a/ingest/bin/notify-on-diff b/ingest/bin/notify-on-diff index 32464801..e401f124 100755 --- a/ingest/bin/notify-on-diff +++ b/ingest/bin/notify-on-diff @@ -27,10 +27,10 @@ diff "$dst_local" "$src" > "$diff" || diff_exit_code=$? if [[ "$diff_exit_code" -eq 1 ]]; then echo "Notifying Slack about diff." - "$bin"/notify-slack --upload "$src.diff" < "$diff" + "$vendored"/notify-slack --upload "$src.diff" < "$diff" elif [[ "$diff_exit_code" -gt 1 ]]; then echo "Notifying Slack about diff failure" - "$bin"/notify-slack "Diff failed for $src" + "$vendored"/notify-slack "Diff failed for $src" else echo "No change in $src." fi diff --git a/ingest/bin/notify-on-job-fail b/ingest/bin/notify-on-job-fail deleted file mode 100755 index 23d3a926..00000000 --- a/ingest/bin/notify-on-job-fail +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -euo pipefail - -: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" -: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" - -: "${AWS_BATCH_JOB_ID:=}" -: "${GITHUB_RUN_ID:=}" - -bin="$(dirname "$0")" - -echo "Notifying Slack about failed ingest job." 
-message="❌ Ingest job has FAILED 😞 " - -if [ -n "${AWS_BATCH_JOB_ID}" ]; then - message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` () for error details. " -elif [ -n "${GITHUB_RUN_ID}" ]; then - message+="See GitHub Action for error details. " -fi - -"$bin"/notify-slack "$message" diff --git a/ingest/bin/notify-on-job-start b/ingest/bin/notify-on-job-start deleted file mode 100755 index 9410fa38..00000000 --- a/ingest/bin/notify-on-job-start +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -set -euo pipefail - -: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" -: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" - -: "${AWS_BATCH_JOB_ID:=}" -: "${GITHUB_RUN_ID:=}" - -bin="$(dirname "$0")" - -echo "Notifying Slack about started ingest job." -message="🐵 Monkeypox ingest job has started." - -if [[ -n "${GITHUB_RUN_ID}" ]]; then - message+=" The job was submitted by GitHub Action ." -fi - -if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then - message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." - message+=" Follow along in your local \`monkeypox\` repo with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ingest/"'```' -fi - -"$bin"/notify-slack "$message" diff --git a/ingest/bin/notify-on-record-change b/ingest/bin/notify-on-record-change index 6487fbcb..3987a192 100755 --- a/ingest/bin/notify-on-record-change +++ b/ingest/bin/notify-on-record-change @@ -52,4 +52,4 @@ fi slack_message+=" (Total record count: $src_record_count)" -"$bin"/notify-slack "$slack_message" +"$vendored"/notify-slack "$slack_message" diff --git a/ingest/bin/notify-slack b/ingest/bin/notify-slack deleted file mode 100755 index 6ca20dec..00000000 --- a/ingest/bin/notify-slack +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Originally copied from nextstrain/ncov-ingest repo -set -euo pipefail - -: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" -: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" - -upload=0 -output=/dev/null -thread_ts="" -broadcast=0 -args=() - -for arg; do - case "$arg" in - --upload) - upload=1;; - --output=*) - output="${arg#*=}";; - --thread-ts=*) - thread_ts="${arg#*=}";; - --broadcast) - broadcast=1;; - *) - args+=("$arg");; - esac -done - -set -- "${args[@]}" - -text="${1:?Some message text is required.}" - -if [[ "$upload" == 1 ]]; then - echo "Uploading data to Slack with the message: $text" - curl https://slack.com/api/files.upload \ - --header "Authorization: Bearer $SLACK_TOKEN" \ - --form-string channels="$SLACK_CHANNELS" \ - --form-string title="$text" \ - --form-string filename="$text" \ - --form-string thread_ts="$thread_ts" \ - --form-string reply_broadcast="$broadcast" \ - --form file=@/dev/stdin \ - --form filetype=text \ - --fail --silent --show-error \ - --http1.1 \ - --output "$output" -else - echo "Posting Slack message: $text" - curl https://slack.com/api/chat.postMessage \ - --header "Authorization: Bearer $SLACK_TOKEN" \ - --form-string channel="$SLACK_CHANNELS" \ - --form-string text="$text" \ - --form-string thread_ts="$thread_ts" \ - --form-string reply_broadcast="$broadcast" \ - --fail --silent --show-error \ - --http1.1 \ - --output "$output" -fi diff --git a/ingest/bin/trigger-on-new-data b/ingest/bin/trigger-on-new-data index 86f40e2f..97c85ff7 100755 --- a/ingest/bin/trigger-on-new-data +++ b/ingest/bin/trigger-on-new-data @@ -26,6 +26,6 @@ else fi -if ! "$bin"/notify-slack "$slack_message"; then +if ! 
"$vendored"/notify-slack "$slack_message"; then echo "Notifying Slack failed, but exiting with success anyway." fi diff --git a/ingest/bin/upload-to-s3 b/ingest/bin/upload-to-s3 index 2c0394f8..cc867755 100755 --- a/ingest/bin/upload-to-s3 +++ b/ingest/bin/upload-to-s3 @@ -57,7 +57,7 @@ main() { exit 0 fi - if ! "$bin"/notify-slack "Updated $dst available."; then + if ! "$vendored"/notify-slack "Updated $dst available."; then echo "Notifying Slack failed, but exiting with success anyway." fi else diff --git a/ingest/workflow/snakemake_rules/slack_notifications.smk b/ingest/workflow/snakemake_rules/slack_notifications.smk index b4a54753..d19b848b 100644 --- a/ingest/workflow/snakemake_rules/slack_notifications.smk +++ b/ingest/workflow/snakemake_rules/slack_notifications.smk @@ -48,8 +48,8 @@ rule notify_on_metadata_diff: onstart: - shell("./bin/notify-on-job-start") + shell("./vendored/notify-on-job-start Ingest nextstrain/monkeypox") onerror: - shell("./bin/notify-on-job-fail") + shell("./vendored/notify-on-job-fail Ingest nextstrain/monkeypox") From a8dd9f682ea8ccdc28e48343b7b0f00262b52220 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:37:56 -0700 Subject: [PATCH 26/26] Remove unused bin variable --- ingest/bin/trigger-on-new-data | 1 - ingest/bin/upload-to-s3 | 1 - 2 files changed, 2 deletions(-) diff --git a/ingest/bin/trigger-on-new-data b/ingest/bin/trigger-on-new-data index 97c85ff7..1f3e0bac 100755 --- a/ingest/bin/trigger-on-new-data +++ b/ingest/bin/trigger-on-new-data @@ -3,7 +3,6 @@ set -euo pipefail : "${PAT_GITHUB_DISPATCH:?The PAT_GITHUB_DISPATCH environment variable is required.}" -bin="$(dirname "$0")" vendored="$(dirname "$0")"/../vendored metadata="${1:?A metadata upload output file is required as the first argument.}" diff --git a/ingest/bin/upload-to-s3 b/ingest/bin/upload-to-s3 index cc867755..d913182f 100755 --- a/ingest/bin/upload-to-s3 +++ b/ingest/bin/upload-to-s3 @@ -2,7 +2,6 @@ # Originally copied from nextstrain/ncov-ingest repo set -euo pipefail -bin="$(dirname "$0")" vendored="$(dirname "$0")"/../vendored main() {