curate: Update Cram tests according to #1133

nextstrain · Jun 12, 2023 · 58fa744 · 58fa744
1 parent c0e3084
commit 58fa744
Show file tree

Hide file tree

Showing 8 changed files with 78 additions and 84 deletions.
diff --git a/tests/functional/curate/cram/_setup.sh b/tests/functional/curate/cram/_setup.sh
@@ -0,0 +1 @@
+export AUGUR="${AUGUR:-$TESTDIR/../../../../bin/augur}"
diff --git a/tests/functional/curate/cram/format_dates.t b/tests/functional/curate/cram/format_dates.t
@@ -1,17 +1,16 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ export AUGUR="${AUGUR:-../../../../bin/augur}"
+  $ source "$TESTDIR"/_setup.sh
 
 Create NDJSON file for testing format_dates with different forms
 
-  $ cat >$TMP/records.ndjson <<~~
+  $ cat >records.ndjson <<~~
   > {"record": 1, "date": "2020", "collectionDate": "2020-01", "releaseDate": "2020-01","updateDate": "2020-07-18T00:00:00Z"}
   > ~~
 
 Test output with matching expected date formats.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m" "%Y-%m-%dT%H:%M:%SZ"
@@ -20,7 +19,7 @@ Test output with matching expected date formats.
 Test output with unmatched expected date formats with default `ERROR_FIRST` failure reporting.
 This is expected to fail with an error, so redirecting stdout since we don't care about the output.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m-%dT%H:%M:%SZ" 1> /dev/null
@@ -30,7 +29,7 @@ This is expected to fail with an error, so redirecting stdout since we don't car
 Test output with unmatched expected date formats with `ERROR_ALL` failure reporting.
 This is expected to fail with an error, so redirecting stdout since we don't care about the output.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m-%dT%H:%M:%SZ" \
@@ -43,7 +42,7 @@ This is expected to fail with an error, so redirecting stdout since we don't car
 Test output with unmatched expected date formats while warning on failures.
 This is expected to print warnings for failures and return the masked date strings for failures.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m-%dT%H:%M:%SZ" \
@@ -58,7 +57,7 @@ This is expected to print warnings for failures and return the masked date strin
 Test output with unmatched expected date formats while silencing failures.
 This is expected to return the masked date strings for failures.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m-%dT%H:%M:%SZ" \
@@ -68,7 +67,7 @@ This is expected to return the masked date strings for failures.
 Test output with unmatched expected date formats while silencing failures with `--no-mask-failure`.
 This is expected to return the date strings in their original format.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m-%dT%H:%M:%SZ" \
@@ -80,7 +79,7 @@ Test output with multiple matching expected date formats.
 Dates with multiple matches will be parsed according to the first matching format.
 The "collectionDate" and "releaseDate" will match the first "%Y-%j" format, which is a complete date.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "collectionDate" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%j" "%Y-%m" "%Y-%m-%dT%H:%M:%SZ"
@@ -90,7 +89,7 @@ Test output with chained format-dates commands that parses different fields with
 Since the expected formats for "collectionDate" and "releaseDate" overlap,
 we can split them into two chained commands that parse them with different expected formats to produce the desired results.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate format-dates \
   >     --date-fields "date" "releaseDate" "updateDate" \
   >     --expected-date-formats "%Y" "%Y-%m" "%Y-%m-%dT%H:%M:%SZ" \

diff --git a/tests/functional/curate/cram/metadata-and-fasta-input.t b/tests/functional/curate/cram/metadata-and-fasta-input.t
@@ -1,14 +1,13 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ export AUGUR="${AUGUR:-../../../../bin/augur}"
+  $ source "$TESTDIR"/_setup.sh
 
 Testing combined metadata and FASTA inputs for the curate command.
 Running the `passthru` subcommand since it does not do any data transformations.
 
 Create FASTA file for testing.
 
-  $ cat >$TMP/sequences.fasta <<~~
+  $ cat >sequences.fasta <<~~
   > >sequence_A
   > ATCG
   > >sequence_B
@@ -19,7 +18,7 @@ Create FASTA file for testing.
 
 Create metadata TSV file for testing.
 
-  $ cat >$TMP/metadata.tsv <<~~
+  $ cat >metadata.tsv <<~~
   > strain	country	date
   > sequence_A	USA	2020-10-01
   > sequence_B	USA	2020-10-02
@@ -30,7 +29,7 @@ Test metadata input with extra FASTA input options without a FASTA file.
 This is expected to fail with an error.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata.tsv \
+  > --metadata metadata.tsv \
   > --seq-id-column name \
   > --seq-field sequences
   ERROR: The --seq-id-column and --seq-field options should only be used when providing a FASTA file.
@@ -41,16 +40,16 @@ Test metadata and FASTA inputs without required FASTA input options.
 This is expected to fail with an error.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata.tsv \
-  > --fasta $TMP/sequences.fasta
+  > --metadata metadata.tsv \
+  > --fasta sequences.fasta
   ERROR: The --seq-id-column and --seq-field options are required for a FASTA file input.
   [2]
 
 Test metadata and FASTA inputs with required FASTA input options.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata.tsv \
-  > --fasta $TMP/sequences.fasta \
+  > --metadata metadata.tsv \
+  > --fasta sequences.fasta \
   > --seq-id-column strain \
   > --seq-field seq
   {"strain": "sequence_A", "country": "USA", "date": "2020-10-01", "seq": "ATCG"}
@@ -59,17 +58,17 @@ Test metadata and FASTA inputs with required FASTA input options.
 
 Create new metadata file with duplicate and extra metadata records.
 
-  $ cp $TMP/metadata.tsv $TMP/metadata-with-duplicate-and-unmatched-records.tsv
-  $ cat >>$TMP/metadata-with-duplicate-and-unmatched-records.tsv <<~~
+  $ cp metadata.tsv metadata-with-duplicate-and-unmatched-records.tsv
+  $ cat >>metadata-with-duplicate-and-unmatched-records.tsv <<~~
   > sequence_A	USA	2020-10-XX
   > extra_metadata_A	USA	2020-10-01
   > extra_metadata_B	USA	2020-10-02
   > ~~
 
 Create new FASTA file with duplicate and extra sequence records.
 
-  $ cp $TMP/sequences.fasta $TMP/sequences-with-duplicate-and-unmatched-records.fasta
-  $ cat >>$TMP/sequences-with-duplicate-and-unmatched-records.fasta <<~~
+  $ cp sequences.fasta sequences-with-duplicate-and-unmatched-records.fasta
+  $ cat >>sequences-with-duplicate-and-unmatched-records.fasta <<~~
   > >sequence_A
   > NNNN
   > >extra_sequence_A
@@ -82,8 +81,8 @@ Test metadata and FASTA inputs with duplicate and extra records and default `ERR
 This is expected to fail with an error, so redirecting stdout since we don't care about the output.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata-with-duplicate-and-unmatched-records.tsv \
-  > --fasta $TMP/sequences-with-duplicate-and-unmatched-records.fasta \
+  > --metadata metadata-with-duplicate-and-unmatched-records.tsv \
+  > --fasta sequences-with-duplicate-and-unmatched-records.fasta \
   > --seq-id-column strain \
   > --seq-field seq 1> /dev/null
   ERROR: Encountered sequence record with duplicate id 'sequence_A'.
@@ -93,8 +92,8 @@ Test metadata and FASTA inputs with duplicate and extra records with `ERROR_ALL`
 This is expected to fail with an error, so redirecting stdout since we don't care about the output.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata-with-duplicate-and-unmatched-records.tsv \
-  > --fasta $TMP/sequences-with-duplicate-and-unmatched-records.fasta \
+  > --metadata metadata-with-duplicate-and-unmatched-records.tsv \
+  > --fasta sequences-with-duplicate-and-unmatched-records.fasta \
   > --seq-id-column strain \
   > --seq-field seq \
   > --unmatched-reporting error_all \
@@ -119,8 +118,8 @@ This is expected run without error and only print a warning.
 Notice the duplicate sequence "sequence_A" will always use the first sequence in the FASTA file because of pyfastx.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata-with-duplicate-and-unmatched-records.tsv \
-  > --fasta $TMP/sequences-with-duplicate-and-unmatched-records.fasta \
+  > --metadata metadata-with-duplicate-and-unmatched-records.tsv \
+  > --fasta sequences-with-duplicate-and-unmatched-records.fasta \
   > --seq-id-column strain \
   > --seq-field seq \
   > --unmatched-reporting warn \
@@ -150,8 +149,8 @@ Test metadata and FASTA inputs with unmatched records in both, but ask to silent
 Notice the duplicate sequence "sequence_A" will always use the first sequence in the FASTA file because of pyfastx.
 
   $ ${AUGUR} curate passthru \
-  > --metadata $TMP/metadata-with-duplicate-and-unmatched-records.tsv \
-  > --fasta $TMP/sequences-with-duplicate-and-unmatched-records.fasta \
+  > --metadata metadata-with-duplicate-and-unmatched-records.tsv \
+  > --fasta sequences-with-duplicate-and-unmatched-records.fasta \
   > --seq-id-column strain \
   > --seq-field seq \
   > --unmatched-reporting silent \

diff --git a/tests/functional/curate/cram/metadata-and-fasta-output.t b/tests/functional/curate/cram/metadata-and-fasta-output.t
@@ -1,52 +1,51 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ export AUGUR="${AUGUR:-../../../../bin/augur}"
+  $ source "$TESTDIR"/_setup.sh
 
 Testing combined metadata and FASTA output for the curate command.
 Running the `passthru` subcommand since it does not do any data transformations.
 
 Create NDJSON file for testing.
 
-  $ cat >$TMP/records.ndjson <<~~
+  $ cat >records.ndjson <<~~
   > {"strain": "sequence_A", "country": "USA", "date": "2020-10-01", "sequence": "AAAA"}
   > {"strain": "sequence_T", "country": "USA", "date": "2020-10-02", "sequence": "TTTT"}
   > {"strain": "sequence_C", "country": "USA", "date": "2020-10-03", "sequence": "CCCC"}
   > ~~
 
 Test metadata output with extra FASTA output options.
 This is expected to fail immediately with an error.
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-metadata $TMP/metadata.tsv \
+  >     --output-metadata metadata.tsv \
   >     --output-id-field strain \
   >     --output-seq-field sequence
   ERROR: The --output-id-field and --output-seq-field options should only be used when requesting a FASTA output.
   [2]
 
 Test metadata and FASTA outputs without required FASTA output options.
 This is expected to fail immediately with an error.
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-metadata $TMP/metadata.tsv \
-  >     --output-fasta $TMP/sequences.fasta
+  >     --output-metadata metadata.tsv \
+  >     --output-fasta sequences.fasta
   ERROR: The --output-id-field and --output-seq-field options are required for a FASTA output.
   [2]
 
 Test metadata and FASTA outputs
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-metadata $TMP/metadata.tsv \
-  >     --output-fasta $TMP/sequences.fasta \
+  >     --output-metadata metadata.tsv \
+  >     --output-fasta sequences.fasta \
   >     --output-id-field strain \
   >     --output-seq-field sequence
-  $ cat $TMP/metadata.tsv
+  $ cat metadata.tsv
   strain\tcountry\tdate (esc)
   sequence_A\tUSA\t2020-10-01 (esc)
   sequence_T\tUSA\t2020-10-02 (esc)
   sequence_C\tUSA\t2020-10-03 (esc)
-  $ cat $TMP/sequences.fasta
+  $ cat sequences.fasta
   >sequence_A (esc)
   AAAA (esc)
   >sequence_T (esc)
@@ -56,12 +55,12 @@ Test metadata and FASTA outputs
 
 Test FASTA output without metadata output.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-fasta $TMP/sequences.fasta \
+  >     --output-fasta sequences.fasta \
   >     --output-id-field strain \
   >     --output-seq-field sequence
-  $ cat $TMP/sequences.fasta
+  $ cat sequences.fasta
   >sequence_A (esc)
   AAAA (esc)
   >sequence_T (esc)
@@ -72,9 +71,9 @@ Test FASTA output without metadata output.
 Test FASTA output with bad output id field.
 This is expected to fail with an error.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-fasta $TMP/sequences.fasta \
+  >     --output-fasta sequences.fasta \
   >     --output-id-field bogus_id \
   >     --output-seq-field sequence
   ERROR: Provided sequence identifier field 'bogus_id' does not exist.
@@ -83,9 +82,9 @@ This is expected to fail with an error.
 Test FASTA output with bad output sequence field.
 This is expected to fail with an error.
 
-  $ cat $TMP/records.ndjson \
+  $ cat records.ndjson \
   >   | ${AUGUR} curate passthru \
-  >     --output-fasta $TMP/sequences.fasta \
+  >     --output-fasta sequences.fasta \
   >     --output-id-field strain \
   >     --output-seq-field bogus_sequence
   ERROR: Provided sequence field 'bogus_sequence' does not exist.