filter: Update working directory of cram tests

$TMP is the test runner's temp directory, which is shared across all individual tests¹. This means files must be removed before running the next test to ensure a clean slate.

On the other hand, the initial working directory of each test² is a directory within $TMP which is truly temporary per test file.

I updated all tests to use this initial working directory as a temporary directory. A summary of the changes:

1. Change the working directory from "$TESTDIR/../../" (tests/functional/) to the default initial working directory.
2. Update references to files in tests/functional/filter/data, since those had relied on the parent folder as the working directory.
3. Remove directory changes in the "setup" section, simplifying that to one command creating the AUGUR alias relative to $TESTDIR.
4. Remove rm commands used to clean up output files, since the working directory is a per-test temporary directory.
5. filter-metadata-sequence-strains-mismatch.t: Update the diff check to a direct check of file contents, since the resolved $TESTDIR must be matched by regex.

¹ https://github.com/brodie/cram/blob/61212ab78a88ce4a18eee4e26c89bfe086b28e78/cram/_main.py#L185
² https://github.com/brodie/cram/blob/61212ab78a88ce4a18eee4e26c89bfe086b28e78/cram/_run.py#L67-L70
nextstrain · Jan 23, 2023 · d2ea4f0 · d2ea4f0
1 parent e4614f2
commit d2ea4f0
Show file tree

Hide file tree

Showing 34 changed files with 250 additions and 300 deletions.
diff --git a/tests/functional/filter/cram/_setup.sh b/tests/functional/filter/cram/_setup.sh
diff --git a/tests/functional/filter/cram/filter-exclude-include.t b/tests/functional/filter/cram/filter-exclude-include.t
@@ -1,17 +1,15 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Filter with exclude query for two regions that comprise all but one strain.
 This filter should leave a single record from Oceania.
 Force include one South American record by country to get two total records.
 
   $ ${AUGUR} filter \
-  >  --metadata filter/data/metadata.tsv \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --exclude-where "region=South America" "region=North America" "region=Southeast Asia" \
   >  --include-where "country=Ecuador" \
-  >  --output-strains "$TMP/filtered_strains.txt" > /dev/null
-  $ wc -l "$TMP/filtered_strains.txt"
+  >  --output-strains filtered_strains.txt > /dev/null
+  $ wc -l filtered_strains.txt
   \s*2 .* (re)
-  $ rm -f "$TMP/filtered_strains.txt"
diff --git a/tests/functional/filter/cram/filter-force-include-no-duplicates.t b/tests/functional/filter/cram/filter-force-include-no-duplicates.t
@@ -1,22 +1,21 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 
 Test that a force-included strain is only output once.
 
 
 Create some files for testing.
 
-  $ cat >$TMP/metadata.tsv <<~~
+  $ cat >metadata.tsv <<~~
   > strain	col
   > a	1
   > b	2
   > c	3
   > d	4
   > ~~
-  $ cat >$TMP/sequences.fasta <<~~
+  $ cat >sequences.fasta <<~~
   > >a
   > NNNN
   > >b
@@ -30,26 +29,26 @@ Create some files for testing.
 Test all outputs with --include-where.
 
   $ ${AUGUR} filter \
-  >   --metadata $TMP/metadata.tsv \
-  >   --sequences $TMP/sequences.fasta \
+  >   --metadata metadata.tsv \
+  >   --sequences sequences.fasta \
   >   --subsample-max-sequences 4 \
   >   --include-where col=1 \
   >   --subsample-seed 0 \
-  >   --output-metadata $TMP/metadata-filtered.tsv \
-  >   --output-strains $TMP/strains-filtered.txt \
-  >   --output-sequences $TMP/sequences-filtered.fasta \
+  >   --output-metadata metadata-filtered.tsv \
+  >   --output-strains strains-filtered.txt \
+  >   --output-sequences sequences-filtered.fasta \
   >   > /dev/null 2>&1
-  $ cat $TMP/metadata-filtered.tsv | tail -n+2 | sort -k1
+  $ cat metadata-filtered.tsv | tail -n+2 | sort -k1
   a\t1 (esc)
   b\t2 (esc)
   c\t3 (esc)
   d\t4 (esc)
-  $ cat $TMP/strains-filtered.txt | sort
+  $ cat strains-filtered.txt | sort
   a
   b
   c
   d
-  $ cat $TMP/sequences-filtered.fasta
+  $ cat sequences-filtered.fasta
   >a
   NNNN
   >b
@@ -61,30 +60,30 @@ Test all outputs with --include-where.
 
 Test all outputs with --include.
 
-  $ cat >$TMP/include.txt <<~~
+  $ cat >include.txt <<~~
   > a
   > ~~
   $ ${AUGUR} filter \
-  >   --metadata $TMP/metadata.tsv \
-  >   --sequences $TMP/sequences.fasta \
+  >   --metadata metadata.tsv \
+  >   --sequences sequences.fasta \
   >   --subsample-max-sequences 4 \
-  >   --include $TMP/include.txt \
+  >   --include include.txt \
   >   --subsample-seed 0 \
-  >   --output-metadata $TMP/metadata-filtered.tsv \
-  >   --output-strains $TMP/strains-filtered.txt \
-  >   --output-sequences $TMP/sequences-filtered.fasta \
+  >   --output-metadata metadata-filtered.tsv \
+  >   --output-strains strains-filtered.txt \
+  >   --output-sequences sequences-filtered.fasta \
   >   > /dev/null 2>&1
-  $ cat $TMP/metadata-filtered.tsv | tail -n+2 | sort -k1
+  $ cat metadata-filtered.tsv | tail -n+2 | sort -k1
   a\t1 (esc)
   b\t2 (esc)
   c\t3 (esc)
   d\t4 (esc)
-  $ cat $TMP/strains-filtered.txt | sort
+  $ cat strains-filtered.txt | sort
   a
   b
   c
   d
-  $ cat $TMP/sequences-filtered.fasta
+  $ cat sequences-filtered.fasta
   >a
   NNNN
   >b

diff --git a/tests/functional/filter/cram/filter-metadata-duplicates-error.t b/tests/functional/filter/cram/filter-metadata-duplicates-error.t
@@ -1,11 +1,10 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Error on duplicates in metadata within same chunk.
 
-  $ cat >$TMP/metadata-duplicates.tsv <<~~
+  $ cat >metadata-duplicates.tsv <<~~
   > strain	date
   > a	2010-10-10
   > a	2010-10-10
@@ -14,31 +13,31 @@ Error on duplicates in metadata within same chunk.
   > d	2010-10-10
   > ~~
   $ ${AUGUR} filter \
-  >   --metadata $TMP/metadata-duplicates.tsv \
+  >   --metadata metadata-duplicates.tsv \
   >   --group-by year \
   >   --sequences-per-group 2 \
   >   --subsample-seed 0 \
   >   --metadata-chunk-size 10 \
-  >   --output-metadata $TMP/metadata-filtered.tsv > /dev/null
+  >   --output-metadata metadata-filtered.tsv > /dev/null
   ERROR: The following strains are duplicated in .* (re)
   a
   [2]
-  $ cat $TMP/metadata-filtered.tsv
+  $ cat metadata-filtered.tsv
   cat: .*: No such file or directory (re)
   [1]
 
 Error on duplicates in metadata in separate chunks.
 
   $ ${AUGUR} filter \
-  >   --metadata $TMP/metadata-duplicates.tsv \
+  >   --metadata metadata-duplicates.tsv \
   >   --group-by year \
   >   --sequences-per-group 2 \
   >   --subsample-seed 0 \
   >   --metadata-chunk-size 1 \
-  >   --output-metadata $TMP/metadata-filtered.tsv > /dev/null
+  >   --output-metadata metadata-filtered.tsv > /dev/null
   ERROR: The following strains are duplicated in .* (re)
   a
   [2]
-  $ cat $TMP/metadata-filtered.tsv
+  $ cat metadata-filtered.tsv
   cat: .*: No such file or directory (re)
   [1]
diff --git a/tests/functional/filter/cram/filter-metadata-not-found-error.t b/tests/functional/filter/cram/filter-metadata-not-found-error.t
@@ -1,14 +1,13 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Try to filter on a metadata file that does not exist.
 
   $ ${AUGUR} filter \
   >  --metadata file-does-not-exist.tsv \
   >  --group-by year month \
   >  --sequences-per-group 1 \
-  >  --output-strains "$TMP/filtered_strains.txt" > /dev/null
+  >  --output-strains filtered_strains.txt > /dev/null
   ERROR: No such file or directory: 'file-does-not-exist.tsv'
   [2]
diff --git a/tests/functional/filter/cram/filter-metadata-sequence-strains-mismatch.t b/tests/functional/filter/cram/filter-metadata-sequence-strains-mismatch.t
@@ -1,7 +1,6 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Confirm that filtering omits strains without metadata or sequences.
 The input sequences are missing one strain that is in the metadata.
@@ -10,20 +9,25 @@ The list of strains to include has one strain with no metadata/sequence and one
 The query initially filters 3 strains from Colombia, one of which is added back by the include.
 
   $ ${AUGUR} filter \
-  >  --sequence-index filter/data/sequence_index.tsv \
-  >  --metadata filter/data/metadata.tsv \
+  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --query "country != 'Colombia'" \
   >  --non-nucleotide \
   >  --exclude-ambiguous-dates-by year \
-  >  --include filter/data/include.txt \
-  >  --output-strains "$TMP/filtered_strains.txt" \
-  >  --output-log "$TMP/filtered_log.tsv"
+  >  --include "$TESTDIR/../data/include.txt" \
+  >  --output-strains filtered_strains.txt \
+  >  --output-log filtered_log.tsv
   4 strains were dropped during filtering
   \t1 had no metadata (esc)
   \t1 had no sequence data (esc)
   \t3 of these were filtered out by the query: "country != 'Colombia'" (esc)
-  \t1 strains were added back because they were in filter/data/include.txt (esc)
+  \\t1 strains were added back because they were in .*include\.txt.* (re)
   9 strains passed all filters
 
-  $ diff -u <(sort -k 1,1 filter/data/filtered_log.tsv) <(sort -k 1,1 "$TMP/filtered_log.tsv")
-  $ rm -f "$TMP/filtered_strains.txt"
+  $ sort -k 1,1 filtered_log.tsv
+  COL/FLR_00008/2015\tfilter_by_query\t"[[""query"", ""country != 'Colombia'""]]" (esc)
+  COL\/FLR_00008\/2015\\tforce_include_strains\\t"\[\[""include_file"", "".*""\]\]".* (re)
+  COL/FLR_00024/2015\tfilter_by_query\t"[[""query"", ""country != 'Colombia'""]]" (esc)
+  Colombia/2016/ZC204Se\tfilter_by_query\t"[[""query"", ""country != 'Colombia'""]]" (esc)
+  HND/2016/HU_ME59\tfilter_by_sequence_index\t[] (esc)
+  strain\tfilter\tkwargs (esc)
diff --git a/tests/functional/filter/cram/filter-min-date.t b/tests/functional/filter/cram/filter-min-date.t
@@ -1,13 +1,11 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Filter using only metadata without a sequence index.
 This should work because the requested filters don't rely on sequence information.
 
   $ ${AUGUR} filter \
-  >  --metadata filter/data/metadata.tsv \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-date 2012 \
-  >  --output-strains "$TMP/filtered_strains.txt" > /dev/null
-  $ rm -f "$TMP/filtered_strains.txt"
+  >  --output-strains filtered_strains.txt > /dev/null
diff --git a/tests/functional/filter/cram/filter-min-length-no-sequence-index-error.t b/tests/functional/filter/cram/filter-min-length-no-sequence-index-error.t
@@ -1,14 +1,13 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Try to filter using only metadata without a sequence index.
 This should fail because the requested filters rely on sequence information.
 
   $ ${AUGUR} filter \
-  >  --metadata filter/data/metadata.tsv \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-length 10000 \
-  >  --output-strains "$TMP/filtered_strains.txt" > /dev/null
+  >  --output-strains filtered_strains.txt > /dev/null
   ERROR: You need to provide a sequence index or sequences to filter on sequence-specific information.
   [2]
diff --git a/tests/functional/filter/cram/filter-min-length-output-metadata.t b/tests/functional/filter/cram/filter-min-length-output-metadata.t
@@ -1,19 +1,18 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Filter using only metadata without sequence input or output and save results as filtered metadata.
 
   $ ${AUGUR} filter \
-  >  --sequence-index filter/data/sequence_index.tsv \
-  >  --metadata filter/data/metadata.tsv \
+  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-date 2012 \
   >  --min-length 10500 \
-  >  --output-metadata "$TMP/filtered_metadata.tsv" > /dev/null
+  >  --output-metadata filtered_metadata.tsv > /dev/null
 
 Output should include the 8 sequences matching the filters and a header line.
 
-  $ wc -l "$TMP/filtered_metadata.tsv"
+  $ wc -l filtered_metadata.tsv
   \s*9 .* (re)
-  $ rm -f "$TMP/filtered_metadata.tsv"
+  $ rm -f filtered_metadata.tsv
diff --git a/tests/functional/filter/cram/filter-min-length-output-strains.t b/tests/functional/filter/cram/filter-min-length-output-strains.t
@@ -1,19 +1,17 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Filter using only metadata and save results as a list of filtered strains.
 
   $ ${AUGUR} filter \
-  >  --sequence-index filter/data/sequence_index.tsv \
-  >  --metadata filter/data/metadata.tsv \
+  >  --sequence-index "$TESTDIR/../data/sequence_index.tsv" \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-date 2012 \
   >  --min-length 10500 \
-  >  --output-strains "$TMP/filtered_strains.txt" > /dev/null
+  >  --output-strains filtered_strains.txt > /dev/null
 
 Output should include only the 8 sequences matching the filters (without a header line).
 
-  $ wc -l "$TMP/filtered_strains.txt"
+  $ wc -l filtered_strains.txt
   \s*8 .* (re)
-  $ rm -f "$TMP/filtered_strains.txt"
diff --git a/tests/functional/filter/cram/filter-min-max-date-output.t b/tests/functional/filter/cram/filter-min-max-date-output.t
@@ -1,15 +1,14 @@
 Setup
 
-  $ pushd "$TESTDIR" > /dev/null
-  $ source _setup.sh
+  $ export AUGUR="$TESTDIR/../../../../bin/augur"
 
 Check output of min/max date filters.
 
   $ ${AUGUR} filter \
-  >  --metadata filter/data/metadata.tsv \
+  >  --metadata "$TESTDIR/../data/metadata.tsv" \
   >  --min-date 2015-01-01 \
   >  --max-date 2016-02-01 \
-  >  --output-metadata "$TMP/filtered_metadata.tsv"
+  >  --output-metadata filtered_metadata.tsv
   8 strains were dropped during filtering
   \t1 of these were dropped because they were earlier than 2015.0 or missing a date (esc)
   \t7 of these were dropped because they were later than 2016.09 or missing a date (esc)