From fc6c6cc45c50d8c73c9c0a6ebe652741cfbe58e0 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 2 Feb 2024 12:31:08 -0800 Subject: [PATCH] fixup: add two tests (1) use 'name' (2) use first column --- tests/functional/parse.t | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/functional/parse.t b/tests/functional/parse.t index a79b9deb8..43f9b00ed 100644 --- a/tests/functional/parse.t +++ b/tests/functional/parse.t @@ -75,6 +75,39 @@ Parse Zika sequences into sequences and metadata, preferred default ids is 'stra $ diff -u "parse/sequences.fasta" "$TMP/sequences.fasta" $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv" +Parse Zika sequences into sequences and metadata when there is no 'strain' field. +This should use the 2nd entry in DEFAULT_ID_COLUMNS ('strain', 'name'), i.e. 'name', as the id field instead. + + $ ${AUGUR} parse \ + > --sequences parse/zika.fasta \ + > --output-sequences "$TMP/sequences.fasta" \ + > --output-metadata "$TMP/metadata.tsv" \ + > --fields col1 virus name date region country division city db segment authors url title journal paper_url \ + > --prettify-fields region country division city \ + > --fix-dates monthfirst + + $ diff -u "parse/sequences_other.fasta" "$TMP/sequences.fasta" + $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv" + +Parse Zika sequences into sequences and metadata when no --output-id-field is provided and none of the fields match DEFAULT_ID_COLUMNS, i.e. ('strain', 'name'). +This should use the first field as the id field and the metadata should not have an extra 'strain' or 'name' column. 
+ + $ ${AUGUR} parse \ + > --sequences parse/zika.fasta \ + > --output-sequences "$TMP/sequences.fasta" \ + > --output-metadata "$TMP/metadata.tsv" \ + > --fields col1 virus col3 date region country division city db segment authors url title journal paper_url \ + > --prettify-fields region country division city \ + > --fix-dates monthfirst + + $ diff -u "parse/sequences.fasta" "$TMP/sequences.fasta" + $ diff "parse/metadata.tsv" "$TMP/metadata.tsv" | tr '>' '+' | tr '<' '-' + 1c1 + - strain\tvirus\taccession\tdate\tregion\tcountry\tdivision\tcity\tdb\tsegment\tauthors\turl\ttitle\tjournal\tpaper_url (esc) + --- + + col1\tvirus\tcol3\tdate\tregion\tcountry\tdivision\tcity\tdb\tsegment\tauthors\turl\ttitle\tjournal\tpaper_url (esc) + $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv" + Parse compressed Zika sequences into sequences and metadata. $ ${AUGUR} parse \