Review notes (free text ahead of the first file header is skipped by patch tools).
Snakefile: the download rule writes data/sequences_all.fasta.zst and
data/metadata_all.tsv.zst, which are the paths the decompress rule reads.
bin/set_final_strain_name.py: a row without a display name keeps its strain id
as node name, and metadata is read as text so ids match the JSON node names.

diff --git a/Snakefile b/Snakefile
index d324ded..56dab5a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,3 +1,6 @@
+if not config:
+    configfile: "config/config_zika.yaml"
+
 rule all:
     input:
         auspice_json = "auspice/zika.json",
@@ -17,10 +20,10 @@ rule download:
     message: "Downloading sequences and metadata from data.nextstrain.org"
     output:
-        sequences = "data/sequences.fasta.zst",
-        metadata = "data/metadata.tsv.zst"
+        sequences = "data/sequences_all.fasta.zst",
+        metadata = "data/metadata_all.tsv.zst",
     params:
-        sequences_url = "https://data.nextstrain.org/files/zika/sequences.fasta.zst",
-        metadata_url = "https://data.nextstrain.org/files/zika/metadata.tsv.zst"
+        sequences_url = "https://data.nextstrain.org/files/workflows/zika/sequences_all.fasta.zst",
+        metadata_url = "https://data.nextstrain.org/files/workflows/zika/metadata_all.tsv.zst"
     shell:
         """
         curl -fsSL --compressed {params.sequences_url:q} --output {output.sequences}
@@ -30,8 +33,8 @@ rule download:
 rule decompress:
     message: "Decompressing sequences and metadata"
     input:
-        sequences = "data/sequences.fasta.zst",
-        metadata = "data/metadata.tsv.zst"
+        sequences = "data/sequences_all.fasta.zst",
+        metadata = "data/metadata_all.tsv.zst"
     output:
         sequences = "data/sequences.fasta",
         metadata = "data/metadata.tsv"
@@ -41,6 +44,20 @@ rule decompress:
         zstd -d -c {input.metadata} > {output.metadata}
         """
 
+rule wrangle_metadata:
+    input:
+        metadata="data/metadata.tsv"
+    output:
+        metadata="results/wrangled_metadata.tsv",
+    params:
+        strain_id=lambda w: config.get("strain_id_field", "strain"),
+    shell:
+        """
+        csvtk -t rename -f strain -n strain_original {input.metadata} \
+            | csvtk -t mutate -f {params.strain_id} -n strain > {output.metadata}
+        """
+
+
 rule filter:
     message:
         """
@@ -51,8 +68,8 @@ rule filter:
           - minimum genome length of {params.min_length} (50% of Zika virus genome)
         """
     input:
-        sequences = rules.decompress.output.sequences,
-        metadata = rules.decompress.output.metadata,
+        sequences = "data/sequences.fasta",
+        metadata = rules.wrangle_metadata.output.metadata,
         exclude = files.dropped_strains
     output:
         sequences = "results/filtered.fasta"
@@ -120,7 +137,7 @@ rule refine:
     input:
         tree = rules.tree.output.tree,
         alignment = rules.align.output,
-        metadata = rules.decompress.output.metadata
+        metadata = rules.wrangle_metadata.output.metadata
     output:
         tree = "results/tree.nwk",
         node_data = "results/branch_lengths.json"
@@ -186,7 +203,7 @@ rule traits:
         """
     input:
         tree = rules.refine.output.tree,
-        metadata = rules.decompress.output.metadata
+        metadata = rules.wrangle_metadata.output.metadata
     output:
         node_data = "results/traits.json",
     params:
@@ -207,7 +224,7 @@ rule export:
     message: "Exporting data files for for auspice"
     input:
         tree = rules.refine.output.tree,
-        metadata = rules.decompress.output.metadata,
+        metadata = rules.wrangle_metadata.output.metadata,
         branch_lengths = rules.refine.output.node_data,
         traits = rules.traits.output.node_data,
         nt_muts = rules.ancestral.output.node_data,
@@ -216,7 +233,8 @@ rule export:
         auspice_config = files.auspice_config,
         description = files.description
     output:
-        auspice_json = rules.all.input.auspice_json
+        auspice_json = "results/raw_zika.json",
+        root_sequence = "results/raw_zika_root-sequence.json",
     shell:
         """
         augur export v2 \
@@ -230,6 +248,27 @@ rule export:
             --output {output.auspice_json}
         """
 
+rule final_strain_name:
+    input:
+        auspice_json=rules.export.output.auspice_json,
+        metadata=rules.wrangle_metadata.output.metadata,
+        root_sequence=rules.export.output.root_sequence,
+    output:
+        auspice_json=rules.all.input.auspice_json,
+        root_sequence="auspice/zika_root-sequence.json",
+    params:
+        display_strain_field=lambda w: config.get("display_strain_field", "strain"),
+    shell:
+        """
+        python3 bin/set_final_strain_name.py \
+            --metadata {input.metadata} \
+            --input-auspice-json {input.auspice_json} \
+            --display-strain-name {params.display_strain_field} \
+            --output {output.auspice_json}
+
+        cp {input.root_sequence} {output.root_sequence}
+        """
+
 rule clean:
     message: "Removing directories: {params}"
     params:
diff --git a/bin/set_final_strain_name.py b/bin/set_final_strain_name.py
new file mode 100755
index 0000000..0036f2a
--- /dev/null
+++ b/bin/set_final_strain_name.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""Replace the node names of an Auspice JSON tree with a metadata display field.
+
+The tree is built with the metadata ``strain`` column as node name; this script
+swaps each node name for the value of another column of the same metadata row,
+so the exported dataset can show a different strain name than the build used.
+"""
+import argparse
+import json
+
+import pandas as pd
+
+
+def build_name_lookup(metadata, display_field, id_field="strain"):
+    """Map every strain id in ``metadata`` to the name that should be displayed.
+
+    Rows without an id are ignored.  Rows whose display value is missing keep
+    their id as the name, so node names stay unique instead of all collapsing
+    onto the literal column name.
+
+    Raises ``ValueError`` when either column is absent from ``metadata``.
+    """
+    missing = [col for col in (id_field, display_field) if col not in metadata.columns]
+    if missing:
+        raise ValueError("metadata is missing required column(s): " + ", ".join(missing))
+
+    rows = metadata[metadata[id_field].notna()]
+    ids = rows[id_field]
+    display = rows[display_field]
+    return dict(zip(ids, display.where(display.notna(), ids)))
+
+
+def replace_name_recursive(node, lookup):
+    """Rename ``node`` and all of its descendants in place.
+
+    Every node whose ``name`` is a key of ``lookup`` gets the mapped value;
+    any other node is left untouched.  The tree is walked with an explicit
+    stack so that deep, ladder-like trees cannot hit Python's recursion limit.
+    """
+    pending = [node]
+    while pending:
+        current = pending.pop()
+        name = current.get("name")
+        if name in lookup:
+            current["name"] = lookup[name]
+        pending.extend(current.get("children", ()))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Swaps out the strain names in the Auspice JSON with the final strain name",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json")
+    parser.add_argument('--metadata', type=str, required=True, help="input data")
+    parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice")
+    parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON")
+    args = parser.parse_args()
+
+    # Read every column as text: ids and names that look numeric must stay
+    # strings to match the JSON node names and to remain JSON-serialisable.
+    metadata = pd.read_csv(args.metadata, sep='\t', dtype=str)
+    name_lookup = build_name_lookup(metadata, args.display_strain_name)
+
+    with open(args.input_auspice_json, 'r', encoding='utf-8') as fh:
+        data = json.load(fh)
+
+    replace_name_recursive(data['tree'], name_lookup)
+
+    with open(args.output, 'w', encoding='utf-8') as fh:
+        json.dump(data, fh)
diff --git a/config/auspice_config.json b/config/auspice_config.json
index b3ba0c8..be302f4 100644
--- a/config/auspice_config.json
+++ b/config/auspice_config.json
@@ -5,6 +5,11 @@
   ],
   "build_url": "https://github.com/nextstrain/zika",
   "colorings": [
+    {
+      "key": "strain_original",
+      "title": "strain name",
+      "type": "categorical"
+    },
     {
       "key": "gt",
       "title": "genotype",
diff --git 
a/config/config_zika.yaml b/config/config_zika.yaml new file mode 100644 index 0000000..aa8d020 --- /dev/null +++ b/config/config_zika.yaml @@ -0,0 +1,2 @@ +strain_id_field: "accession" +display_strain_field: "strain_original" \ No newline at end of file diff --git a/config/dropped_strains.txt b/config/dropped_strains.txt index 22b5878..746e1ba 100644 --- a/config/dropped_strains.txt +++ b/config/dropped_strains.txt @@ -1,86 +1,87 @@ -PF13/251013_18 # reference included in config/zika_reference.gb -AFMC_U # too basal -AFMC_S # too basal -Boracay/16423 # too basal -JMB_185 # too basal -PHL/2012/CPC_0740 # too basal +MG827392 +KX369547 # PF13/251013_18 # reference included in config/zika_reference.gb +KY553111 # AFMC_U # too basal +KY962729 # AFMC_S # too basal +KY120353 # Boracay/16423 # too basal +KU179098 # JMB_185 # too basal +KU681082 # PHL/2012/CPC_0740 # too basal VIE/Bra/2016 # too basal -Dominican_Republic/2016/PD2 # duplicate of other strain in dataset -GD01 # duplicate of other strain in dataset -GDZ16001 # duplicate of other strain in dataset -VEN/UF_2/2016 # duplicate of other strain in dataset -ZZ_1 # duplicate of other strain in dataset -VR10599/Pavia/2016 # export with unknown origin -34997/Pavia/2016 # export with unknown origin -COL/FLR_00001/2015 # duplicate of COL/FLR/2015 -COL/FLR_00002/2015 # duplicate of COL/FLR/2015 -COL/FLR_00003/2015 # duplicate of COL/FLR/2015 -COL/FLR_00004/2015 # duplicate of COL/FLR/2015 -COL/FLR_00005/2015 # duplicate of COL/FLR/2015 -COL/FLR_00006/2015 # duplicate of COL/FLR/2015 -COL/FLR_00007/2015 # duplicate of COL/FLR/2015 -COL/FLR_00008/2015 # duplicate of COL/FLR/2015 -COL/FLR_00009/2015 # duplicate of COL/FLR/2015 -COL/FLR_00010/2015 # duplicate of COL/FLR/2015 -COL/FLR_00011/2015 # duplicate of COL/FLR/2015 -COL/FLR_00012/2015 # duplicate of COL/FLR/2015 -COL/FLR_00013/2015 # duplicate of COL/FLR/2015 -COL/FLR_00014/2015 # duplicate of COL/FLR/2015 -COL/FLR_00015/2015 # duplicate of COL/FLR/2015 
-COL/FLR_00016/2015 # duplicate of COL/FLR/2015 -COL/FLR_00017/2015 # duplicate of COL/FLR/2015 -COL/FLR_00018/2015 # duplicate of COL/FLR/2015 -COL/FLR_00019/2015 # duplicate of COL/FLR/2015 -COL/FLR_00020/2015 # duplicate of COL/FLR/2015 -COL/FLR_00021/2015 # duplicate of COL/FLR/2015 -COL/FLR_00022/2015 # duplicate of COL/FLR/2015 -COL/FLR_00023/2015 # duplicate of COL/FLR/2015 -COL/FLR_00024/2015 # duplicate of COL/FLR/2015 -COL/FLR_00025/2015 # duplicate of COL/FLR/2015 -COL/FLR_00026/2015 # duplicate of COL/FLR/2015 -COL/FLR_00034/2015 # duplicate of COL/FLR/2015 -COL/FLR_00035/2015 # duplicate of COL/FLR/2015 -COL/FLR_00036/2015 # duplicate of COL/FLR/2015 -COL/FLR_00038/2015 # duplicate of COL/FLR/2015 -COL/FLR_00040/2015 # duplicate of COL/FLR/2015 -COL/FLR_00041/2015 # duplicate of COL/FLR/2015 -COL/FLR_00042/2015 # duplicate of COL/FLR/2015 -COL/PRV_00027/2015 # misdated -COL/PRV_00028/2015 # misdated -COL/PAN_00029/2015 # misdated -COL/PAN_00030/2015 # misdated -BRA/2016/FC_DQ12D1 # large indel -Brazil/2016/ZBRX8 # large indel -Brazil/2016/ZBRX11 # large indel -CX17 # large indel -MEX/2016/mex27 # large indel -MEX/2016/mex50 # large indel -SLV/2016/ElSalvador_1055 # large indel -USVI/20/2016 # large indel +KU853013 # Dominican_Republic/2016/PD2 # duplicate of other strain in dataset +KU740184 # GD01 # duplicate of other strain in dataset +KU761564 # GDZ16001 # duplicate of other strain in dataset +KX893855 # VEN/UF_2/2016 # duplicate of other strain in dataset +KY927808 # ZZ_1 # duplicate of other strain in dataset +KY003154 # VR10599/Pavia/2016 # export with unknown origin +KY003153 # 34997/Pavia/2016 # export with unknown origin +MF574552 # COL/FLR_00001/2015 # duplicate of COL/FLR/2015 +MF574559 # COL/FLR_00002/2015 # duplicate of COL/FLR/2015 +MF574560 # COL/FLR_00003/2015 # duplicate of COL/FLR/2015 +MF574561 # COL/FLR_00004/2015 # duplicate of COL/FLR/2015 +MF574571 # COL/FLR_00005/2015 # duplicate of COL/FLR/2015 +MF574555 # COL/FLR_00006/2015 # 
duplicate of COL/FLR/2015 +MF574557 # COL/FLR_00007/2015 # duplicate of COL/FLR/2015 +MF574562 # COL/FLR_00008/2015 # duplicate of COL/FLR/2015 +MF574572 # COL/FLR_00009/2015 # duplicate of COL/FLR/2015 +MF574570 # COL/FLR_00010/2015 # duplicate of COL/FLR/2015 +MF574565 # COL/FLR_00011/2015 # duplicate of COL/FLR/2015 +MF574568 # COL/FLR_00012/2015 # duplicate of COL/FLR/2015 +MF574558 # COL/FLR_00013/2015 # duplicate of COL/FLR/2015 +MF574576 # COL/FLR_00014/2015 # duplicate of COL/FLR/2015 +MF574567 # COL/FLR_00015/2015 # duplicate of COL/FLR/2015 +MF574575 # COL/FLR_00016/2015 # duplicate of COL/FLR/2015 +MF574553 # COL/FLR_00017/2015 # duplicate of COL/FLR/2015 +MF574573 # COL/FLR_00018/2015 # duplicate of COL/FLR/2015 +MF574574 # COL/FLR_00019/2015 # duplicate of COL/FLR/2015 +MF574577 # COL/FLR_00020/2015 # duplicate of COL/FLR/2015 +MF574556 # COL/FLR_00021/2015 # duplicate of COL/FLR/2015 +MF574554 # COL/FLR_00022/2015 # duplicate of COL/FLR/2015 +MF574566 # COL/FLR_00023/2015 # duplicate of COL/FLR/2015 +MF574569 # COL/FLR_00024/2015 # duplicate of COL/FLR/2015 +MF574563 # COL/FLR_00025/2015 # duplicate of COL/FLR/2015 +MF574564 # COL/FLR_00026/2015 # duplicate of COL/FLR/2015 +MF574581 # COL/FLR_00034/2015 # duplicate of COL/FLR/2015 +MF574588 # COL/FLR_00035/2015 # duplicate of COL/FLR/2015 +MF574582 # COL/FLR_00036/2015 # duplicate of COL/FLR/2015 +MF574586 # COL/FLR_00038/2015 # duplicate of COL/FLR/2015 +MF574584 # COL/FLR_00040/2015 # duplicate of COL/FLR/2015 +MF574583 # COL/FLR_00041/2015 # duplicate of COL/FLR/2015 +MF574580 # COL/FLR_00042/2015 # duplicate of COL/FLR/2015 +MF574579 # COL/PRV_00027/2015 # misdated +MF574578 # COL/PRV_00028/2015 # misdated +MF574585 # COL/PAN_00029/2015 # misdated +MF574587 # COL/PAN_00030/2015 # misdated +KY785436 # BRA/2016/FC_DQ12D1 # large indel +KY559010 # Brazil/2016/ZBRX8 # large indel +KY559011 # Brazil/2016/ZBRX11 # large indel +KX986761 # CX17 # large indel +MF801405 # MEX/2016/mex27 # large indel 
+MF801424 # MEX/2016/mex50 # large indel +MF801377 # SLV/2016/ElSalvador_1055 # large indel +VI20_12plex # USVI/20/2016 # large indel USVI/21/2016 # large indel -USVI/23/2016 # large indel -USVI/27/2016 # large indel -USVI/30/2016 # large indel -USVI/32/2016 # large indel -Thailand/1605aTw # excess divergence -VE_Ganxian # excess divergence -ZK_YN001 # excess divergence -Haiti/0029/2014 # contamination present -Haiti/0033/2014 # contamination present -Haiti/0036/2014 # contamination present -Haiti/0054/2014 # contamination present -Haiti/0074/2014 # contamination present -Haiti/0097/2014 # contamination present -mosquito/Haiti/1682/2016 # contamination present +VI23_12plex # USVI/23/2016 # large indel +VI27_1d # USVI/27/2016 # large indel +VI30_1d # USVI/30/2016 # large indel +VI32_12plex # USVI/32/2016 # large indel +KY126351 # Thailand/1605aTw # excess divergence +KU744693 # VE_Ganxian # excess divergence +KY328290 # ZK_YN001 # excess divergence +KY415986 # Haiti/0029/2014 # contamination present +KY415987 # Haiti/0033/2014 # contamination present +KY415990 # Haiti/0036/2014 # contamination present +KY415988 # Haiti/0054/2014 # contamination present +KY415989 # Haiti/0074/2014 # contamination present +KY415991 # Haiti/0097/2014 # contamination present +MF384325 # mosquito/Haiti/1682/2016 # contamination present ZF36_36S # contamination present -MR766 # lab strain -Aedes_sp/MEX_I_44/2016 # duplicate of Aedes_aegypti/MEX/MEX_I_44/2016 -Puerto_Rico/2015/PRVABC59 # duplicate of PRVABC59 -V15555 # highly diverged -DK # lab strain -DK23 # lab strain -rGZ02a/2018 # highly diverged -rGZ02p/2018 # highly diverged -V211784 # highly diverged -LMM/AG5643 -Faranah/18 +MK105975 # MR766 # lab strain +KX856011 # Aedes_sp/MEX_I_44/2016 # duplicate of Aedes_aegypti/MEX/MEX_I_44/2016 +MK028857 # Puerto_Rico/2015/PRVABC59 # duplicate of PRVABC59 +MN025403 # V15555 # highly diverged +MT505349 # DK # lab strain +MT505350 # DK23 # lab strain +MW680969 # rGZ02a/2018 # highly diverged 
+MW680970 # rGZ02p/2018 # highly diverged +OK054351 # V211784 # highly diverged +MT478034 # LMM/AG5643 +OL414716 # Faranah/18 diff --git a/example_data/sequences.fasta b/example_data/sequences.fasta index 64facba..9203c90 100644 --- a/example_data/sequences.fasta +++ b/example_data/sequences.fasta @@ -1,4 +1,4 @@ ->PAN/CDC_259359_V1_V3/2015 +>KX156774 gaatttgaagcgaatgctaacaacagtatcaacaggttttattttggatttggaaacgag agtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgc taaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggac @@ -179,7 +179,7 @@ gaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaag agggactagtggttagaggagaccccccggaaaacgcaaaacagcatattgacgctggga aagaccagagactccatgagtttccaccacgctggccgccaggcacagatcgccgaatag cggcggccggtgtggggaaatccatgggtct ->COL/FLR_00024/2015 +>MF574569 tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttatt ttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggatt ccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaa @@ -358,7 +358,7 @@ agctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgct gcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcagga tgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcag ctgtggatctccagaagagggactagtggttagaggaga ->PRVABC59 +>KU501215 gttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaaca gtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaa aaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgag @@ -537,7 +537,7 @@ tgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatgg cacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccac gcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggg gcctgaactggagatcagctgtggatctccagaagagggactagtggttagagga ->COL/FLR_00008/2015 +>MF574562 tcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttatt ttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggatt ccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaa @@ -716,7 +716,7 @@ agctgggaaaccaagcctatagtcaggccgagaacgccatggcacggaagaagccatgct 
gcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcagga tgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcag ctgtggatctccagaagagggactagtggttagaggaga ->Colombia/2016/ZC204Se +>KY317939 gacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttg gaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgt caatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgcc @@ -894,7 +894,7 @@ agtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaa accaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgag cccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaag aaggtggcgaccttccccacccttcaatctggggcctgaactggagat ->ZKC2/2016 +>KX253996 agttgttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaac agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga @@ -1076,7 +1076,7 @@ ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca tgggtct ->VEN/UF_1/2016 +>KX702400 agttgttactgttgctgactcagactgcgacagttcgagtttgaagcgaaagctagcaac agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga @@ -1258,7 +1258,7 @@ ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca tgggtctt ->DOM/2016/BB_0059 +>KY785425 tggctgccatgctgagaataatcaatgctaggaaggagaagaagagacgaggcgcagata ctagtgtcggaattgttggcctcctgctgaccacagctatggcagcggaggtcactagac gtgggagtgcatactacatgtacttggacagaaacgatgctggggaggccatatctttcc @@ -1427,7 +1427,7 @@ ggtgtggatctctcatagggcacagaccgcgcaccacctgggctgagaacattaaaaaca cagtcaacatggtgcgcaggatcataggtgaggaagaaaagtacatggactacctatcca cccaagttcgctacttgggtgaagaagggtctacacctggagtgctgtaagcaccaatct taatgttgtcaggcc ->BRA/2016/FC_6706 +>KY785433 agtttgaagcgaaagctagcaacagtatcaacaggttttatttyggatttggaaacgaga 
gtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgct aaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggact @@ -1601,7 +1601,7 @@ cattccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagacc gcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcatagg tgatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagg gtctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc ->DOM/2016/BB_0183 +>KY785420 gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagag tttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgcta aaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggactt @@ -1780,7 +1780,7 @@ tagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagag gacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcga ccttccccacccttcaatctggggcctgaactggagatcagctgtggatccccagaagag g ->EcEs062_16 +>KX879603 agtagttgatctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaac agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga @@ -1962,7 +1962,7 @@ ggcctgaactggagatcagctgtggatctccagaagagggactagtggttagaggagacc ccccggaaaacgcaaaacagcatattgacgctgggaaagaccagagactccatgagtttc caccacgctggccgccaggcacagatcgccgaatagcggcggccggtgtggggaaatcca tgggagatcgga ->HND/2016/HU_ME59 +>KY785418 gtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagag tttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgcta aaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggactt @@ -2136,7 +2136,7 @@ attccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccg cgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggt gatgaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaaggg tctacacctggagtgctgtaagcaccaatcttaatgttgtcaggc ->DOM/2016/MA_WGS16_011 +>KY785484 aagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtttct ggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacg cggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgct @@ -2314,7 +2314,7 @@ ggggaaagctgtgcagcctgtgacccccccaggagaagctgggaaaccaagcctatagtc 
aggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagaggacac tgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttc cccacccttcaatctggggcctgaactggggatcag ->DOM/2016/BB_0433 +>KY785441 tttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagt ttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaa aacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttc @@ -2488,7 +2488,7 @@ ttccctatttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgc gcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtg aggaagaaaagtacatggactacctatccacccaagttcgctacttgggtgaagaagggt ctacacctggagtgctgtaagcaccaatcctaatgttgtcaggcc ->USA/2016/FL022 +>KY075935 gcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatt tggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggatt gtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctg @@ -2662,7 +2662,7 @@ aaatggacagacattccctatttgggaaaaagggaagacttgtggtgtggatctctcata gggcacagaccgcgcaccacctgggctgagaacattaaaaacacagtcaacatggtgcgc aggatcataggtgaggaagaaaagtacatggactacctatccacccaagtccgctacttg ggtgaagaagggtctacacctggagtgctgtaagcaccaatctta ->SG_027 +>KY241697 ctgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttgga tttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccgga ttgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggc @@ -2842,7 +2842,7 @@ tgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatggga aaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagatcagctgtg gatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaaacagcata ttgacgctgggaaagaccagagactccatgagtttccaccacgctggccgccag ->SG_074 +>KY241744 gaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggtttt attttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggagg attccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggctt @@ -3023,7 +3023,7 @@ ggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagat cagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaa acagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggccgcc aggcacagatcgccgaatagcg ->SG_056 +>KY241726 
gaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggtttt attttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggagg attccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggctt @@ -3203,7 +3203,7 @@ gctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgca ggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactggagat cagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacgcaaa acagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggcc ->USA/2016/FLUR022 +>KY325473 gtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacagg ttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccg gaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggg @@ -3384,7 +3384,7 @@ cgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctggggcctgaactgg agatcagctgtggatctccagaagagggactagtggttagaggagaccccccggaaaacg caaaacagcatattgacgctgggaaagaccagagactccatgagtttccaccacgctggc cgccaggcacagatcgccgaatagcggcggccggtgtggggaaatc ->Aedes_aegypti/USA/2016/FL05 +>KY075937 gacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttg gaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgt caatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgcc @@ -3562,7 +3562,7 @@ agtcagccacagcttggggaaagctgtgcagcctgtgacccccccaggagaagctgggaa accaagcctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgag cccctcagaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaag aaggtggcgaccttccccacccttcaatctggggcctgaactggagat ->SG_018 +>KY241688 atgnnnnnnnnnnnnnnnnnnnccggaggattccggattgtcaatatgctaaaacgcgga gtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggt catgggcccatcaggatggtcttggcgattctagcctttttgaggttcacggcaatcaag @@ -3741,7 +3741,7 @@ tcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttcccca cccttcaatctggggcctgaactggagatcagctgtggatctccagaagagggactagtg gttagaggagaccccccggaaaacgcaaaacagcatattgacgctgggaaagaccagaga ctccatgagtttccaccacgctggccgccaggcacagat ->USA/2016/FLWB042 +>KY325478 ctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatcaggat ggtcttggcgattctagcctttttgagattcacggcaatcaagccatcactgggtctcat 
caatagatggggttcagtggggaaaaaagaggctatggaaataataaagaagttcaagaa @@ -3916,7 +3916,7 @@ aatctcaatgttgtcaggcctgctagtcagccacagcttggggaaagctgtgcagcctgt gacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatggcac ggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaaccccacgcg cttggaggcgcaggnnnnnnaaagaag ->COL/PRV_00028/2015 +>MF574578 ttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacgagagtt tctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaa acgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttct @@ -4095,7 +4095,7 @@ gtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctcagagga cactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtggcgacc ttccccacccttcaatctggggcctgaactggagatcagctgtggatctccagaagaggg actagtggttagaggaga ->Thailand/1610acTw +>MF692778 gcaacagtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaa cccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccg tgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggccatgggcc @@ -4271,7 +4271,7 @@ ggactacctatccacccaagttcgctacttgggtgaagaagggtctacacctggagtgct gtaagcaccaatcttagtgttgtcaggcctgctagtcagccacagcttggggaaagctgt gcagcctgtgacccccccaggagaagctgggaaaccaagcccatagtcaggccgagaacg ccatggcacggaag ->1_0087_PF +>KX447509 agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga gcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatca @@ -4449,7 +4449,7 @@ ctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatg gcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaacccca cgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctgg ggcctgaactggagatcagctgtggat ->1_0199_PF +>KX447519 actgcgacagttcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttgg atttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccgg attgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagagg @@ -4603,7 +4603,7 @@ tgggctctagtggacaaggaaagagagcaccacctgagaggagagtgccagagttgtgtg tacaacatgatgggaaaaagagaaaagaaacaaggggaatttggaaaggccaagggcagc 
cgcgccatctggtatatgtggctaggggctagatttctagagttcgaagcccttggattc ttgaacgaggatcactggatgg ->1_0181_PF +>KX447512 agtatcaacaggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaa aaaagaaatccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtga gcccctttgggggcttgaagaggctgccagccggacttctgctgggtcatgggcccatca @@ -4781,7 +4781,7 @@ ctgtgacccccccaggagaagctgggaaaccaagcctatagtcaggccgagaacgccatg gcacggaagaagccatgctgcctgtgagcccctcagaggacactgagtcaaaaaacccca cgcgcttggaggcgcaggatgggaaaagaaggtggcgaccttccccacccttcaatctgg ggcctgaactggagatcagctgtgga ->Brazil/2015/ZBRC301 +>KY558995 gatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccg gattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagag gctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagc @@ -4950,7 +4950,7 @@ gactgcttgcctagcaaaatcatatgcgcagatgtggcagctcctttatttccacagaan ggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaac tgggagaactacctggtcaatccatggaaanggagaatggatgaccactgaagacatgct tg ->Brazil/2015/ZBRA105 +>KY558989 gatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccg gattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagag gctgccagccggacttctgctgggtcatgggcccatcaggatggtcttggcgattctagc @@ -5119,7 +5119,7 @@ gactgcttgcctagcaaaatcatatgcgcaaatgtggcagctcctttatttccacagaag ggacctccgactgatggccaatgccatttgttcatctgtgccagttgactgggttccaac tgggagaactacctggtcaatccatggaaagggagaatggatgaccactgaagacatgct tg ->Brazil/2016/ZBRC16 +>KY558991 tgagaataatcaatgctaggaaggagaagaagagacgaggcgcagatactagtgtcggaa ttgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcat actatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattgg @@ -5272,7 +5272,7 @@ atgcagatgacactgctggctgggacacccgcatcagcaggtttgatctggagaatgaag ctctaatcaccaaccaaatggagagagggcacagggccttggcattggccataatcaagt acacataccaaaacaaagtggtaaaggtccttagaccagctgaaaaagggaaaacagtta tggacatcatttcgagacaagaccaaaggggg ->V8375 +>KU501217 atgaaaaacccaaaaaagaaatccggaggattccggattgtcaatatgctaaaacgcgga gtagcccgtgtgagcccctttgggggcttgaagaggctgccagccggacttctgctgggt 
catgggcccatcaggatggtcttggcgattctagcctttttgagattcacggcaatcaag @@ -5445,7 +5445,7 @@ ttgggaaaaagggaagacttgtggtgtggatctctcatagggcacagaccgcgcaccacc tgggctgagaacattaaaaacacagtcaacatggtgcgcaggatcataggtgatgaagaa aagtacatggactacctatccacccaagttcgctacttgggtgaagaagggtctacacct ggagtgctgtaa ->Nica1_16 +>KX421195 tcgagtttgaagcgaaagctagcaacagtatcaacaggttttattttggatttggaaacg agagtttctggtcatgaaaaacccaaaaaagaaatccggaggattccggattgtcaatat gctaaaacgcggagtagcccgtgtgagcccctttgggggcttgaagaggctgccagccgg @@ -5624,7 +5624,7 @@ cctatagtcaggccgagaacgccatggcacggaagaagccatgctgcctgtgagcccctc agaggacactgagtcaaaaaaccccacgcgcttggaggcgcaggatgggaaaagaaggtg gcgaccttccccacccttcaatctggggcctgaactggagatcagctgtggatctccaga agagggactagtggttagaggag ->Brazil/2015/ZBRC303 +>KY558997 tgagaataatcaatgctaggaaggagaagaagagacgaggcacagatactagtgtcggaa ttgttggcctcctgctgaccacagctatggcagcggaggtcactagacgtgggagtgcat actatatgtacttggacagaaacgatgctggggaggccatatcttttccaaccacattgg @@ -5782,7 +5782,7 @@ agatgcaagacttgtggctgctgcggaggtcagagaaagtgaccaactggttgcagagca acggatgggataggctcaaacgaatggcagtcagtggagatgattgcgttgtgaagccaa ttgatgataggtttgcacatgccctcaggttcttgaatgatatgggaaaagttaggaagg acacacaagagtgg ->SMGC_1 +>KX266255 tctgtgtgaatcagactgcgacagttcgagtttgaagcgaaagctagcaacagtatcaac aggttttattttggatttggaaacgagagtttctggtcatgaaaaacccaaaaaagaaat ccggaggattccggattgtcaatatgctaaaacgcggagtagcccgtgtgagcccctttg