diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index 2341f931..17e18334 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -15,9 +15,7 @@ include: "rules/prepare_sequences.smk" include: "rules/prepare_sequences_E.smk" include: "rules/construct_phylogeny.smk" include: "rules/annotate_phylogeny.smk" -include: "rules/annotate_phylogeny_E.smk" include: "rules/export.smk" -include: "rules/export_E.smk" # Include custom rules defined in the config. if "custom_rules" in config: diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index e28cda75..d58b4a9d 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -42,11 +42,12 @@ rule ancestral: rule translate: """Translating amino acid sequences""" input: - tree = "results/tree_{serotype}_genome.nwk", - node_data = "results/nt-muts_{serotype}_genome.json", - reference = "config/reference_dengue_{serotype}.gb" + tree = "results/tree_{serotype}_{gene}.nwk", + node_data = "results/nt-muts_{serotype}_{gene}.json", + # The genbank references for the E gene are dynamically generated files located within the results folder. + reference = lambda wildcard: "config/reference_dengue_{serotype}.gb" if wildcard.gene in ["genome"] else "results/config/reference_dengue_{serotype}_{gene}.gb" output: - node_data = "results/aa-muts_{serotype}_genome.json" + node_data = "results/aa-muts_{serotype}_{gene}.json" shell: """ augur translate \ diff --git a/phylogenetic/rules/annotate_phylogeny_E.smk b/phylogenetic/rules/annotate_phylogeny_E.smk deleted file mode 100644 index a18cd289..00000000 --- a/phylogenetic/rules/annotate_phylogeny_E.smk +++ /dev/null @@ -1,41 +0,0 @@ -""" -This part of the workflow creates additonal annotations for the phylogenetic tree. -REQUIRED INPUTS: - metadata = data/metadata_all.tsv - prepared_sequences = results/aligned.fasta - tree = results/tree.nwk -OUTPUTS: - node_data = results/*.json - There are no required outputs for this part of the workflow as it depends - on which annotations are created. All outputs are expected to be node data - JSON files that can be fed into `augur export`. - See Nextstrain's data format docs for more details on node data JSONs: - https://docs.nextstrain.org/page/reference/data-formats.html -This part of the workflow usually includes the following steps: - - augur traits - - augur ancestral - - augur translate - - augur clades -See Augur's usage docs for these commands for more details. -Custom node data files can also be produced by build-specific scripts in addition -to the ones produced by Augur commands. -""" - -ruleorder: translate_E > translate - -rule translate_E: - """Translating amino acid sequences""" - input: - tree = "results/tree_{serotype}_E.nwk", - node_data = "results/nt-muts_{serotype}_E.json", - reference = "results/config/reference_dengue_{serotype}_E.gb" - output: - node_data = "results/aa-muts_{serotype}_E.json" - shell: - """ - augur translate \ - --tree {input.tree} \ - --ancestral-sequences {input.node_data} \ - --reference-sequence {input.reference} \ - --output {output.node_data} \ - """ \ No newline at end of file diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index d1ed9b89..b6d44e5f 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -40,7 +40,7 @@ rule colors: rule prepare_auspice_config: """Prepare the auspice config file for each serotypes""" output: - auspice_config="results/config/auspice_config_{serotype}.json", + auspice_config="results/config/auspice_config_{serotype}_{gene}.json", params: replace_clade_key="clade_membership", replace_clade_title=lambda wildcard: r"Serotype" if wildcard.serotype in ['all'] else r"DENV genotype", @@ -109,17 +109,17 @@ rule prepare_auspice_config: rule export: """Exporting data files for auspice""" input: - tree = "results/tree_{serotype}_genome.nwk", + tree = "results/tree_{serotype}_{gene}.nwk", metadata = "data/metadata_{serotype}.tsv", - branch_lengths = "results/branch-lengths_{serotype}_genome.json", - traits = "results/traits_{serotype}_genome.json", - clades = "results/clades_{serotype}_genome.json", - nt_muts = "results/nt-muts_{serotype}_genome.json", - aa_muts = "results/aa-muts_{serotype}_genome.json", - auspice_config = "results/config/auspice_config_{serotype}.json", + branch_lengths = "results/branch-lengths_{serotype}_{gene}.json", + traits = "results/traits_{serotype}_{gene}.json", + clades = lambda wildcard: "results/clades_{serotype}_{gene}.json" if wildcard.gene in ['genome'] else [], + nt_muts = "results/nt-muts_{serotype}_{gene}.json", + aa_muts = "results/aa-muts_{serotype}_{gene}.json", + auspice_config = "results/config/auspice_config_{serotype}_{gene}.json", colors = "results/colors_{serotype}.tsv", output: - auspice_json = "results/raw_dengue_{serotype}_genome.json", + auspice_json = "results/raw_dengue_{serotype}_{gene}.json", params: strain_id = config.get("strain_id_field", "strain"), shell: diff --git a/phylogenetic/rules/export_E.smk b/phylogenetic/rules/export_E.smk deleted file mode 100644 index aee353f5..00000000 --- a/phylogenetic/rules/export_E.smk +++ /dev/null @@ -1,47 +0,0 @@ -""" -This part of the workflow collects the phylogenetic tree and annotations to -export a Nextstrain dataset. -REQUIRED INPUTS: - metadata = data/metadata_all.tsv - tree = results/tree.nwk - branch_lengths = results/branch_lengths.json - node_data = results/*.json -OUTPUTS: - auspice_json = auspice/${build_name}.json - There are optional sidecar JSON files that can be exported as part of the dataset. - See Nextstrain's data format docs for more details on sidecar files: - https://docs.nextstrain.org/page/reference/data-formats.html -This part of the workflow usually includes the following steps: - - augur export v2 - - augur frequencies -See Augur's usage docs for these commands for more details. -""" - -ruleorder: export_E > export - -rule export_E: - """Exporting data files for auspice""" - input: - tree = "results/tree_{serotype}_E.nwk", - metadata = "data/metadata_{serotype}.tsv", - branch_lengths = "results/branch-lengths_{serotype}_E.json", - traits = "results/traits_{serotype}_E.json", - nt_muts = "results/nt-muts_{serotype}_E.json", - aa_muts = "results/aa-muts_{serotype}_E.json", - auspice_config = "config/auspice_config_{serotype}_E.json", - output: - auspice_json = "results/raw_dengue_{serotype}_E.json", - root_sequence = "results/raw_dengue_{serotype}_E_root-sequence.json", - params: - strain_id = config.get("strain_id_field", "strain"), - shell: - """ - augur export v2 \ - --tree {input.tree} \ - --metadata {input.metadata} \ - --metadata-id-columns {params.strain_id} \ - --node-data {input.branch_lengths} {input.traits} {input.nt_muts} {input.aa_muts} \ - --auspice-config {input.auspice_config} \ - --include-root-sequence \ - --output {output.auspice_json} - """