-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This approach is necessary because the mutations assigned to the top branches in the tree are random. I'd consider this approach temporary and we should revisit it in the near future.
- Loading branch information
1 parent
412e4ab
commit cd29467
Showing
3 changed files
with
67 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Ensure we include two clade Ia and two clade Ib samples so we can use them to check clade assignment | ||
# Clade Ia | ||
PP601197 | ||
KJ642618 | ||
# Clade Ib | ||
PP601222 | ||
PP601209 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
""" | ||
Labels the two child nodes of the root as clade Ia and Ib | ||
based on an expected tree structure. This approach is temporary and is | ||
necessary because the distribution of mutations at these two nodes | ||
(via augur ancestral) is random and thus we can't use our normal | ||
`augur clades` approach. | ||
This script expects certain tips to be present for each clade | ||
which are force-included in the analysis. | ||
Usage: provide the tree on STDIN, node-data JSON written to STDOUT | ||
""" | ||
|
||
import argparse | ||
from sys import stdin,stdout | ||
from Bio import Phylo | ||
from collections import defaultdict | ||
import json | ||
|
||
# Accessions of the tips that define each clade. A child of the root is
# labelled with a clade only if *all* of that clade's tips descend from it,
# so these tips must be present in the tree.
TIPS = {
    "clade Ia": ["PP601197", "KJ642618"],
    "clade Ib": ["PP601222", "PP601209"],
}


def assign_root_clades(root, tips_by_clade=None):
    """
    Build node-data labelling the direct children of *root* with clades.

    Parameters
    ----------
    root
        The root clade of the tree. Iterating over it must yield its direct
        children, each of which provides ``name``, ``get_terminals()`` and
        ``find_clades()`` (as Bio.Phylo clades do).
    tips_by_clade
        Mapping of clade name -> list of tip names which must all descend
        from a child of the root for that child to be labelled with the
        clade. Defaults to the module-level ``TIPS``.

    Returns
    -------
    (node_data, unmatched)
        ``node_data`` is a dict with ``"nodes"`` and ``"branches"`` entries
        keyed by node name: the matching child gets a ``clade`` branch label,
        and it and every node below it get a ``clade_membership``.
        ``unmatched`` is a sorted list of the clade names for which no child
        of the root contained all of the defining tips.
    """
    if tips_by_clade is None:
        tips_by_clade = TIPS

    node_data = {  # node-data JSON
        "nodes": defaultdict(dict),
        "branches": defaultdict(dict),
    }
    unmatched = set(tips_by_clade)

    for node in root:
        tip_names = {tip.name for tip in node.get_terminals()}
        for clade_name, defining_tips in tips_by_clade.items():
            if not tip_names.issuperset(defining_tips):
                continue
            unmatched.discard(clade_name)
            node_data["branches"][node.name]["labels"] = {"clade": clade_name}
            # find_clades() yields `node` itself as well as everything below
            # it, so no separate assignment is needed for `node`.
            for descendant in node.find_clades():
                node_data["nodes"][descendant.name]["clade_membership"] = clade_name

    return node_data, sorted(unmatched)


if __name__ == "__main__":
    from sys import stderr

    # No options are defined; the parser exists so that --help prints the
    # module docstring.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.parse_args()

    t = Phylo.read(stdin, "newick")

    node_data, missing = assign_root_clades(t.clade, TIPS)

    # Don't fail silently: an unlabelled clade means the defining tips are
    # absent or the tree doesn't have the expected structure at the root.
    for clade_name in missing:
        print(
            "WARNING: no child of the root contains all of the defining tips "
            "for {!r} ({}); this clade was not labelled.".format(
                clade_name, ", ".join(TIPS[clade_name])
            ),
            file=stderr,
        )

    json.dump(node_data, stdout)