Skip to content

Conducting conflict analyses

Stephen Smith edited this page Oct 14, 2015 · 6 revisions

Hopefully you have peyotl installed and if not check this out. So I assume that you have a working peyotl and you understand virtualenv (if you use it).

We can use what we have from the other tutorials to put this together for a simple analysis of conflict in a portion of the synthetic tree. This requires us to

  • Get the trees for a clade and put them in a file
  • Get a section of the synthetic tree and put it in a file
  • Conduct a conflict analysis of the trees downloaded against the synthetic tree
  • Present the results in a reasonable manner

All but the last two in this list have been done in the other tutorials, but we will put it all together.

Here I am going to make use of a very simple node class in python and a newick reader. The source for these files is below. Also, I am going to put all of the pieces required here in one file instead of having multiple (as you might have done for the previous tutorials). Of course, you can do what you like. I am also going to keep it as simple as possible so it is clear how to modify, instead of making it particularly refined or final. You can run this by throwing all these files in a directory and running python map_ot.py Lonicera 649885 > tree.nex and then opening the tree.nex in figtree. In this case, Lonicera is my named clade of interest (to get a set of trees from the tree database) and 649885 is the specific ott id for the part of the synth tree I want. You could modify the script to only use the ott id of course. But here, I simple have it to show different ways of using the apis. To see how to use ottId to get studies of interest, see this tutorial page.

Here is the map_ot.py

from peyotl.api import APIWrapper
from peyotl.nexson_syntax import create_content_spec, \
                                 get_ot_study_info_from_nexml, \
                                 PhyloSchema
import sys
import codecs
import node
import tree_reader
import tree_utils

#get the synth tree based on ott_id
def get_synth_tree_bit(ott_id):
    tm = APIWrapper().treemachine
    x = tm.get_synthetic_tree(format='newick',ott_id=ott_id,max_depth=2)
    return x['newick']

#reading the trees from the list of tree files
def read_trees(tree_names):
    trees = []
    for i in tree_names:
        tf = open(str(i)+".tre", "r")
        tl = tf.readline()
        trees.append(tree_reader.read_tree_string(tl))
        tf.close()
    return trees

#will convert nexson to newick; much of this taken from scripts/nexson/nexson_newick.py
def convert_nexson_newick(inp, tid):
    outfn = tid+".tre"
    src_schema = None
    out = codecs.open(outfn, mode='w', encoding='utf-8')
    otu_label = 'ottid' # originallabel, ottid, otttaxonname
    blob = inp
    schema = create_content_spec(content='tree', content_id=tid, format='newick', otu_label=otu_label)
    try:
        schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
    except KeyError:
        if 'nexml' not in blob and 'nex:nexml' not in blob:
            blob = blob['data']
            schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
        else:
            raise
    return

#just break down the tree into each node's the ingroups and outgroups
def get_biparts(tree):
    ins = []
    outs = []
    rootnms = set(tree.lvsnms())
    for i in tree.iternodes():
        if len(i.children) == 0 or i == tree:
            continue
        tins = set(i.lvsnms())
        ins.append(tins)
        outs.append(rootnms.difference(tins))
    return [ins,outs]

#map very simple conflict and the mrcas for nodes 
#conflict here is just calculated if there is an intersection between
#ingroup1 and outgroup2, ingroup2 and outgroup1, ingroup1 and ingroup2, and outgroup1 and outgroup2
def map_tree_into_stree(tree, treename, stree):
    ins,outs = get_biparts(tree)
    souts = set(stree.lvsnms())
    for i in stree.iternodes():
        if len(i.children) == 0 or i == stree:
            continue
        sin = i.data["otts"]
        tsouts = i.data["roototts"]
        for j,k in zip(ins,outs):
            #conflict
            if len(j.intersection(sin)) > 0 and len(j.intersection(tsouts)) > 0 \
                    and len(k.intersection(sin)) > 0 and len(k.intersection(tsouts)) > 0:
                        i.data["conflicts"].add(treename)
            #mrca
            else:
                testnames = sin.intersection(j)
                if len(testnames) < 2:
                    continue
                testnodes = []
                for m in stree.leaves():
                    if m.label.split("ott")[1] in testnames:
                        testnodes.append(m)
                tree_utils.get_mrca(testnodes,stree).data["mrcas"].add(treename)


#present the results so they can be viewed in figtree
def get_figtree(tree):
    for i in tree.iternodes():
        if "conflicts" in i.data or "mrcas" in i.data:
            i.label += "[&"
            labdat = []
            if "conflicts" in i.data:
                labdat.append("conflict="+str(len(set(i.data["conflicts"]))))
            if "mrcas" in i.data:
                labdat.append("mrcas="+str(len(set(i.data["mrcas"]))))
            i.label += ",".join(labdat)+"]"
    retst = "#NEXUS\nbegin trees;\n\ttree tree1 = [&R] "
    retst += tree.get_newick_repr(False)+";\nend;\n"
    return retst


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "python "+sys.argv[0]+" taxonname ottid"
        sys.exit(0)
    a = APIWrapper()
    oti = a.oti
    ps = a.phylesystem_api
    b = oti.find_trees(ottTaxonName=sys.argv[1])
    tree_names = []
    for i in b:
        for j in i["matched_trees"]:
            tr = j["nexson_id"]
            st = "_".join(j["oti_tree_id"].split("_")[0:2])
            nexson = ps.get(st)
            convert_nexson_newick(nexson, tr)
            tree_names.append(tr)
    trees = read_trees(tree_names)
    stree = get_synth_tree_bit(sys.argv[2])
    sytree = tree_reader.read_tree_string(str(stree))
    #preparing things
    roototts = set([i.split("ott")[1] for i in sytree.lvsnms()])
    for i in sytree.iternodes():
        if i != sytree and len(i.children) > 0:
            i.data["conflicts"] = set()
            i.data["mrcas"] = set()
            i.data["otts"] = set([j.split("ott")[1] for j in i.lvsnms()])
            i.data["roototts"] = roototts.difference(i.data["otts"])
    #do some basic conflict
    for i in range(len(trees)):
        map_tree_into_stree(trees[i],tree_names[i],sytree)
    print get_figtree(sytree)

The other files needed for the above script include this simple node class in node.py

class Node:
    def __init__(self):
        self.label = ""
        self.length = 0.0
        self.parent = None
        self.children = []
        self.data = {}
        self.istip = False
        self.comment = ""

    def add_child(self, child):
        # make sure that the child is not already in there
        assert child not in self.children
        self.children.append(child)
        child.parent = self

    def remove_child(self, child):
        # make sure that the child is in there
        assert child in self.children
        self.children.remove(child)
        child.parent = None

    def leaves(self, v=None):
        if v is None:
            v = []
        if len(self.children) == 0:
            v.append(self)
        else:
            for child in self.children:
                child.leaves(v)
        return v

    def leaves_fancy(self):
        return [n for n in self.iternodes() if n.istip]

    def lvsnms(self):
        return [n.label for n in self.iternodes() if n.istip]

    def iternodes(self, order="preorder"):
        if order.lower() == "preorder":
            yield self
        for child in self.children:
            for d in child.iternodes(order):
                yield d
        if order.lower() == "postorder":
            yield self

    def get_newick_repr(self, showbl=False):
        ret = ""
        for i in range(len(self.children)):
            if i == 0:
                ret += "("
            ret += self.children[i].get_newick_repr(showbl)
            if i == len(self.children)-1:
                ret += ")"
            else:
                ret += ","
        if self.label is not None:
            ret += self.label
        if showbl is True:
            ret += ":"+"{:.12g}".format(self.length)
        return ret

This simple newick reader in tree_reader.py

from node import Node
def read_tree_string(instr):
    root = None
    index = 0
    nextchar = instr[index]
    start = True
    keepgoing = True
    curnode = None
    while keepgoing is True:
        if nextchar is "(":
            if start is True:
                root = Node()
                curnode = root
                start = False
            else:
                newnode = Node()
                curnode.add_child(newnode)
                curnode = newnode
        elif nextchar is ',':
            curnode = curnode.parent
        elif nextchar is ")":
            curnode = curnode.parent
            index += 1
            nextchar = instr[index]
            name = ""
            while True:
                if nextchar is ',' or nextchar is ')' or nextchar is ':' \
                        or nextchar is ';' or nextchar is '[':
                    break
                name += nextchar
                index += 1
                nextchar = instr[index]
            curnode.label = name
            index -= 1
        elif nextchar is "[":
            index += 1
            nextchar = instr[index]
            name = ""
            while True:
                if nextchar is "]":
                    break
                name += nextchar
                index += 1
                nextchar = instr[index]
            curnode.comment += name
        elif nextchar == ';':
            keepgoing = False
            break
        elif nextchar == ":":
            index += 1
            nextchar = instr[index]
            brlen = ""
            while True:
                if nextchar is ',' or nextchar == ')' or nextchar is ':' \
                        or nextchar is ';' or nextchar is '[':
                    break
                brlen += nextchar
                index += 1
                nextchar = instr[index]
            curnode.length = float(brlen)
            index -= 1
        elif nextchar == ' ':
            index += 1
            nextchar = instr[index]
        else:  # this is an external named node
            newnode = Node()
            curnode.add_child(newnode)
            curnode = newnode
            curnode.istip = True
            name = ""
            while True:
                if nextchar is ',' or nextchar == ')' or nextchar is ':' \
                        or nextchar is ';' or nextchar is '[':
                    break
                name += nextchar
                index += 1
                nextchar = instr[index]
            curnode.label = name
            index -= 1
        if index < len(instr) - 1:
            index += 1
        nextchar = instr[index]
    return root

And this simple tree_utils.py file

def get_mrca(nodes, tree):
    traceback = []
    first = nodes[0]
    while first != tree:
        first = first.parent
        traceback.append(first)
        if first.parent is None:
            break
    curmrca = nodes[0].parent
    for i in nodes:
        if i == nodes[0]:
            continue
        curmrca = mrca_recurs(curmrca, traceback, i)
    return curmrca


def mrca_recurs(node1, path1, node2):
    path = path1[path1.index(node1):]
    parent = node2
    mrca = None
    while parent is not None:
        if parent in path:
            mrca = parent
            break
        parent = parent.parent
    return mrca

back to Tutorials

Clone this wiki locally