-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Function to change taxon map of input nexus, write to out nexus, workaround for #3
- Loading branch information
Showing
2 changed files
with
71 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import os | ||
|
||
from tetres.beast_helper.utils import change_taxon_map | ||
|
||
|
||
def test_change_taxon_map(ten_taxa_multichain):
    """Check that remapping the first tree file of the fixture writes an output file.

    The map used is the identity over the indices 1..10, so the call is
    expected to succeed without any index or label being unknown.
    """
    tree_file = ten_taxa_multichain.tree_files[0]
    in_nexus = os.path.join(ten_taxa_multichain.working_dir, tree_file)
    out_nexus = os.path.join(ten_taxa_multichain.working_dir, f"changed_mapping_{tree_file}")
    # Remove a leftover from an earlier run so the assertion below really
    # proves that *this* call produced the file.
    if os.path.exists(out_nexus):
        os.remove(out_nexus)
    new_map = {index: index for index in range(1, 11)}
    change_taxon_map(input_nexus=in_nexus, output_nexus=out_nexus, new_map=new_map)
    assert os.path.isfile(out_nexus)
|
||
|
||
def test_change_taxon_map_mcc(ten_taxa_multichain):
    """Check that remapping the ``chain0_1-mcc.tree`` file writes an output file.

    The map used is the identity over the indices 1..10, so the call is
    expected to succeed without any index or label being unknown.
    """
    in_nexus = os.path.join(ten_taxa_multichain.working_dir, "chain0_1-mcc.tree")
    out_nexus = os.path.join(ten_taxa_multichain.working_dir, "changed_mapping_mcc.tree")
    # Remove a leftover from an earlier run so the assertion below really
    # proves that *this* call produced the file.
    if os.path.exists(out_nexus):
        os.remove(out_nexus)
    new_map = {index: index for index in range(1, 11)}
    change_taxon_map(input_nexus=in_nexus, output_nexus=out_nexus, new_map=new_map)
    assert os.path.isfile(out_nexus)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import re | ||
|
||
|
||
def _parse_taxon_label(raw_label):
    """Return *raw_label* as an ``int`` when it is numeric, otherwise unchanged.

    Labels read from the file and the values of the caller's map are both
    passed through this function so that ``"3"`` and ``3`` compare equal while
    non-numeric labels are still usable as dictionary keys.
    """
    try:
        return int(raw_label)
    except (TypeError, ValueError):
        return raw_label


def change_taxon_map(input_nexus, output_nexus, new_map):
    """Apply a new taxon translate map to a NEXUS tree file.

    The file is processed line by line:

    * every line up to and including the ``translate`` line is copied as is;
    * each entry of the translate block is rewritten as
      ``<index> <new_map[index]>``; entries are comma separated and the last
      one carries no comma before the closing ``;``;
    * in every ``tree`` line the taxon indices (digits directly followed by
      ``[`` or ``:``) are replaced so that each leaf keeps its label under the
      new map. Only the text after the last space of the line, cut after its
      last ``)``, is kept as the tree string;
    * any other line after the translate block is dropped and a final
      ``End;`` is appended.

    Args:
        input_nexus: Path of the NEXUS file to read.
        output_nexus: Path of the file to write; an existing file is
            overwritten.
        new_map: Mapping from translate index to taxon label. It needs an
            entry for every index of the input translate block, and its
            values must contain every label used there.

    Returns:
        ``1`` after the output file has been written.

    Raises:
        KeyError: If ``new_map`` lacks an index of the input translate block,
            if a tree uses an index that is not in that block, or if a label
            of the input file is not among the values of ``new_map``.
        ValueError: If a translate entry has no label or a non-integer index.
    """
    # todo check file exists

    begin_map = re.compile(r'\t?translate\n', re.I)
    end_map = re.compile(r'\t*?;\n?')
    re_tree = re.compile(r"\t?tree .*=? (.*$)", flags=re.I | re.MULTILINE)
    re_taxa = re.compile(r'([0-9]+)([\[:])')

    # label -> index in the new map
    new_index_of_label = {_parse_taxon_label(label): index for index, label in new_map.items()}
    # index -> label as found in the input file
    old_map = {}
    within_map = False
    finished_map = False
    entries_written = 0

    def relabel(match):
        """Translate one taxon index of a tree string to its new index."""
        old_index = int(match.group(1))
        try:
            label = old_map[old_index]
        except KeyError:
            raise KeyError(
                f"taxon index {old_index} is used in a tree but is not part of "
                f"the translate block of {input_nexus}"
            ) from None
        try:
            new_index = new_index_of_label[label]
        except KeyError:
            raise KeyError(f"taxon label {label!r} does not occur in new_map") from None
        # group(2) is the '[' or ':' that terminated the index; keep it.
        return f"{new_index}{match.group(2)}"

    with open(input_nexus, "r") as in_file, open(output_nexus, "w") as out_file:
        for line in in_file:
            if begin_map.match(line):
                # we enter the taxon map lines
                out_file.write(line)
                within_map = True
            elif within_map and end_map.match(line):
                # The taxon map is finished; terminate the last entry without a comma
                if entries_written:
                    out_file.write("\n")
                out_file.write(";\n")
                within_map = False
                finished_map = True
            elif within_map:
                # We are within the taxon map, every non-blank line is one entry
                fields = line.split()
                if fields:
                    if len(fields) < 2:
                        raise ValueError(f"malformed translate entry: {line!r}")
                    cur_key = int(fields[0])
                    raw_label = fields[1][:-1] if fields[1].endswith(",") else fields[1]
                    old_map[cur_key] = _parse_taxon_label(raw_label)
                    if cur_key not in new_map:
                        raise KeyError(f"new_map has no entry for taxon index {cur_key}")
                    # The separator is written in front of every entry but the
                    # first, so the last entry never ends with a comma.
                    if entries_written:
                        out_file.write(",\n")
                    out_file.write(f"\t\t{cur_key} {new_map[cur_key]}")
                    entries_written += 1
            elif not finished_map:
                # Write everything that comes before the taxon map to the new file
                out_file.write(line)

            tree_match = re_tree.match(line)
            if tree_match:
                # matching a tree, need to change the taxon integers accordingly
                newick = tree_match.group(1)
                # Everything after the last ')' is discarded and replaced by ';'
                tree_string = f'{newick[:newick.rfind(")") + 1]};'
                new_newick = re_taxa.sub(relabel, tree_string)
                out_file.write(f"{line.split('=')[0]}= {new_newick}\n")
        if within_map and entries_written:
            # The translate block was never closed; finish its last line so
            # that the closing statement below starts on a line of its own.
            out_file.write("\n")
        out_file.write("End;")
    return 1