-
Notifications
You must be signed in to change notification settings - Fork 23
/
visualize.py
127 lines (112 loc) · 3.89 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
import argparse
import csv
from pathlib import Path
from Bio import SeqIO
from pygenomeviz import GenomeViz
from pygenomeviz.utils import ColorCycler
def main():
"""Run visualize conserved regions workflow"""
# Parse arguments
args = get_args()
genome_fasta_file1: Path = args.fasta1
genome_fasta_file2: Path = args.fasta2
fastani_visual_file: Path = args.visual_file
visualize_result_file: Path = args.plot_outfile
cmap: str = args.cmap
link_color: str = args.link_color
curve: bool = args.curve
# Load genome fasta information
genome_name1 = genome_fasta_file1.with_suffix("").name
genome_name2 = genome_fasta_file2.with_suffix("").name
records1 = SeqIO.parse(genome_fasta_file1, "fasta")
seq_length1 = sum([len(r) for r in records1])
records2 = SeqIO.parse(genome_fasta_file2, "fasta")
seq_length2 = sum([len(r) for r in records2])
# Load fastANI visual result
fastani_results = []
with open(fastani_visual_file) as f:
reader = csv.reader(f, delimiter="\t")
for row in reader:
# Ignore blank lines
if len(row) == 0:
continue
start1, end1 = int(row[6]), int(row[7])
start2, end2 = int(row[8]), int(row[9])
identity = float(row[2])
link1, link2 = (genome_name1, start1, end1), (genome_name2, start2, end2)
fastani_results.append((link1, link2, identity))
# Visualize conserved regions detected by fastANI
gv = GenomeViz(
fig_width=15,
fig_track_height=1.0,
feature_track_ratio=0.1,
track_align_type="center", # "left", "center", "right"
)
gv.set_scale_bar()
track1 = gv.add_feature_track(genome_name1, seq_length1)
track2 = gv.add_feature_track(genome_name2, seq_length2)
ColorCycler.set_cmap(cmap) # "hsv", "viridis", "jet", etc...
colors = ColorCycler.get_color_list(len(fastani_results))
min_identity = int(min([res[2] for res in fastani_results]))
for res, color in zip(fastani_results, colors):
link1, link2, identity = res
track1.add_feature(link1[1], link1[2], plotstyle="bigbox", facecolor=color)
track2.add_feature(link2[1], link2[2], plotstyle="bigbox", facecolor=color)
gv.add_link(
link1, link2, link_color, v=identity, vmin=min_identity, curve=curve
)
gv.set_colorbar([link_color], vmin=min_identity, bar_height=0.35)
gv.savefig(visualize_result_file)
def get_args() -> argparse.Namespace:
"""Get arguments
Returns:
argparse.Namespace: Argument values
"""
usage = "python visualize.py [fasta1] [fasta2] [fastANI visual file] [plot outfile]"
desc = "Visualize conserved regions detected by fastANI"
parser = argparse.ArgumentParser(usage=usage, description=desc)
parser.add_argument(
"fasta1",
type=Path,
help="Input genome fasta 1",
)
parser.add_argument(
"fasta2",
type=Path,
help="Input genome fasta 2",
)
parser.add_argument(
"visual_file",
type=Path,
help="fastANI visual result file",
)
parser.add_argument(
"plot_outfile",
type=Path,
help="Plot result outfile [*.jpg|*.png|*.svg|*.pdf]",
)
default_cmap = "gist_rainbow"
parser.add_argument(
"--cmap",
type=str,
help=f"Colormap for matplotlib (Default: '{default_cmap}')",
default=default_cmap,
metavar="",
)
default_link_color = "grey"
parser.add_argument(
"--link_color",
type=str,
help=f"Link color (Default: '{default_link_color}')",
default=default_link_color,
metavar="",
)
parser.add_argument(
"--curve",
help="Plot curved style link (Default: OFF)",
action="store_true",
)
return parser.parse_args()
if __name__ == "__main__":
main()