-
Notifications
You must be signed in to change notification settings - Fork 0
/
find_sv_type.py
72 lines (51 loc) · 2.41 KB
/
find_sv_type.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#! /usr/bin/evn python
import sys
import os
import highest_global_alignment
##This script can parse a line from the VCF and output the variant type field SVTYPE##
#determines if added characters are arbitrary or duplications
##NOTE: if there is a duplication and another structural variant in the alternate
##this will classify the whole variant as a duplication
def if_insertion_or_duplication(reference_alignment, alternate_alignment):
insertion_index = reference_alignment.find("-")
current_index = insertion_index
end = False
while end == False:
if len(reference_alignment) == current_index + 1 or reference_alignment[current_index] != "-":
end = True
else:
current_index += 1
insertion = alternate_alignment[insertion_index:current_index]
possible_duplications = [insertion]
index = 1
while index < len(insertion):
if insertion[:index] in insertion[index:]:
possible_duplications.append(insertion[:index])
index += 1
for duplication in possible_duplications:
if reference_alignment[(insertion_index - len(duplication)):insertion_index] == duplication:
return "DUP"
elif reference_alignment[current_index:(current_index + len(duplication))] == duplication:
return "DUP"
return "INS"
#determines whether characters that differ are an inversion
def if_inversion_or_mismatch(reference_alignment, alternate_alignment):
difference_indices = [i for i in range(len(reference_alignment)) if reference_alignment[i] != alternate_alignment[i]]
reference_segment = reference_alignment[difference_indices[0]:difference_indices[-1]+1]
alternate_segment = alternate_alignment[difference_indices[0]:difference_indices[-1]+1]
if reference_segment == alternate_segment[::-1]:
return "INV"
else:
return "BND"
def find_sv_type(reference, alternate):
alignment = highest_global_alignment.align(reference, alternate)
reference_alignment = alignment[0]
alternate_alignment = alignment[1]
if "-" in reference_alignment and "-" not in alternate_alignment:#possible insertion or duplication
return if_insertion_or_duplication(reference_alignment, alternate_alignment)
elif "-" not in reference_alignment and "-" in alternate_alignment:#deletion
return "DEL"
elif "-" not in reference_alignment and "-" not in alternate_alignment:#may be an inversion or simple mismatch
return if_inversion_or_mismatch(reference_alignment, alternate_alignment)
else:#super complicated and weird alignment
return "BND"