-
Notifications
You must be signed in to change notification settings - Fork 0
/
editdistance.py
57 lines (49 loc) · 1.26 KB
/
editdistance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import string
import sys
import re
def GetSequence(filename):
    """Read the sequences stored in a text file, one per line.

    Each line is stripped of leading and trailing whitespace before it is
    stored.  For backward compatibility a '+' character inside a line still
    acts as a separator, so the line "AB+CD" yields the two entries "AB" and
    "CD".  Blank lines yield empty-string entries.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of strings in file order; an empty list for an empty file.

    Raises:
        IOError (OSError on Python 3): if the file cannot be opened or read.
    """
    sequences = []
    # The context manager closes the file even when reading fails part-way,
    # which the previous explicit close() did not guarantee.
    with open(filename, "r") as handle:
        for line in handle:
            # Splitting per line gives the same result as joining every line
            # with '+' and splitting the whole string afterwards, without
            # rebuilding an ever-growing string on each iteration.
            sequences.extend(line.strip().split('+'))
    return sequences
def getMatrix_Alignment(s):
    """Return the edit distance between the first two sequences in ``s``.

    Insertions, deletions and substitutions each cost 1; matching
    characters cost 0.  Only ``s[0]`` and ``s[1]`` are used; any further
    entries are ignored.

    Args:
        s: Indexable collection holding at least two sequences.

    Returns:
        The minimum total cost of turning ``s[0]`` into ``s[1]``.

    Raises:
        IndexError: if ``s`` holds fewer than two entries.
    """
    source, target = s[0], s[1]
    n_rows, n_cols = len(source), len(target)

    # table[r][c] holds the distance between source[:r] and target[:c].
    # Row 0 and column 0 are the cost of building a prefix from nothing.
    table = [list(range(n_cols + 1))]
    for r in range(1, n_rows + 1):
        table.append([r] + [0] * n_cols)

    for r in range(1, n_rows + 1):
        above = table[r - 1]
        current = table[r]
        for c in range(1, n_cols + 1):
            mismatch = 0 if source[r - 1] == target[c - 1] else 1
            current[c] = min(
                above[c] + 1,             # drop a character of source
                current[c - 1] + 1,       # insert a character of target
                above[c - 1] + mismatch,  # keep or substitute
            )

    return table[-1][-1]
def main():
    """Print the edit distance between the first two sequences of a file.

    The file path is taken from the first command-line argument.  The file
    is read with GetSequence and the score is computed by
    getMatrix_Alignment from its first two entries.

    Exits with an error message (non-zero status) when no file argument is
    given or when the file holds fewer than two sequences.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s SEQUENCE_FILE" % sys.argv[0])

    sequences = GetSequence(sys.argv[1])
    if len(sequences) < 2:
        sys.exit("%s: expected at least two sequences, found %d"
                 % (sys.argv[1], len(sequences)))

    score = getMatrix_Alignment(sequences)
    # Call form with a single argument prints the same text on Python 2
    # and Python 3.
    print(score)


# Run only when executed as a script, so importing the module has no
# side effects.
if __name__ == "__main__":
    main()