-
Notifications
You must be signed in to change notification settings - Fork 4
/
palmgrab.py
86 lines (77 loc) · 3.24 KB
/
palmgrab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import sys
import glob
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqIO import FastaIO
from Bio.SeqRecord import SeqRecord
def palmgrab(inputfa, palmout, rdrpout) :
PfinList = [] # palmprint fasta
RfinList = [] # rdrpcore fasta
# Resulttrunc.append(SeqRecord(Seq(splitseq), id=name))
if os.path.exists(inputfa):
#for input in glob.glob(os.path.join(os.path.join(inputfa,'tmfa'),'*.fa')) :
# Import 2 TMalign fasta output
seqList = []
fasta_sequences = SeqIO.parse(open(inputfa), 'fasta')
pdb_id = os.path.basename(inputfa.split('.fa')[0])
for fasta in fasta_sequences:
name, sequence = fasta.id, str(fasta.seq)
# Remove comments
sequence = str(fasta.seq).split('#')[0]
seqList.append(sequence)
# Determine palmprint (sequence[1]) coordinates in alignment
find=0
lind=0
# start coordinate
for i, ef in enumerate(seqList[1]) :
if ef != '-':
find = i
break
# end coordinate
#for i, el in enumerate(seqList[1][::1].split('\n')[-1]) :
for i, el in enumerate(reversed(seqList[1])) :
if el != '-' and el != ' ':
lind = i
break
PfinList.append(SeqRecord(Seq(seqList[0][find:len(seqList[1])-lind]),
id=pdb_id,
description= 'pp:' + str(find) + '-' + str(len(seqList[1])-lind)))
#print(PfinList[0])
# Determine rdrpcore extensions
#truncList = seqList[0].split(seqList[0][find:len(seqList[1])-lind])
if find >= 100 :
if lind >= 50 :
# Both upstream/downstream extension are available
RfinList.append(
SeqRecord(Seq(seqList[0][find-100:len(seqList[1])-lind+50]),
id=pdb_id,
description= 'rc:' + str(find-100) + '-' + str(len(seqList[1])-lind+50)))
else :
# Upstream extension available, Downstream not available
# use end of sequence
RfinList.append(
SeqRecord(Seq(seqList[0][find-100:len(seqList[1])]),
id=pdb_id,
description= 'rc:' + str(find-100) + '-' + str(len(seqList[1])) ))
else :
if lind >= 50:
# Upstream extension not available, Downstream is available
# use start of sequence
RfinList.append(
SeqRecord(Seq(seqList[0][:len(seqList[1])-lind+50]),
id=pdb_id,
description= 'rc:' + str(find) + '-' + str(len(seqList[1])-lind+50)))
else :
# Upstream/downstream extension not available
# use start/end of sequence
RfinList.append(
SeqRecord(Seq(seqList[0]),
id=pdb_id,
description= 'rc:' + str(find) + '-' + str(len(seqList[1]))))
#print(RfinList[0])
# Write output files
SeqIO.write(PfinList, str(palmout), "fasta")
SeqIO.write(RfinList, str(rdrpout), "fasta")
if __name__ == '__main__':
palmgrab(sys.argv[1],sys.argv[2],sys.argv[3])