-
Notifications
You must be signed in to change notification settings - Fork 0
/
IceLogoPrepImroved.py
101 lines (74 loc) · 3.02 KB
/
IceLogoPrepImroved.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import warnings; warnings.simplefilter('ignore')
import requests as r
from Bio import SeqIO
from io import StringIO
from Bio.Seq import Seq
from time import sleep
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import os
import logging
import argparse
from tqdm import tqdm
session = r.Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
logging.basicConfig(level=logging.INFO)
#Define CL arguments
argParser = argparse.ArgumentParser()
argParser.add_argument("input", help= "Input file name")
argParser.add_argument("-o", "--output", metavar="DIR", help= "Full path to output directory")
argParser.add_argument("-n", "--name", help="Output file name (Default: IceLogoSeqs.txt)", default="IceLogoSeqs.txt")
argParser.add_argument("-a", "--AA", help="center AA; if variable, leave empty")
#Parse arguments
args = argParser.parse_args()
input = vars(args)["input"]
output = vars(args)["output"]
name = vars(args)["name"]
AA = vars(args)["AA"]
if AA not in ["A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","X","Y", None]:
raise ValueError("Not a valid amino acid")
if not output:
output = os.path.dirname(__file__)
logging.info("IceLogo Seq preparation!")
logging.info("Input file: {}".format(input))
logging.info("Output directory: {}".format(output))
logging.info("Writing IceLogo sequences to {}".format(output))
os.chdir(output)
with open(input) as fn:
with open(name, "w") as of:
for line in tqdm(fn.readlines()[1:]):
linelist = line.split("\t")
protein = linelist[0]
position = int(linelist[1])
#Get Uniprot sequence for protein
baseUrl="http://www.uniprot.org/uniprot/"
currentUrl=baseUrl+protein+".fasta"
sleep(1)
response = session.post(currentUrl)
cData=''.join(response.text)
Seq=StringIO(cData)
pSeq=SeqIO.parse(Seq,'fasta')
for record in pSeq:
Uniprotseq = record.seq
if AA:
if Uniprotseq[position-1] == AA:
cheat = 10*"X" + str(Uniprotseq) + 10*"X"
newpos = position + 9
prefix = cheat[newpos-8:newpos]
suffix = cheat[newpos+1:newpos+9]
mod_res = cheat[newpos]
alignment = prefix + mod_res + suffix
of.write(alignment + "\n")
else:
cheat = 10*"X" + str(Uniprotseq) + 10*"X"
newpos = position + 9
prefix = cheat[newpos-8:newpos]
suffix = cheat[newpos+1:newpos+9]
mod_res = cheat[newpos]
alignment = prefix + mod_res + suffix
of.write(alignment + "\n")
of.close()
fn.close()