-
Notifications
You must be signed in to change notification settings - Fork 0
/
zscore.py
89 lines (64 loc) · 2.16 KB
/
zscore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
# coding: utf-8
"""Compute robust z-scores of per-TE motif enrichment against control sets.

For one de novo motif, the script reads a tab-separated count table for the
motif itself and for each of ``N_CONTROLS`` control runs, converts every table
into per-TE log2 enrichment scores, and then expresses the motif score of each
TE as a robust z-score (median / IQR based) relative to the control scores.
One z-score per line is written to
``denovo_<motif>_rz_score_enrichment.txt`` in the working directory.
"""
import argparse
import csv
import math

import numpy as np

# File whose line count defines how many TE rows are scored.
TE_LABEL_PATH = 'data/te_tf_mat_TElabel_cortex.txt'
# Control tables are numbered 1..N_CONTROLS.
N_CONTROLS = 50
# IQR of a standard normal distribution (approx.); IQR / 1.3489 estimates sigma.
IQR_TO_SIGMA = 1.3489


def parse_args(argv=None):
    """Parse the command line (``argv=None`` means ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-denovo', '--denovo', type=str, default='None',
                        help='denovo motif_number')
    parser.add_argument('-pse', '--pse', type=float, default=0.5,
                        help='pseudocount')
    return parser.parse_args(argv)


def read_te_labels(path):
    """Return the TE label file split on newlines.

    NOTE(review): ``split('\\n')`` is kept from the original script, so a
    trailing newline in the file yields one extra (empty) label and therefore
    one extra scored row -- confirm the label file has no trailing newline.
    """
    with open(path) as handle:
        return handle.read().split('\n')


def load_counts(path):
    """Load a tab-separated table as float32 and transpose it.

    After the transpose each row holds the four values of one TE, which is
    how ``log2_enrichment`` indexes them.
    """
    with open(path) as handle:
        rows = list(csv.reader(handle, delimiter='\t'))
    return np.array(rows).astype('float32').T


def log2_enrichment(counts, pse):
    """Return the log2 enrichment for one TE.

    The ratio is ``(counts[0] / counts[1]) / (counts[2] / counts[3])``.
    When ``pse > 0`` it is added to the ratio before taking the log.
    Otherwise no pseudocount is used and a zero ``counts[0]`` gives NaN
    instead of ``log2(0)``.
    """
    first, second, third, fourth = counts[0], counts[1], counts[2], counts[3]
    if pse > 0:
        return math.log2((first / second) / (third / fourth) + pse)
    if first == 0:
        # No pseudocount: log2(0) is undefined, report NaN for this TE.
        return float('nan')
    return math.log2((first / second) / (third / fourth))


def score_rows(counts, n_rows, pse):
    """Score the first ``n_rows`` rows of ``counts``.

    Rows are indexed explicitly (rather than sliced) so that a table with
    fewer rows than labels still raises ``IndexError`` instead of being
    silently truncated.
    """
    return [log2_enrichment(counts[i], pse) for i in range(n_rows)]


def robust_zscore(value, controls):
    """Return ``(value - median(controls)) / (IQR(controls) / 1.3489)``.

    NaN control scores are ignored by the median and percentiles.
    Returns ``None`` when ``controls`` is empty.
    """
    if len(controls) == 0:
        return None
    q75, q25 = np.nanpercentile(controls, [75, 25])
    iqr = q75 - q25
    return (value - np.nanmedian(controls)) / (iqr / IQR_TO_SIGMA)


def main(argv=None):
    """Run the full pipeline: read inputs, score, and write the z-scores."""
    args = parse_args(argv)
    motif = args.denovo
    pse = args.pse

    n_te = len(read_te_labels(TE_LABEL_PATH))
    prefix = 'out/denovo_' + str(motif) + '_TE_log2fold_count_motif'

    score_te = score_rows(load_counts(prefix + '_detail.bed'), n_te, pse)

    # One list of per-TE scores per control run; transposed so that row i
    # holds the N_CONTROLS control scores of TE i.
    control = np.array([
        score_rows(
            load_counts(prefix + '_control_' + str(j) + '_detail.bed'),
            n_te,
            pse,
        )
        for j in range(1, N_CONTROLS + 1)
    ]).T

    rz = [robust_zscore(te_score, te_controls)
          for te_score, te_controls in zip(score_te, control)]

    with open('denovo_' + str(motif) + '_rz_score_enrichment.txt', 'w') as out:
        out.writelines(str(z) + "\n" for z in rz)


if __name__ == '__main__':
    main()