-
Notifications
You must be signed in to change notification settings - Fork 3
/
read_results_DE.py
102 lines (83 loc) · 4.65 KB
/
read_results_DE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pickle
import os
import numpy as np
save_path = "../symsim_scVI/symsim_result/DE/"
results_DE_scVI_A_all = pickle.load(open(os.path.join(save_path, "results0.dic"), "rb"))
results_DE_scVI_B_all = pickle.load(open(os.path.join(save_path, "results1.dic"), "rb"))
results_DE_scVI_AB_all = pickle.load(open(os.path.join(save_path, "results2.dic"), "rb"))
results_DE_scANVI_all = pickle.load(open(os.path.join(save_path, "results3.dic"), "rb"))
for rep in range(50):
results_DE_scVI_A = pickle.load(open(os.path.join(save_path, "results0.%i.dic"%rep), "rb"))
results_DE_scVI_B = pickle.load(open(os.path.join(save_path, "results1.%i.dic"%rep), "rb"))
results_DE_scVI_AB = pickle.load(open(os.path.join(save_path, "results2.%i.dic"%rep), "rb"))
results_DE_scANVI = pickle.load(open(os.path.join(save_path, "results3.%i.dic"%rep), "rb"))
for x in results_DE_scVI_A.keys():
for y in results_DE_scVI_A[x].keys():
results_DE_scVI_A_all[x][y].append(results_DE_scVI_A[x][y][0])
results_DE_scVI_B_all[x][y].append(results_DE_scVI_B[x][y][0])
results_DE_scVI_AB_all[x][y].append(results_DE_scVI_AB[x][y][0])
results_DE_scANVI_all[x][y].append(results_DE_scANVI[x][y][0])
for k in results_DE_scVI_A.keys():
for res_type in results_DE_scVI_A[k].keys():
print(k, res_type)
print("results_DE_scVI_A", np.mean(results_DE_scVI_A_all[k][res_type]),
"+-", np.std(results_DE_scVI_A_all[k][res_type]))
print("results_DE_scVI_B", np.mean(results_DE_scVI_B_all[k][res_type]),
"+-", np.std(results_DE_scVI_B_all[k][res_type]))
print("results_DE_scVI_AB", np.mean(results_DE_scVI_AB_all[k][res_type]),
"+-", np.std(results_DE_scVI_AB_all[k][res_type]))
print("results_DE_scANVI", np.mean(results_DE_scANVI_all[k][res_type]),
"+-", np.std(results_DE_scANVI_all[k][res_type]))
scVI_res = {'scVI_A':{},'scVI_B':{},'scVI_AB':{},'scANVI':{}}
for i, type in enumerate(['scVI_A','scVI_B','scVI_AB','scANVI']):
X = pickle.load(open(os.path.join(save_path, "results%i.dic"%i), "rb"))
scVI_res[type] = X
for rep in range(30):
X = pickle.load(open(os.path.join(save_path, "results%i.%i.dic" % (i,rep)), "rb"))
for x in X.keys():
for y in X[x].keys():
scVI_res[type][x][y].append(X[x][y][0])
for type in scVI_res.keys():
for comparison in scVI_res[type].keys():
for res_type in scVI_res[type][comparison].keys():
print("results_scVI %s %s %s"%(type, comparison, res_type), np.mean(scVI_res[type][comparison][res_type]),
"+-", np.std(scVI_res[type][comparison][res_type]))
from sklearn.metrics import roc_auc_score
from scipy.stats import spearmanr
from scipy.stats import kendalltau
import os
import numpy as np
import pandas as pd
save_path = "../symsim_scVI/symsim_result/DE/"
os.chdir('/data/yosef2/users/chenling/HarmonizationSCANVI')
def eval_de(log_fold_change, logFC):
"""
:param log_fold_change: groundtruth
:param bayes_f: non-log Bayes Factor
:return:
"""
auc_1 = roc_auc_score(np.abs(log_fold_change) >= 0.6, np.abs(logFC))
auc_2 = roc_auc_score(np.abs(log_fold_change) >= 0.8, np.abs(logFC))
spear = spearmanr(logFC, log_fold_change)[0]
kend = kendalltau(logFC, log_fold_change)[0]
return auc_1, auc_2, np.abs(spear), np.abs(kend)
theoretical_FC = pd.read_csv(os.path.join(save_path, "theoreticalFC.csv"),
sep=",", index_col=0, header=0)
edgeR_res={'.scVI':{},'.scANVI':{},'':{}}
for type in ['.scVI','.scANVI','']:
edgeR_res[type] = {'1_2':{},'2_3':{},'2_4':{},'4_5':{}}
for comparison in ['1_2','2_3','2_4','4_5']:
edgeR_res[type][comparison] = {'A': {}, 'B': {}, 'AB': {}}
for batch in ['A', 'B', 'AB']:
edgeR_res[type][comparison][batch] = {'R1':[],'R2':[],'S':[],'K':[]}
for rep in range(50):
X = pd.read_csv(os.path.join(save_path, 'EdgeR/%s%s.%i.%s.edgeR.csv') % (batch, type, (rep+1), comparison))
stats = eval_de(theoretical_FC[comparison.replace('_', '')], X['logFC'])
for i,x in enumerate(['R1','R2','S','K']):
edgeR_res[type][comparison][batch][x].append(stats[i])
for type in edgeR_res.keys():
for comparison in edgeR_res[type].keys():
for batch in edgeR_res[type][comparison].keys():
for res_type in edgeR_res[type][comparison][batch].keys():
print("results_EdgeR%s %s %s %s"%(type, comparison, batch, res_type), np.mean(edgeR_res[type][comparison][batch][res_type]),
"+-", np.std(edgeR_res[type][comparison][batch][res_type]))