-
Notifications
You must be signed in to change notification settings - Fork 2
/
immune_composition.py
78 lines (51 loc) · 2.52 KB
/
immune_composition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter
from statsmodels.stats.multitest import multipletests
#-----------------------------------------
# Recurrence
# Fit one univariate Cox proportional-hazards model per cell type against
# time to recurrence, collect coefficient / hazard ratio / p-value, apply a
# Benjamini-Hochberg correction across cell types, and write the table to
# results/immune_composition_recurrence.csv.
cellprevalence_path = "intermediate_data/cellprevalence_df.csv"
cellprevalence_df = pd.read_csv(cellprevalence_path, index_col="ID")
# Get a list of all the cell types to test
# NOTE(review): assumes the first 15 columns (after the "ID" index) are the
# cell-type columns -- confirm against the CSV layout.
each_celltype = cellprevalence_df.columns[:15]
univariate_results = []
for celltype in each_celltype:
    # Keep only this cell type plus the duration and event columns.
    this_cell = cellprevalence_df[[celltype, "Recurrence_time", "Recurrence"]]
    # Perform univariate Cox regression for this cell type
    cox = CoxPHFitter()
    cox.fit(this_cell, duration_col="Recurrence_time", event_col="Recurrence")
    summary_df = cox.summary
    # The model has a single covariate, so the first entry is this cell type.
    coefficients = summary_df["coef"].values[0]
    # hazard_ratios_ is a Series labelled by covariate name (per the lifelines
    # docs); select by position explicitly, since integer keys on a
    # label-indexed Series are deprecated in pandas.
    hazards = cox.hazard_ratios_.iloc[0]
    p_values = summary_df["p"].values[0]
    univariate_results.append([celltype, coefficients, hazards, p_values])
univariate_results_df = pd.DataFrame(univariate_results, columns=["Celltype", "Coef", "HR", "P"])
univariate_results_df.sort_values(by="P", ascending=True, inplace=True)
# Taken after sorting, so the corrected values line up row-for-row when
# assigned back as a plain array.
p_values = univariate_results_df["P"]
corrected = multipletests(p_values, method="fdr_bh")[1]
univariate_results_df["BH-Corrected FDR"] = corrected
# Make sure the output directory exists so the write cannot fail after all
# the models have already been fitted.
os.makedirs("results", exist_ok=True)
univariate_results_df.to_csv("results/immune_composition_recurrence.csv", index=False)
#-----------------------------------------
# Survival
# Fit one univariate Cox proportional-hazards model per cell type against
# survival time, collect coefficient / hazard ratio / p-value, apply a
# Benjamini-Hochberg correction across cell types, and write the table to
# results/immune_composition_survival.csv.
cellprevalence_path = "intermediate_data/cellprevalence_df.csv"
cellprevalence_df = pd.read_csv(cellprevalence_path, index_col="ID")
# Get a list of all the cell types to test
# NOTE(review): assumes the first 15 columns (after the "ID" index) are the
# cell-type columns -- confirm against the CSV layout.
each_celltype = cellprevalence_df.columns[:15]
univariate_results = []
for celltype in each_celltype:
    # Keep only this cell type plus the duration and event columns.
    this_cell = cellprevalence_df[[celltype, "Survival_time", "Survival"]]
    # Perform univariate Cox regression for this cell type
    cox = CoxPHFitter()
    cox.fit(this_cell, duration_col="Survival_time", event_col="Survival")
    summary_df = cox.summary
    # The model has a single covariate, so the first entry is this cell type.
    coefficients = summary_df["coef"].values[0]
    # hazard_ratios_ is a Series labelled by covariate name (per the lifelines
    # docs); select by position explicitly, since integer keys on a
    # label-indexed Series are deprecated in pandas.
    hazards = cox.hazard_ratios_.iloc[0]
    p_values = summary_df["p"].values[0]
    univariate_results.append([celltype, coefficients, hazards, p_values])
univariate_results_df = pd.DataFrame(univariate_results, columns=["Celltype", "Coef", "HR", "P"])
univariate_results_df.sort_values(by="P", ascending=True, inplace=True)
# Taken after sorting, so the corrected values line up row-for-row when
# assigned back as a plain array.
p_values = univariate_results_df["P"]
corrected = multipletests(p_values, method="fdr_bh")[1]
univariate_results_df["BH-Corrected FDR"] = corrected
# Make sure the output directory exists so the write cannot fail after all
# the models have already been fitted.
os.makedirs("results", exist_ok=True)
univariate_results_df.to_csv("results/immune_composition_survival.csv", index=False)