-
Notifications
You must be signed in to change notification settings - Fork 3
/
icu_admission_prediction.py
116 lines (85 loc) · 3.1 KB
/
icu_admission_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""ICU_admission_prediction.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1fKMq_5tHt6CrD3z5X3qdScMTQvRxMqRN
"""
import numpy as np
import pandas as pd
# Load the ICU prediction spreadsheet. The path is the Colab upload location
# used by the original notebook; adjust it when running somewhere else.
data = pd.read_excel('/content/Kaggle_Sirio_Libanes_ICU_Prediction.xlsx')

# Exploratory summaries. As bare expressions these are only rendered when they
# are the last statement of a notebook cell; print them explicitly so the
# output also appears when the file is executed as a plain script.
print(data.head())          # first rows
print(data.shape)           # (rows, columns)
print(data.describe())      # per-column summary statistics
print(data.isnull().sum())  # missing-value count per column
"""# Data Preprocessing"""
# Integer-encode every column that holds text so that the feature matrix
# built from `data` afterwards is purely numeric.
for column in data.columns:
    series = data[column]
    # Numeric columns cannot hold strings, so skip them without scanning.
    if pd.api.types.is_numeric_dtype(series):
        continue
    # Look at every value instead of only the first row: a text column whose
    # first entry is missing would otherwise be left unencoded, and an empty
    # frame would raise IndexError on `.iloc[0]`.
    if series.map(lambda value: isinstance(value, str)).any():
        codes, _ = pd.factorize(series)
        # NOTE: pd.factorize encodes missing values as -1 by default.
        data[column] = codes
"""# Implementing Model"""
from sklearn.model_selection import train_test_split

# Independent vector: every column except the last one.
X = data.iloc[:, :-1].values
# Dependent vector: the last column.
y = data.iloc[:, -1].values

# Hold out 20% of the rows for testing. `stratify=y` keeps the class ratio the
# same in both splits, and the fixed `random_state` makes the split (and so
# every metric reported further down) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=42
)
"""# Data Normalization"""
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only (after np.nan_to_num has replaced
# the NaNs), then apply that same fitted scaling to both splits so that no
# statistics from the test rows influence the transformation.
scaler = StandardScaler().fit(np.nan_to_num(X_train))
X_train, X_test = (
    scaler.transform(np.nan_to_num(part)) for part in (X_train, X_test)
)
from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
# Row counts per value of GENDER and of AGE_ABOVE65, split by the ICU column.
# The column is passed as `x=` because seaborn >= 0.12 accepts only `data`
# positionally; a positional column name collides with `data=` and raises
# TypeError. Each plot gets its own figure so the second one is not drawn on
# top of the first when the file runs outside a notebook.
for feature in ('GENDER', 'AGE_ABOVE65'):
    plt.figure()
    sns.countplot(x=feature, hue='ICU', data=data)
plt.show()
"""# Logistic Regression"""
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the scaled training split and
# predict hard class labels for the held-out rows.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

acc = metrics.accuracy_score(y_test, y_pred)
print('accuracy ' + str(acc))

# Per-class precision / recall / F-score, reported for labels 0 and 1.
prfs = precision_recall_fscore_support(y_test, y_pred, labels=[0, 1])

# The ROC curve needs a continuous score, not the thresholded 0/1 labels:
# hard predictions collapse the curve to a single operating point. Use the
# predicted probability of the positive class (column 1, i.e. label 1 given
# the 0/1 labels used above).
y_score = logreg.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
print('auc ' + str(roc_auc))

print('precision:', prfs[0])
print('recall', prfs[1])
print('fscore', prfs[2])
"""# Decision Tree Classifier"""
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve, auc

# Train a decision tree on the training split. The fixed `random_state` makes
# the fitted tree, and therefore the printed metrics, repeatable.
clf = DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)

# Predict hard class labels for the test split.
y_pred = clf.predict(X_test)

acc = metrics.accuracy_score(y_test, y_pred)
print('accuracy ' + str(acc))

# Per-class precision / recall / F-score, reported for labels 0 and 1.
prfs = precision_recall_fscore_support(y_test, y_pred, labels=[0, 1])

# Build the ROC curve from the predicted probability of the positive class
# (column 1) rather than from the thresholded labels, which would reduce the
# curve to a single operating point.
y_score = clf.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
print('auc ' + str(roc_auc))

print('precision:', prfs[0])
print('recall', prfs[1])
print('fscore', prfs[2])
"""# Random Forest Classifier"""
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_fscore_support

# Random forest of 200 entropy-criterion trees.
# - n_jobs=-1 uses all available cores instead of a hard-coded 64 workers,
#   which oversubscribes machines with fewer cores.
# - random_state pins the bootstrap/feature sampling so results are repeatable.
model = RandomForestClassifier(
    n_jobs=-1,
    n_estimators=200,
    criterion='entropy',
    oob_score=True,
    random_state=42,
)
model.fit(X_train, y_train)
# oob_score=True was requested, so report the out-of-bag estimate it produces.
print('oob score ' + str(model.oob_score_))

# Predict hard class labels for the test split.
y_pred = model.predict(X_test)

acc = metrics.accuracy_score(y_test, y_pred)
print('accuracy ' + str(acc))

# Per-class precision / recall / F-score, reported for labels 0 and 1.
prfs = precision_recall_fscore_support(y_test, y_pred, labels=[0, 1])

# Build the ROC curve from the predicted probability of the positive class
# (column 1) rather than from the thresholded labels, which would reduce the
# curve to a single operating point.
y_score = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
print('auc ' + str(roc_auc))

print('precision:', prfs[0])
print('recall', prfs[1])
print('fscore', prfs[2])