-
Notifications
You must be signed in to change notification settings - Fork 0
/
bagging_logistic_regression_lasso.py
68 lines (59 loc) · 2.78 KB
/
bagging_logistic_regression_lasso.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import grouping
import load_patient_info
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
# Case/control lookup tables provided by the project-local helper modules.
# NOTE(review): `matching` is used below as a mapping from a positive patient
# id to a sequence of matched negative ids, and `patients_info` as a mapping
# from patient id to a feature vector -- confirm against grouping.py and
# load_patient_info.py.
matching = grouping.matching
# Materialise the keys as a list: on Python 3 `.keys()` returns a view that
# can be neither indexed nor sliced, so the split below would raise TypeError.
matching_keys = list(matching.keys())
patients_info = load_patient_info.patients_info
#######################################################################################################################
# Split the positive patients by position in `matching_keys`: the first
# N_TRAIN_POSITIVE ids form the training pool, every remaining id is held out
# for the final test set.
N_TRAIN_POSITIVE = 985
x_train_file_names_positive = matching_keys[:N_TRAIN_POSITIVE]
x_test_file_names_positive = matching_keys[N_TRAIN_POSITIVE:]
# Matched negatives for the training positives, laid out match-index-major:
# every positive's match 0 comes first, then every positive's match 1, and so
# on for the first 200 matches of each positive.
x_train_file_names_negative = [
    matching[positive_name][match_idx]
    for match_idx in range(200)
    for positive_name in x_train_file_names_positive
]
# Same layout for the held-out test positives.
x_test_file_names_negative = [
    matching[positive_name][match_idx]
    for match_idx in range(200)
    for positive_name in x_test_file_names_positive
]
# Held-out feature matrix. Rows for all test positives come first (in
# `x_test_file_names_positive` order), followed by the rows of each positive's
# matched negatives, visited in that same positive order.
test = np.array(
    [patients_info[positive_name] for positive_name in x_test_file_names_positive]
    + [
        patients_info[negative_name]
        for positive_name in x_test_file_names_positive
        for negative_name in matching[positive_name]
    ]
)
#######################################################################################################################
# 5-fold cross-validation over the training positives with a bagged,
# L1-penalised logistic regression.
#
# For each fold, N_BAGS models are fitted. Model j is trained on all training
# positives of the fold plus the j-th matched negative of each of those
# positives (a balanced sample). The positive-class probabilities of all
# models are SUMMED (not averaged) for both the fold's validation rows and the
# held-out `test` rows, and the two summed vectors are written to text files.
N_FOLDS = 5
N_BAGS = 200  # one model per matched-negative index
RESULT_PREFIX = "./result/bagging_logistic_regression_lasso/fold_"

# Contiguous, equal-sized folds (985 // 5 == 197 for the current split). Any
# remainder positives never land in a validation fold and are always trained on.
fold_size = len(x_train_file_names_positive) // N_FOLDS

for fold_num in range(N_FOLDS):
    print("start " + str(fold_num + 1) + " fold")
    tmp_validation_names_positive = \
        x_train_file_names_positive[fold_num * fold_size:(fold_num + 1) * fold_size]
    # Set lookup keeps the filter linear instead of quadratic in the pool size.
    held_out_names = set(tmp_validation_names_positive)
    tmp_training_names_positive = \
        [item for item in x_train_file_names_positive if item not in held_out_names]

    # Validation rows: the fold's positives first, then all of their matched
    # negatives, grouped per positive.
    validation_X = np.array(
        [patients_info[patient] for patient in tmp_validation_names_positive]
        + [
            patients_info[non_patient]
            for patient in tmp_validation_names_positive
            for non_patient in matching[patient]
        ]
    )

    # Accumulators sized from the data rather than hard-coded row counts.
    this_fold_test_result = np.zeros(len(test))
    this_fold_validation_result = np.zeros(len(validation_X))

    # The positive half of the design matrix and the labels are identical for
    # every bag, so build them once per fold.
    positive_rows = [patients_info[item] for item in tmp_training_names_positive]
    n_train_positive = len(tmp_training_names_positive)
    y = np.concatenate((np.ones(n_train_positive), np.zeros(n_train_positive)), axis=0)

    for j in tqdm(range(N_BAGS)):
        negative_rows = [patients_info[matching[item][j]] for item in tmp_training_names_positive]
        X = np.array(positive_rows + negative_rows)
        # An explicit solver is required: scikit-learn's default `lbfgs`
        # solver rejects penalty='l1'; `liblinear` supports it.
        logistic = LogisticRegression(penalty='l1', solver='liblinear')
        logistic.fit(X, y)
        # Column 1 of predict_proba is the probability of label 1 (positive).
        this_fold_test_result += logistic.predict_proba(test)[:, 1]
        this_fold_validation_result += logistic.predict_proba(validation_X)[:, 1]

    np.savetxt(RESULT_PREFIX + str(fold_num + 1) + "_test", this_fold_test_result)
    np.savetxt(RESULT_PREFIX + str(fold_num + 1) + "_validation", this_fold_validation_result)