-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_sea.py
154 lines (123 loc) · 5.57 KB
/
train_sea.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import numpy as np
import pandas as pd
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.utils import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import scipy
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from keras.optimizers import SGD
from keras.applications.imagenet_utils import decode_predictions
from keras.layers import Activation, Dense, GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.models import Model
from keras.utils import to_categorical
from keras import optimizers, layers
from keras import backend as K
from train_util import read_images, load_xy, get_checkpoint_tensorboard, create_model_grayscale, get_fresh_weights, base_output, dense1_linear_output, train_validate_test_split
#from efficientnet import EfficientNetB4
import efficientnet.keras as efn
# Model input shape: EfficientNetB4's native 380x380 resolution, 3 RGB channels.
new_shape = (380, 380, 3)
# Spatial size only (height, width); presumably consumed by image loaders elsewhere — TODO confirm.
IMG_SHAPE = (380, 380)
# Output directory for TensorBoard logs (also reused below as the prediction-dump directory).
tensorboard_path = './tensorboard_best_salmon_sea_not_smolt_batch_16_21_april_2020_v1.1.0'
# ModelCheckpoint filename template: epoch number and validation loss are filled in per epoch.
checkpoint_path = './checkpoints_best_salmon_sea_not_smolt_batch_16_21_april_2020_v1.1.0/salmon_scale_efficientnetB4.{epoch:03d}-{val_loss:.2f}.hdf5'
def do_train_sea():
    """Train an EfficientNetB4 regressor predicting salmon sea age from scale images.

    Loads images and labels via ``train_util.load_xy()``, discards samples with
    unknown sea age (labelled ``-1``), splits into train/val/test, trains with
    on-the-fly augmentation, evaluates on the test split, and writes per-sample
    test predictions to ``<tensorboard_path>/y_pred_sea1.txt``.

    Side effects: pins CUDA to GPU 0, writes TensorBoard logs, model
    checkpoints, and the prediction dump. Returns nothing.
    """
    # NOTE(review): the original also declared to_predict/dataset_size_selected
    # global, but they were never used anywhere in this function; dropped.
    global new_shape, tensorboard_path, checkpoint_path
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # train on GPU 0 only
    a_batch_size = 12
    rb_imgs, all_sea_age, all_smolt_age, all_farmed_class, all_spawn_class, all_filenames = load_xy()

    # Number of samples with a known sea age (-1.0 marks "unknown").
    uten_ukjent = len(all_sea_age) - all_sea_age.count(-1.0)
    rb_imgs2 = np.empty(shape=(uten_ukjent,) + new_shape)
    unique, counts = np.unique(all_sea_age, return_counts=True)
    print("age distrib:" + str(dict(zip(unique, counts))))

    # Keep only samples whose sea age is known.
    all_sea_age2 = []
    all_filenames2 = []
    found_count = 0
    for i in range(0, len(all_sea_age)):
        if all_sea_age[i] > -1:
            rb_imgs2[found_count] = rb_imgs[i]
            all_sea_age2.append(all_sea_age[i])
            all_filenames2.append(all_filenames[i])
            found_count += 1
    assert found_count == uten_ukjent
    age = all_sea_age2
    rb_imgs = rb_imgs2

    early_stopper = EarlyStopping(patience=20)
    # NOTE(review): the original passed zca_whitening=True but never called
    # train_datagen.fit(), so Keras only warned and whitening was silently
    # skipped. Set to False explicitly to match the actual runtime behaviour
    # (true ZCA on 380*380*3 inputs would be computationally infeasible anyway).
    train_datagen = ImageDataGenerator(
        zca_whitening=False,
        width_shift_range=5,
        height_shift_range=5,
        zoom_range=0.,
        rotation_range=360,
        horizontal_flip=False,
        vertical_flip=True,
        rescale=1./255)

    train_idx, val_idx, test_idx = train_validate_test_split(range(0, len(rb_imgs)))

    def _gather(idx_list):
        """Materialize the image tensor and age list for one split's indices."""
        imgs = np.empty(shape=(len(idx_list),) + new_shape)
        ages = []
        for pos, idx in enumerate(idx_list):
            imgs[pos] = rb_imgs[idx]
            ages.append(age[idx])
        return imgs, ages

    train_rb_imgs, train_age = _gather(train_idx)
    val_rb_imgs, val_age = _gather(val_idx)
    test_rb_imgs, test_age = _gather(test_idx)
    # Filenames are kept for the test split only, for the prediction dump.
    test_age_names = [all_filenames2[idx] for idx in test_idx]

    train_age = np.vstack(train_age)
    val_age = np.vstack(val_age)
    test_age = np.vstack(test_age)

    # The generator rescales train images; val/test must be scaled manually.
    val_rb_imgs = np.multiply(val_rb_imgs, 1./255)
    test_rb_imgs = np.multiply(test_rb_imgs, 1./255)

    train_generator = train_datagen.flow(train_rb_imgs, train_age, batch_size=a_batch_size)

    rgb_efficientNetB4 = efn.EfficientNetB4(include_top=False, weights='imagenet', input_shape=new_shape, classes=2)
    z = dense1_linear_output(rgb_efficientNetB4)  # single linear unit -> regression head
    scales = Model(inputs=rgb_efficientNetB4.input, outputs=z)

    learning_rate = 0.00007
    adam = optimizers.Adam(lr=learning_rate)
    for layer in scales.layers:
        layer.trainable = True  # fine-tune the entire backbone, not just the head
    scales.compile(loss='mse', optimizer=adam, metrics=['accuracy', 'mse', 'mape'])

    tensorboard, checkpointer = get_checkpoint_tensorboard(tensorboard_path, checkpoint_path)
    classWeight = None  # class weights only apply to classification; unused here
    history_callback = scales.fit_generator(train_generator,
                                            steps_per_epoch=1600,
                                            epochs=150,
                                            callbacks=[early_stopper, tensorboard, checkpointer],
                                            validation_data=(val_rb_imgs, val_age),
                                            class_weight=classWeight)

    test_metrics = scales.evaluate(x=test_rb_imgs, y=test_age)
    print("test metric:" + str(scales.metrics_names))
    print("test metrics:" + str(test_metrics))
    print("precision, recall, f1")
    y_pred_test = scales.predict(test_rb_imgs, verbose=1)
    # NOTE(review): the model has a single linear output, so argmax over
    # axis=1 of an (n, 1) array is always 0 and the report/confusion matrix
    # below are degenerate for this regression head. Kept as-is to preserve
    # the script's output; consider rounding predictions to integer ages
    # instead if a per-class report is actually wanted.
    y_pred_test_bool = np.argmax(y_pred_test, axis=1)
    y_true_bool = np.argmax(test_age, axis=1)
    argmax_test = np.argmax(test_age, axis=1)  # np.argmax is the inverse of to_categorical
    unique, counts = np.unique(argmax_test, return_counts=True)
    print("test ocurrence of each class:" + str(dict(zip(unique, counts))))
    print("classification_report")
    print(classification_report(y_true_bool, y_pred_test_bool))
    print("confusion matrix")
    print(str(confusion_matrix(y_true_bool, y_pred_test_bool)))
    print("*** y_test****")
    print(y_pred_test.shape)

    # Fix: the original passed a *set* as columns, making column order
    # nondeterministic; use a list. Also flatten the (n, 1) prediction and
    # label arrays before column assignment, which pandas otherwise rejects.
    df_output = pd.DataFrame(columns=['y', 'y_hat', 'sea_name'])
    df_output['sea_name'] = test_age_names
    df_output['y_hat'] = np.asarray(y_pred_test).ravel()
    df_output['y'] = np.asarray(test_age).ravel()
    df_output.to_csv(tensorboard_path + '/y_pred_sea1.txt', index=False, sep=' ')
if __name__ == '__main__':
    # Run the training pipeline only when executed as a script, not on import.
    do_train_sea()