experiment.py
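"""Random hyperparameter search for the MultiResolutionSegmentation model.

Each trial trains the model in two rounds of transfer learning: round 1 on the
DeepSolar classification dataset, round 2 (fine-tuning plus segmentation
evaluation) on the NY dataset. The best trial, chosen by average Jaccard
similarity on the NY validation set, is evaluated on the NY test set; the model
weights and a JSON log of all results are then written to disk.
"""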
import argparse
import heapq
import json
import random

import numpy as np
import torch
from torch import optim
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets import ImageFolder

from multi_resolution_segmentation import MultiResolutionSegmentation, train_multi_segmentation
from utils import train_transform, transform, to_device, SegmentationDataset, train_or_eval, eval_segmentation, is_regular_img

# Use different learning rates for the two rounds of training
lr_round_1 = 1e-3  # Initial learning rate used in the DeepSolar paper (Yu et al. 2018)
lr_round_2 = 3e-5  # Similar to the 1e-5 used in https://keras.io/guides/transfer_learning/


def seed_global_rngs(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def random_search(train_loader: DataLoader,
                  val_loader: DataLoader,
                  ny_train_loader: DataLoader,
                  ny_val_loader: DataLoader,
                  ny_val_loader_seg: DataLoader,
                  num_trials: int = 10,
                  seed: int = 5670,
                  ny_num_epochs: int = 10):
    rng = np.random.default_rng(seed)
    # Set alpha to a higher value during training on the DeepSolar dataset because it's unbalanced
    # Draw from a reciprocal (log-uniform) distribution over [4, 16]
    alpha1_values = 2**rng.uniform(2, 4, num_trials)
    # Set alpha to a lower value during fine-tuning
    # Draw from a reciprocal (log-uniform) distribution over [1, 8]
    alpha2_values = 2**rng.uniform(0, 3, num_trials)
    # To generate combinations of endpoints:
    # 1. For each trial, generate 4 random integers in {0, 1}
    # 2. If all of them are 0, generate another 4 random integers
    # 3. endpoints_values[t, k - 1] means that endpoint k should be used in trial t
    endpoints_values = rng.integers(0, 2, (num_trials, 4))
    for t in range(num_trials):
        # Keep generating new random integers until at least one of them is 1
        while not np.any(endpoints_values[t]):
            endpoints_values[t] = rng.integers(0, 2, 4)
    # Choose the 'nearest' or 'bilinear' upsampling mode uniformly at random
    upsampling_mode_values = rng.choice(['nearest', 'bilinear'], num_trials)
    # Store results and print them out at the end
    results = []
    # Start random search
    for t in range(num_trials):
        alpha1 = alpha1_values[t]
        alpha2 = alpha2_values[t]
        # Class constructor expects a list of endpoint numbers, not a bit vector
        endpoints = [k + 1 for k in range(4) if endpoints_values[t, k]]
        # Choose an upsampling mode
        upsampling_mode = upsampling_mode_values[t]
        # Re-seed the global RNGs before each trial to improve reproducibility
        seed_global_rngs(seed)
        # Create and train model
        model = to_device(MultiResolutionSegmentation(pos_class_weight=alpha1, endpoints=endpoints))
        print(f'Trial {t}: alpha1 = {alpha1}, alpha2 = {alpha2}, endpoints = {endpoints}')
        # Use RMSProp parameters from the DeepSolar paper (alpha = second-moment discount rate),
        # except for learning rate decay and epsilon
        optimizer = optim.RMSprop(model.parameters(), alpha=0.9, momentum=0.9, lr=lr_round_1)
        # Round 1
        round_1_results, round_1_training_history = \
            train_multi_segmentation(model, train_loader, val_loader, optimizer, num_epochs=1)
        # Round 2
        model.set_alpha(alpha2)
        optimizer = optim.RMSprop(model.parameters(), alpha=0.9, momentum=0.9, lr=lr_round_2)
        round_2_results, round_2_training_history = \
            train_multi_segmentation(model, ny_train_loader, ny_val_loader, optimizer, num_epochs=ny_num_epochs)
        round_2_results.update(eval_segmentation(model, ny_val_loader_seg))
        results.append({
            'trial': t,
            'alpha1': alpha1,
            'alpha2': alpha2,
            'endpoints': endpoints,
            'upsampling_mode': upsampling_mode,
            'model': model.cpu(),  # Get it off the GPU to conserve memory
            'round_1': round_1_results,
            'round_1_training_history': round_1_training_history,
            'round_2': round_2_results,
            'round_2_training_history': round_2_training_history
        })
    return results


def print_results(results, round):
    for result in results:
        print(f"Trial {result['trial']}: alpha1 = {result['alpha1']}, alpha2 = {result['alpha2']}, endpoints = {result['endpoints']}")
        print(f"Round {round}")
        stats = result[round]
        if 'precision' in stats:
            print(f"Precision: {stats['precision']:.2%}")
        if 'recall' in stats:
            print(f"Recall: {stats['recall']:.2%}")
        if 'f1' in stats:
            print(f"F1: {stats['f1']:.2%}")
        if 'avg_jaccard' in stats:
            print(f"Average Jaccard similarity: {stats['avg_jaccard']:.2%}")
        if 'avg_precision' in stats:
            print(f"Average precision: {stats['avg_precision']:.2%}")
        if 'avg_recall' in stats:
            print(f"Average recall: {stats['avg_recall']:.2%}")
        print()


def log_stats(train_results, test_results, log_file):
    # Make a copy of the results dicts minus the model key
    results_without_models = [{k: v for k, v in res.items() if k != 'model'} for res in train_results]
    with open(log_file, 'w') as f:
        json.dump({
            'train_results': results_without_models,
            'test_results': test_results
        }, f, indent=4)


def parse_args():
    parser = argparse.ArgumentParser(description='Train and store the model')
    parser.add_argument('-o', '--out', metavar='model.pt', default='random_search.pt')
    parser.add_argument('--logfile', metavar='log_file.json', default='random_search.json')
    parser.add_argument('-n', '--num-trials', type=int, default=10)
    parser.add_argument('--seed', type=int, default=5670)
    parser.add_argument('-b', '--batch-size', type=int, default=48)
    parser.add_argument('-e', '--num-epochs', type=int, default=10)
    # parser.add_argument('-m', '--mixed-precision', action='store_true')
    parser.add_argument('--train-dir', default='./SPI_train/')
    parser.add_argument('--val-dir', default='./SPI_val/')
    parser.add_argument('--ny-train-dir', default='./NY_dataset/train/')
    parser.add_argument('--ny-val-dir', default='./NY_dataset/val/')
    parser.add_argument('--ny-test-dir', default='./NY_dataset/eval/')
    return parser.parse_args()
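

# Example invocation (a sketch: it assumes this file is run as a script and that
# the default dataset directories defined above exist on disk):
#
#   python experiment.py -n 10 --seed 5670 -b 48 -e 10 \
#       -o random_search.pt --logfile random_search.json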


def main():
    args = parse_args()
    # Use the original DeepSolar dataset for the first round of transfer learning
    train_set = ImageFolder(root=args.train_dir, transform=train_transform)
    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    val_set = ImageFolder(root=args.val_dir, transform=transform)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    # Use our NY dataset for the second round
    ny_train_set = ImageFolder(root=args.ny_train_dir, transform=train_transform)
    ny_train_loader = DataLoader(ny_train_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    ny_val_set = ImageFolder(root=args.ny_val_dir, transform=transform, is_valid_file=is_regular_img)
    ny_val_loader = DataLoader(ny_val_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    ny_val_set_seg = SegmentationDataset(args.ny_val_dir)
    ny_val_loader_seg = DataLoader(ny_val_set_seg, batch_size=args.batch_size, shuffle=True, num_workers=4)
    train_results = random_search(train_loader,
                                  val_loader,
                                  ny_train_loader,
                                  ny_val_loader,
                                  ny_val_loader_seg,
                                  num_trials=args.num_trials,
                                  seed=args.seed,
                                  ny_num_epochs=args.num_epochs)
    # Select the model with the best performance on the NY dataset
    best_trial = max(train_results, key=lambda elt: elt['round_2']['avg_jaccard'])
    best_model = best_trial['model']
    # Run test round
    print("Now testing the best model on the test set:\n")
    ny_test_set = ImageFolder(root=args.ny_test_dir, transform=transform, is_valid_file=is_regular_img)
    ny_test_loader = DataLoader(ny_test_set, batch_size=args.batch_size, shuffle=True, num_workers=4)
    ny_test_set_seg = SegmentationDataset(args.ny_test_dir)
    ny_test_loader_seg = DataLoader(ny_test_set_seg, batch_size=args.batch_size, shuffle=True, num_workers=4)
    # Move the best model back to the GPU
    best_model = to_device(best_model)
    cls_metrics = train_or_eval(best_model, ny_test_loader)
    seg_metrics = eval_segmentation(best_model, ny_test_loader_seg)
    # Store these metrics in a separate part of the JSON log
    wanted_keys = {'trial', 'alpha1', 'alpha2', 'endpoints', 'upsampling_mode'}
    test_results = {
        **{k: v for k, v in best_trial.items() if k in wanted_keys},
        **cls_metrics,
        **seg_metrics
    }
    best_model.to_save_file(args.out)
    log_stats(train_results, test_results, args.logfile)


if __name__ == '__main__':
    main()