# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
"""Configs."""
from fvcore.common.config import CfgNode
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = CfgNode()
# ---------------------------------------------------------------------------- #
# Training options.
# ---------------------------------------------------------------------------- #
_C.TRAIN = CfgNode()
# If True, train the model; otherwise skip training.
_C.TRAIN.ENABLE = True
# Dataset.
_C.TRAIN.DATASET = "IC_dataset"
# Total mini-batch size.
_C.TRAIN.BATCH_SIZE = 16
# Evaluate model on test data every eval period epochs.
_C.TRAIN.EVAL_PERIOD = 1
# Save model checkpoint every checkpoint period epochs.
_C.TRAIN.CHECKPOINT_PERIOD = 1
# Resume training from the latest checkpoint in the output directory.
_C.TRAIN.AUTO_RESUME = False
# Path to the checkpoint to load the initial weights from.
_C.TRAIN.CHECKPOINT_FILE_PATH = "vit_b_16_plus_240-laion400m_e32-699c4b84.pt"
# Alternative checkpoints: "./configs/MViTv2_T_in1k.pyth", "./configs/MViTv2_B_in1k.pyth"
# If True, reset epochs when loading checkpoint.
_C.TRAIN.CHECKPOINT_EPOCH_RESET = True
# If True, use FP16 for activations.
_C.TRAIN.MIXED_PRECISION = False
# ---------------------------------------------------------------------------- #
# Augmentation options.
# ---------------------------------------------------------------------------- #
_C.AUG = CfgNode()
# Number of repeated augmentations to use during training.
# If this is greater than 1, then the actual batch size is
# TRAIN.BATCH_SIZE * AUG.NUM_SAMPLE.
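# For example, TRAIN.BATCH_SIZE = 16 with AUG.NUM_SAMPLE = 2 yields 32
# augmented samples per iteration.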
_C.AUG.NUM_SAMPLE = 1
# Not used if using randaug.
_C.AUG.COLOR_JITTER = 0.4
# RandAug parameters.
_C.AUG.AA_TYPE = "rand-m9-n6-mstd0.5-inc1"
# Interpolation method.
_C.AUG.INTERPOLATION = "bicubic"
# Probability of random erasing.
_C.AUG.RE_PROB = 0.25
# Random erasing mode.
_C.AUG.RE_MODE = "pixel"
# Random erase count.
_C.AUG.RE_COUNT = 1
# Do not random erase first (clean) augmentation split.
_C.AUG.RE_SPLIT = False
# ---------------------------------------------------------------------------- #
# Mixup options.
# ---------------------------------------------------------------------------- #
_C.MIXUP = CfgNode()
# Whether to use mixup.
_C.MIXUP.ENABLE = False
# Mixup alpha.
_C.MIXUP.ALPHA = 0.8
# Cutmix alpha.
_C.MIXUP.CUTMIX_ALPHA = 1.0
# Probability of performing mixup or cutmix when either/both is enabled.
_C.MIXUP.PROB = 1.0
# Probability of switching to cutmix when both mixup and cutmix enabled.
_C.MIXUP.SWITCH_PROB = 0.5
# Label smoothing.
_C.MIXUP.LABEL_SMOOTH_VALUE = 0.1
# ---------------------------------------------------------------------------- #
# Testing options
# ---------------------------------------------------------------------------- #
_C.TEST = CfgNode()
# If True, test the model; otherwise skip testing.
_C.TEST.ENABLE = True
# Dataset for testing.
_C.TEST.DATASET = "IC_dataset"
# Total mini-batch size.
_C.TEST.BATCH_SIZE = 1
# Path to the checkpoint to load the initial weights from.
_C.TEST.CHECKPOINT_FILE_PATH = ""
# If True, convert 3D conv weights to 2D.
_C.TEST.CHECKPOINT_SQUEEZE_TEMPORAL = True
# -----------------------------------------------------------------------------
# Model options
# -----------------------------------------------------------------------------
_C.MODEL = CfgNode()
# Model name
_C.MODEL.MODEL_NAME = "MViT"
# The number of classes to predict for the model.
_C.MODEL.NUM_CLASSES = 1
# Loss function.
_C.MODEL.LOSS_FUNC = "cross_entropy"
# Dropout rate before final projection in the backbone.
_C.MODEL.DROPOUT_RATE = 0.0
# Activation layer for the output head.
_C.MODEL.HEAD_ACT = "softmax"
# Activation checkpointing enabled or not to save GPU memory.
_C.MODEL.ACT_CHECKPOINT = False
# -----------------------------------------------------------------------------
# MViT options
# -----------------------------------------------------------------------------
_C.MVIT = CfgNode()
# Options include `conv`, `max`.
_C.MVIT.MODE = "conv"
# If True, perform pool before projection in attention.
_C.MVIT.POOL_FIRST = False
# If True, use the class-token embedding in the transformer; otherwise omit it.
_C.MVIT.CLS_EMBED_ON = False
# Kernel size for patchification.
_C.MVIT.PATCH_KERNEL = [7, 7]
# Stride size for patchification.
_C.MVIT.PATCH_STRIDE = [4, 4]
# Padding size for patchification.
_C.MVIT.PATCH_PADDING = [3, 3]
# Base embedding dimension for the transformer.
_C.MVIT.EMBED_DIM = 96
# Base num of heads for the transformer.
_C.MVIT.NUM_HEADS = 1
# Ratio of the MLP hidden dimension to the embedding dimension.
_C.MVIT.MLP_RATIO = 4.0
# If True, use a bias term in the attention fc layers.
_C.MVIT.QKV_BIAS = True
# Drop path rate for the transformer.
_C.MVIT.DROPPATH_RATE = 0.1
# Depth of the transformer.
_C.MVIT.DEPTH = 16
# Dimension multiplication at layer i. If 2.0 is used, then the next block will increase
# the dimension by 2 times. Format: [depth_i: mul_dim_ratio]
_C.MVIT.DIM_MUL = []
# Head number multiplication at layer i. If 2.0 is used, then the next block will
# increase the number of heads by 2 times. Format: [depth_i: head_mul_ratio]
_C.MVIT.HEAD_MUL = []
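# Illustrative (non-default) schedule: [[1, 2.0], [3, 2.0], [14, 2.0]] would
# double the dimension (or head count) entering blocks 1, 3, and 14.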
# Stride size for the Pool KV at layer i.
# Format: [[i, stride_t_i, stride_h_i, stride_w_i], ...,]
_C.MVIT.POOL_KV_STRIDE = None
# Initial stride size for KV at layer 1. The stride size will be further
# reduced by the ratio of MVIT.DIM_MUL. It will overwrite MVIT.POOL_KV_STRIDE
# if not None.
_C.MVIT.POOL_KV_STRIDE_ADAPTIVE = None
# Stride size for the Pool Q at layer i.
# Format: [[i, stride_t_i, stride_h_i, stride_w_i], ...,]
_C.MVIT.POOL_Q_STRIDE = []
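# Illustrative (non-default) entry: [[0, 1, 4, 4]] would pool at block 0 with
# temporal/height/width strides of 1, 4, and 4.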
# Kernel size for Q, K, V pooling.
_C.MVIT.POOL_KVQ_KERNEL = (3, 3)
# If True, perform no decay on positional embedding and cls embedding.
_C.MVIT.ZERO_DECAY_POS_CLS = False
# If True, use absolute positional embedding.
_C.MVIT.USE_ABS_POS = False
# If True, use relative positional embedding for spatial dimensions.
_C.MVIT.REL_POS_SPATIAL = True
# If True, initialize the relative positional embedding with zeros.
_C.MVIT.REL_POS_ZERO_INIT = False
# If True, use the residual pooling connection.
_C.MVIT.RESIDUAL_POOLING = True
# Apply the dimension multiplication in the QKV linear layers of the attention
# block instead of in the MLP.
_C.MVIT.DIM_MUL_IN_ATT = True
# -----------------------------------------------------------------------------
# Data options
# -----------------------------------------------------------------------------
_C.DATA = CfgNode()
# The path to the data directory.
_C.DATA.PATH_TO_DATA_DIR = ""
# If an imdb has been dumped to a local file with the following record format:
# `{"im_path": im_path, "class": cont_id}`
# then the imdb construction can be skipped and it can be loaded from that file.
_C.DATA.PATH_TO_PRELOAD_IMDB = ""
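# e.g., a preloaded imdb might look like (illustrative entries only):
# [{"im_path": "images/0001.jpg", "class": 0},
#  {"im_path": "images/0002.jpg", "class": 3}, ...]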
# The mean value of pixels across the R G B channels.
_C.DATA.MEAN = [0.485, 0.456, 0.406]
# The std value of pixels across the R G B channels.
_C.DATA.STD = [0.229, 0.224, 0.225]
# The spatial crop size for training.
_C.DATA.TRAIN_CROP_SIZE = 224
# The spatial crop size for testing.
_C.DATA.TEST_CROP_SIZE = 224
# Crop ratio for testing. Default is 224/256 = 0.875.
_C.DATA.VAL_CROP_RATIO = 0.875
# If True, combine the train/val splits for training on IN21k.
_C.DATA.IN22K_TRAINVAL = False
# If not empty, use IN1k as the val split when training on IN21k.
_C.DATA.IN22k_VAL_IN1K = ""
# ---------------------------------------------------------------------------- #
# Optimizer options
# ---------------------------------------------------------------------------- #
_C.SOLVER = CfgNode()
# Base learning rate.
_C.SOLVER.BASE_LR = 0.00025
# Learning rate policy (see utils/lr_policy.py for options and examples).
_C.SOLVER.LR_POLICY = "cosine"
# Final learning rate for the 'cosine' policy.
_C.SOLVER.COSINE_END_LR = 1e-6
# Step size for 'exp' and 'cos' policies (in epochs).
_C.SOLVER.STEP_SIZE = 1
# Steps for 'steps_' policies (in epochs).
_C.SOLVER.STEPS = []
# Learning rates for 'steps_' policies.
_C.SOLVER.LRS = []
# Maximum number of epochs.
_C.SOLVER.MAX_EPOCH = 400
# Momentum.
_C.SOLVER.MOMENTUM = 0.9
# Momentum dampening.
_C.SOLVER.DAMPENING = 0.0
# Nesterov momentum.
_C.SOLVER.NESTEROV = True
# L2 regularization.
_C.SOLVER.WEIGHT_DECAY = 0.05
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARMUP_FACTOR.
_C.SOLVER.WARMUP_FACTOR = 0.1
# Gradually warm up the SOLVER.BASE_LR over this number of epochs.
_C.SOLVER.WARMUP_EPOCHS = 70.0
# The start learning rate of the warm up.
_C.SOLVER.WARMUP_START_LR = 1e-8
# Optimization method.
_C.SOLVER.OPTIMIZING_METHOD = "sgd"
# Base learning rate is linearly scaled with NUM_SHARDS.
_C.SOLVER.BASE_LR_SCALE_NUM_SHARDS = False
# If True, start from the peak cosine learning rate after warm up.
_C.SOLVER.COSINE_AFTER_WARMUP = True
# If True, perform no weight decay on parameters with one dimension (bias terms, etc.).
_C.SOLVER.ZERO_WD_1D_PARAM = True
# Clip gradients at this value before the optimizer update.
_C.SOLVER.CLIP_GRAD_VAL = None
# Clip gradients at this L2 norm before the optimizer update.
_C.SOLVER.CLIP_GRAD_L2NORM = None
# The layer-wise decay of learning rate. Set to 1.0 to disable.
_C.SOLVER.LAYER_DECAY = 1.0
# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Number of GPUs to use (applies to both training and testing).
_C.NUM_GPUS = 1
# Number of machines to use for the job.
_C.NUM_SHARDS = 1
# The index of the current machine.
_C.SHARD_ID = 0
# Output basedir.
_C.OUTPUT_DIR = "./tmp"
# Note that non-determinism may still be present due to non-deterministic
# operator implementations in GPU operator libraries.
_C.RNG_SEED = 10
# Log period in iters.
_C.LOG_PERIOD = 10
# If True, log the model info.
_C.LOG_MODEL_INFO = True
# Distributed backend.
_C.DIST_BACKEND = "nccl"
# Local process rank for distributed training (set by the launcher).
_C.local_rank = 0
# JSON lists of normal (in-distribution) and outlier training images.
_C.normal_json_path = './datasets/AD_json/hyperkvasir_normal.json'
_C.outlier_json_path = './datasets/AD_json/hyperkvasir_outlier.json'
# JSON lists of normal and outlier validation images.
_C.val_normal_json_path = './datasets/AD_json/elpv_normal.json'
_C.val_outlier_json_path = './datasets/AD_json/elpv_outlier.json'
# Backbone model name.
_C.model = 'ViT-B-16'
# Identifier of the pretrained weights to load (None if unset).
_C.pretrained = None
# Number of few-shot reference samples.
_C.shot = 2
# Input image size.
_C.image_size = 240
# Directory holding the few-shot reference images.
_C.few_shot_dir = "./visa"
# ---------------------------------------------------------------------------- #
# Common train/test data loader options
# ---------------------------------------------------------------------------- #
_C.DATA_LOADER = CfgNode()
# Root path for the data loader.
_C.DATA_LOADER.data_path = "./data"
# Number of data loader workers per training process.
_C.DATA_LOADER.NUM_WORKERS = 2
# Load data to pinned host memory.
_C.DATA_LOADER.PIN_MEMORY = True
def assert_and_infer_cfg(cfg):
    """Validate the config and apply cross-option scaling."""
    # TRAIN assertions.
    assert cfg.NUM_GPUS == 0 or cfg.TRAIN.BATCH_SIZE % cfg.NUM_GPUS == 0
    # TEST assertions.
    assert cfg.NUM_GPUS == 0 or cfg.TEST.BATCH_SIZE % cfg.NUM_GPUS == 0
    # Execute LR scaling by num_shards.
    if cfg.SOLVER.BASE_LR_SCALE_NUM_SHARDS:
        cfg.SOLVER.BASE_LR *= cfg.NUM_SHARDS
        cfg.SOLVER.WARMUP_START_LR *= cfg.NUM_SHARDS
        cfg.SOLVER.COSINE_END_LR *= cfg.NUM_SHARDS
    # General assertions.
    assert cfg.SHARD_ID < cfg.NUM_SHARDS
    return cfg
def get_cfg():
    """
    Get a copy of the default config.
    """
    return _C.clone()
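

# Minimal usage sketch: clone the defaults, optionally merge overrides from a
# YAML file or a key/value list (both provided by fvcore's yacs-based CfgNode),
# then validate. The YAML path below is a hypothetical placeholder.
if __name__ == "__main__":
    cfg = get_cfg()
    # cfg.merge_from_file("configs/example.yaml")  # hypothetical config file
    cfg.merge_from_list(["TRAIN.BATCH_SIZE", 32, "SOLVER.BASE_LR", 5e-4])
    cfg = assert_and_infer_cfg(cfg)
    print("batch size:", cfg.TRAIN.BATCH_SIZE, "base lr:", cfg.SOLVER.BASE_LR)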