forked from mindspore-lab/mindcv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
293 lines (269 loc) · 17.7 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import argparse
import logging
import os
import yaml
logger = logging.getLogger(__name__)
def str2bool(v):
    """Convert a command-line token into a boolean.

    Intended for use as an argparse ``type=`` callable.

    Args:
        v: Either an actual ``bool`` (returned unchanged) or a string that is
            compared case-insensitively against the accepted spellings.

    Returns:
        bool: True for "yes"/"true"/"1", False for "no"/"false"/"0".

    Raises:
        argparse.ArgumentTypeError: If the string is none of the accepted spellings.
    """
    if isinstance(v, bool):
        return v
    # Accepted spellings, keyed by their lower-cased form.
    spellings = {
        "yes": True, "true": True, "1": True,
        "no": False, "false": False, "0": False,
    }
    verdict = spellings.get(v.lower())
    if verdict is None:
        raise argparse.ArgumentTypeError("Boolean value expected.")
    return verdict
# fmt: off
def create_parser():
    """Build the argument parsers that describe the training configuration.

    Two parsers are created: a minimal one that only knows ``-c/--config``
    (so the YAML path can be extracted before anything else is parsed), and
    the main parser, which inherits ``--config`` and declares every other
    option grouped by topic.

    Sequence-valued options (``--scale``, ``--ratio``, ``--re_scale``,
    ``--re_ratio``, ``--mean``, ``--std``, ``--multi_step_decay_milestones``)
    take space-separated numbers on the command line, e.g.
    ``--scale 0.08 1.0``; the parsed value is then a list of numbers.

    Returns:
        tuple: ``(parser_config, parser)``, the config-only parser and the
        main parser, both ``argparse.ArgumentParser`` instances.
    """
    # The first arg parser parses out only the --config argument, this argument is used to
    # load a yaml file containing key-values that override the defaults for the main parser below
    parser_config = argparse.ArgumentParser(description='Training Config', add_help=False)
    parser_config.add_argument('-c', '--config', type=str, default='',
                               help='YAML config file specifying default arguments (default="")')

    # The main parser. It inherits the --config argument for better help information.
    parser = argparse.ArgumentParser(description='ImageNet Training', parents=[parser_config])

    # System parameters
    group = parser.add_argument_group('System parameters')
    group.add_argument('--mode', type=int, default=0,
                       help='Running in GRAPH_MODE(0) or PYNATIVE_MODE(1) (default=0)')
    group.add_argument('--distribute', type=str2bool, nargs='?', const=True, default=False,
                       help='Run distribute (default=False)')
    group.add_argument('--val_while_train', type=str2bool, nargs='?', const=True, default=False,
                       help='Verify accuracy while training (default=False)')
    group.add_argument('--val_interval', type=int, default=1,
                       help='Interval for validation while training. Unit: epoch (default=1)')
    group.add_argument('--log_interval', type=int, default=100,
                       help='Interval for print training log. Unit: step (default=100)')
    group.add_argument('--seed', type=int, default=42,
                       help='Seed value for determining randomness in numpy, random, and mindspore (default=42)')

    # Dataset parameters
    group = parser.add_argument_group('Dataset parameters')
    group.add_argument('--dataset', type=str, default='imagenet',
                       help='Type of dataset (default="imagenet")')
    group.add_argument('--data_dir', type=str, default='./',
                       help='Path to dataset (default="./")')
    group.add_argument('--train_split', type=str, default='train',
                       help='Dataset train split name (default="train")')
    group.add_argument('--val_split', type=str, default='val',
                       help='Dataset validation split name (default="val")')
    group.add_argument('--dataset_download', type=str2bool, nargs='?', const=True, default=False,
                       help='If downloading the dataset, only support Mnist, Cifar10 and Cifar100 (default=False)')
    group.add_argument('--num_parallel_workers', type=int, default=8,
                       help='Number of parallel workers (default=8)')
    group.add_argument('--shuffle', type=str2bool, nargs='?', const=True, default=True,
                       help='Whether or not to perform shuffle on the dataset (default=True)')
    group.add_argument('--num_samples', type=int, default=None,
                       help='Number of elements to sample. None means sample all elements (default=None)')
    group.add_argument('--batch_size', type=int, default=128,
                       help='Number of batch size (default=128)')
    group.add_argument('--drop_remainder', type=str2bool, nargs='?', const=True, default=True,
                       help='Determines whether or not to drop the last block whose data '
                            'row number is less than batch size (default=True)')

    # Augmentation parameters
    group = parser.add_argument_group('Augmentation parameters')
    group.add_argument('--image_resize', type=int, default=224,
                       help='Crop the size of the image (default=224)')
    # Range options are parsed as two floats; `type=tuple` would split the
    # single command-line token into a tuple of characters.
    group.add_argument('--scale', type=float, nargs=2, default=(0.08, 1.0),
                       help='Random resize scale (default=(0.08, 1.0))')
    group.add_argument('--ratio', type=float, nargs=2, default=(0.75, 1.333),
                       help='Random resize aspect ratio (default=(0.75, 1.333))')
    group.add_argument('--hflip', type=float, default=0.5,
                       help='Horizontal flip training aug probability (default=0.5)')
    group.add_argument('--vflip', type=float, default=0.0,
                       help='Vertical flip training aug probability (default=0.0)')
    group.add_argument('--color_jitter', type=float, default=0.4,
                       help='Color jitter factor (default=0.4)')
    group.add_argument('--interpolation', type=str, default='bilinear',
                       help='Image interpolation mode for resize operator(default="bilinear")')
    group.add_argument('--auto_augment', type=str, default=None,
                       help='AutoAugment policy. "randaug" for RandAugment, "autoaug" for original AutoAugment, '
                            '"autoaugr" for AutoAugment with increasing posterize. '
                            'If apply, recommend for imagenet: randaug-m7-mstd0.5 (default=None).'
                            'Example: "randaug-m10-n2-w0-mstd0.5-mmax10-inc0", "autoaug-mstd0.5" or autoaugr-mstd0.5.')
    group.add_argument('--re_prob', type=float, default=0.0,
                       help='Probability of performing erasing (default=0.0)')
    group.add_argument('--re_scale', type=float, nargs=2, default=(0.02, 0.33),
                       help='Range of area scale of the erased area (default=(0.02, 0.33))')
    group.add_argument('--re_ratio', type=float, nargs=2, default=(0.3, 3.3),
                       help='Range of aspect ratio of the erased area (default=(0.3, 3.3))')
    # No `type` is declared here, so a value given on the command line stays a string.
    # NOTE(review): confirm that the consumer of re_value expects that.
    group.add_argument('--re_value', default=0,
                       help='Pixel value used to pad the erased area (default=0)')
    group.add_argument('--re_max_attempts', type=int, default=10,
                       help='The maximum number of attempts to propose a valid erased area, '
                            'beyond which the original image will be returned (default=10)')
    # Per-channel statistics are parsed as one or more floats; `type=list`
    # would split the single command-line token into a list of characters.
    group.add_argument('--mean', type=float, nargs='+', default=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                       help='List or tuple of mean values for each channel, '
                            'with respect to channel order (default=[0.485 * 255, 0.456 * 255, 0.406 * 255])')
    group.add_argument('--std', type=float, nargs='+', default=[0.229 * 255, 0.224 * 255, 0.225 * 255],
                       help='List or tuple of std values for each channel, '
                            'with respect to channel order (default=[0.229 * 255, 0.224 * 255, 0.225 * 255])')
    group.add_argument('--crop_pct', type=float, default=0.875,
                       help='Input image center crop percent (default=0.875)')
    group.add_argument('--mixup', type=float, default=0.0,
                       help='Hyperparameter of beta distribution of mixup. '
                            'Recommended value is 0.2 for ImageNet (default=0.0)')
    group.add_argument('--cutmix', type=float, default=0.0,
                       help='Hyperparameter of beta distribution of cutmix (default=0.0)')
    group.add_argument('--cutmix_prob', type=float, default=1.0,
                       help='Probability of applying cutmix and/or mixup (default=1.0)')
    group.add_argument('--aug_repeats', type=int, default=0,
                       help='Number of dataset repetition for repeated augmentation. '
                            'If 0 or 1, repeated augmentation is disabled. '
                            'Otherwise, repeated augmentation is enabled and the common choice is 3 (default=0)')

    # Model parameters
    group = parser.add_argument_group('Model parameters')
    group.add_argument('--model', type=str, default='mobilenet_v2_035_224',
                       help='Name of model')
    group.add_argument('--num_classes', type=int, default=None,
                       help='Number of label classes. If None, read from standard datasets (default=None)')
    group.add_argument('--in_channels', type=int, default=3,
                       help='Input channels (default=3)')
    group.add_argument('--drop_rate', type=float, default=None,
                       help='Drop rate (default=None)')
    group.add_argument('--drop_path_rate', type=float, default=None,
                       help='Drop path rate (default=None)')
    group.add_argument('--pretrained', type=str2bool, nargs='?', const=True, default=False,
                       help='Load pretrained model (default=False)')
    group.add_argument('--ckpt_path', type=str, default='',
                       help='Initialize model from this checkpoint. '
                            'If resume training, specify the checkpoint path (default="")')
    group.add_argument('--resume_opt', type=str2bool, nargs='?', const=True, default=False,
                       help='Resume optimizer state including LR (default=False)')
    group.add_argument('--keep_checkpoint_max', type=int, default=10,
                       help='Max number of checkpoint files (default=10)')
    group.add_argument('--ckpt_save_dir', type=str, default="./ckpt",
                       help='Path of checkpoint (default="./ckpt")')
    group.add_argument('--ckpt_save_interval', type=int, default=1,
                       help='Checkpoint saving interval. Unit: epoch (default=1)')
    group.add_argument('--ckpt_save_policy', type=str, default='latest_k',
                       help='Checkpoint saving strategy. The optional values is '
                            'None, "top_k" or "latest_k" (default="latest_k")')
    group.add_argument('--epoch_size', type=int, default=90,
                       help='Train epoch size (default=90)')
    group.add_argument('--dataset_sink_mode', type=str2bool, nargs='?', const=True, default=True,
                       help='The dataset sink mode (default=True)')
    group.add_argument('--ema', type=str2bool, nargs='?', const=True, default=False,
                       help='Training with ema (default=False)')
    group.add_argument('--ema_decay', type=float, default=0.9999,
                       help='EMA decay (default=0.9999)')
    group.add_argument('--clip_grad', type=str2bool, nargs='?', const=True, default=False,
                       help='Whether use clip grad (default=False)')
    group.add_argument('--clip_value', type=float, default=15.0,
                       help='Clip value (default=15.0)')

    # Optimize parameters
    group = parser.add_argument_group('Optimizer parameters')
    group.add_argument('--opt', type=str, default='adam',
                       choices=['sgd', 'momentum', 'adam', 'adamw', 'lion', 'rmsprop', 'adagrad', 'lamb', "nadam"],
                       help='Type of optimizer (default="adam")')
    group.add_argument('--momentum', type=float, default=0.9,
                       help='Hyperparameter of type float, means momentum for the moving average. '
                            'It must be at least 0.0 (default=0.9)')
    group.add_argument('--weight_decay', type=float, default=1e-6,
                       help='Weight decay (default=1e-6)')
    group.add_argument('--use_nesterov', type=str2bool, nargs='?', const=True, default=False,
                       help='Enables the Nesterov momentum (default=False)')
    group.add_argument('--filter_bias_and_bn', type=str2bool, nargs='?', const=True, default=True,
                       help='Filter Bias and BatchNorm (default=True)')
    group.add_argument('--eps', type=float, default=1e-10,
                       help='Term Added to the Denominator to Improve Numerical Stability (default=1e-10)')

    # Scheduler parameters
    group = parser.add_argument_group('Scheduler parameters')
    group.add_argument('--scheduler', type=str, default='cosine_decay',
                       choices=['constant', 'cosine_decay', 'exponential_decay', 'step_decay', 'multi_step_decay'],
                       help='Type of scheduler (default="cosine_decay")')
    group.add_argument('--lr', type=float, default=0.001,
                       help='Learning rate (default=0.001)')
    group.add_argument('--min_lr', type=float, default=1e-6,
                       help='The minimum value of learning rate if scheduler supports (default=1e-6)')
    group.add_argument('--warmup_epochs', type=int, default=3,
                       help='Warmup epochs (default=3)')
    group.add_argument('--warmup_factor', type=float, default=0.0,
                       help='Warmup factor of learning rate (default=0.0)')
    group.add_argument('--decay_epochs', type=int, default=100,
                       help='Decay epochs (default=100)')
    group.add_argument('--decay_rate', type=float, default=0.9,
                       help='LR decay rate if scheduler supports (default=0.9)')
    # Milestones are parsed as one or more ints rather than via `type=list`.
    group.add_argument('--multi_step_decay_milestones', type=int, nargs='+', default=[30, 60, 90],
                       help='List of epoch milestones for lr decay, which is ONLY effective for '
                            'the multi_step_decay scheduler. LR will be decay by decay_rate at the milestone epoch.')
    group.add_argument('--lr_epoch_stair', type=str2bool, nargs='?', const=True, default=False,
                       help='If True, LR will be updated in the first step of each epoch and '
                            'LRs are the same in the remaining steps in the epoch. Otherwise, '
                            'learning rate is updated every step dynamically (default=False)')
    group.add_argument('--num_cycles', type=int, default=1,
                       help='Num of cycles for cosine decay (default=1)')
    group.add_argument('--cycle_decay', type=float, default=1.0,
                       help='Decay rate of lr max in each cosine cycle (default=1.0)')

    # Loss parameters
    group = parser.add_argument_group('Loss parameters')
    group.add_argument('--loss', type=str, default='CE', choices=['BCE', 'CE'],
                       help='Type of loss, BCE (BinaryCrossEntropy) or CE (CrossEntropy) (default="CE")')
    group.add_argument('--label_smoothing', type=float, default=0.0,
                       help='Use label smoothing (default=0.0)')
    group.add_argument('--aux_factor', type=float, default=0.0,
                       help='Aux loss factor (default=0.0)')
    group.add_argument('--reduction', type=str, default='mean',
                       help='Type of reduction to be applied to loss (default="mean")')

    # AMP parameters
    group = parser.add_argument_group('Auto mixing precision parameters')
    group.add_argument('--amp_level', type=str, default='O0',
                       help='Amp level - Auto Mixed Precision level for saving memory and acceleration. '
                            'Choice: O0 - all FP32, O1 - only cast ops in white-list to FP16, '
                            'O2 - cast all ops except for blacklist to FP16, '
                            'O3 - cast all ops to FP16. (default="O0").')
    group.add_argument('--loss_scale_type', type=str, default='fixed',
                       choices=['fixed', 'dynamic', 'auto'],
                       help='The type of loss scale (default="fixed")')
    group.add_argument('--loss_scale', type=float, default=1.0,
                       help='Loss scale (default=1.0)')
    # `type=bool` treats every non-empty string (including "False") as True,
    # so this flag uses str2bool like the other boolean options.
    group.add_argument('--drop_overflow_update', type=str2bool, nargs='?', const=True, default=False,
                       help='Whether to execute optimizer if there is an overflow (default=False)')

    # modelarts
    group = parser.add_argument_group('modelarts')
    group.add_argument('--enable_modelarts', type=str2bool, nargs='?', const=True, default=False,
                       help='Run on modelarts platform (default=False)')
    group.add_argument('--device_target', type=str, default='Ascend')
    group.add_argument('--multi_data_url', type=str, default='/cache/data/',
                       help='path to multi dataset')
    group.add_argument('--data_url', type=str, default='/cache/data/',
                       help='path to dataset')
    group.add_argument('--ckpt_url', type=str, default='/cache/output/',
                       help='pre_train_model path in obs')
    group.add_argument('--train_url', type=str, default='/cache/output/',
                       help='model folder to save/load')

    return parser_config, parser
# fmt: on
def _check_cfgs_in_parser(cfgs: dict, parser: argparse.ArgumentParser):
actions_dest = [action.dest for action in parser._actions]
defaults_key = parser._defaults.keys()
for k in cfgs.keys():
if k not in actions_dest and k not in defaults_key:
raise KeyError(f"{k} does not exist in ArgumentParser!")
def parse_args(args=None):
    """Parse command-line arguments, optionally seeded from a YAML config file.

    The ``-c/--config`` option is extracted first. When it names a file, the
    key-values loaded from that YAML file replace the main parser's defaults,
    so options given explicitly on the command line still take precedence.

    Args:
        args (list[str] | None): Argument strings to parse. Passed through to
            argparse unchanged (argparse falls back to ``sys.argv`` for None).

    Returns:
        argparse.Namespace: The parsed arguments. ``config`` holds the path of
        the YAML file that was used, if any.

    Raises:
        ValueError: If the YAML file's top level is not a mapping.
        KeyError: If the YAML file contains a key the parser does not define.
    """
    parser_config, parser = create_parser()
    # Do we have a config file to parse?
    args_config, remaining = parser_config.parse_known_args(args)
    if args_config.config:
        with open(args_config.config, "r") as f:
            cfg = yaml.safe_load(f)
        if cfg is None:
            # An empty or comment-only YAML document loads as None; treat it as
            # "no overrides" instead of failing on `None.keys()` further down.
            cfg = {}
        elif not isinstance(cfg, dict):
            raise ValueError(
                f"Config file {args_config.config} must contain a mapping of "
                f"argument names to values, but got {type(cfg).__name__}."
            )
        _check_cfgs_in_parser(cfg, parser)
        parser.set_defaults(**cfg)
        # `remaining` no longer carries -c/--config, so record the path as a default.
        parser.set_defaults(config=args_config.config)
    # The main arg parser parses the rest of the args, the usual
    # defaults will have been overridden if config file specified.
    args = parser.parse_args(remaining)
    return args
def save_args(args: argparse.Namespace, filepath: str, rank: int = 0) -> None:
    """Write the parsed arguments to a YAML file from the master process only.

    Both arguments are validated regardless of ``rank``; the file is written
    (and its parent directory created if missing) only when ``rank`` is 0.

    Args:
        args (Namespace): The parsed arguments to be saved.
        filepath (str): Destination path; must end with ``.yaml``.
        rank (int): Process rank in the distributed training. Defaults to 0.
    """
    assert isinstance(args, argparse.Namespace)
    assert filepath.endswith(".yaml")
    if rank == 0:
        # Resolve to an absolute path so a bare filename still has a parent directory.
        parent_dir = os.path.dirname(os.path.abspath(filepath))
        os.makedirs(parent_dir, exist_ok=True)
        with open(filepath, "w") as stream:
            yaml.safe_dump(vars(args), stream)
            logger.info(f"Args is saved to {filepath}.")