---
# preprocessing
base_config:
  - configs/base.yaml
data_input_path: [] # filled per-experiment; empty by default
data_out_path: []
val_num: 1 # number of items held out for validation
pe: 'parselmouth' # 'parselmouth' or 'harvest'
f0_min: 65 # F0 search range in Hz
f0_max: 1100
pc_aug: false # pc-nsf training method
pc_aug_prob: 0.5
pc_aug_key: 5
aug_min: 0.9 # pitch/speed augmentation factor range — TODO confirm exact semantics in binarizer
aug_max: 1.4
aug_num: 1
key_aug: false
key_aug_prob: 0.5
# Multi-resolution STFT auxiliary loss. The three lists are parallel:
# entry i of each gives the FFT size, hop size and window length of one
# STFT resolution.
use_stftloss: false
loss_fft_sizes: [2048, 2048, 4096, 1024, 512, 256, 128, 1024, 2048, 512]
loss_hop_sizes: [512, 240, 480, 100, 50, 25, 12, 120, 240, 50]
loss_win_lengths: [2048, 1200, 2400, 480, 240, 120, 60, 600, 1200, 240]
lab_aux_melloss: 45 # presumably the mel-loss weight — confirm in the task code
lab_aux_stftloss: 2.5
# dataset binarization
raw_data_dir: []
binary_data_dir: null
binarization_args:
  num_workers: 8
  shuffle: true
DataIndexPath: data
valid_set_name: valid
train_set_name: train
volume_aug: true # random volume scaling augmentation
volume_aug_prob: 0.5
# mel-spectrogram dynamic range used for normalization/plotting
mel_vmin: -6.
mel_vmax: 1.5
# audio / feature extraction parameters
audio_sample_rate: 44100
audio_num_mel_bins: 128
hop_size: 512 # Hop size.
fft_size: 2048 # FFT size.
win_size: 2048 # Window size.
fmin: 40 # mel filterbank frequency range in Hz
fmax: 16000
fmax_for_loss: null
crop_mel_frames: 20 # number of mel frames per training crop
# global constants
# neural networks
#model_cls: training.nsf_HiFigan_task.nsf_HiFigan
model_args:
  mini_nsf: false
  # Generator upsampling stack. The rates multiply to 8*8*2*2*2 = 512,
  # matching hop_size above (one waveform sample per mel-frame stride).
  upsample_rates: [8, 8, 2, 2, 2]
  upsample_kernel_sizes: [16, 16, 4, 4, 4]
  upsample_initial_channel: 512
  # Residual blocks: one kernel size per parallel branch, with matching
  # dilation triples.
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
  # Periods of the multi-period discriminator.
  discriminator_periods: [3, 5, 7, 11, 17, 23, 37]
  resblock: "1" # resblock variant selector — quoted on purpose: a string, not the int 1
# training
task_cls: training.nsf_HiFigan_task.nsf_HiFigan
# Discriminator optimizer. NOTE(review): key spelling
# "discriminate_optimizer_args" is kept as-is — it is the name the config
# consumer reads; renaming would break loading.
discriminate_optimizer_args:
  optimizer_cls: torch.optim.AdamW
  lr: 0.0001
  beta1: 0.8
  beta2: 0.99
  weight_decay: 0
# Generator optimizer — mirrors the discriminator settings.
# NOTE(review): "generater" spelling likewise kept for compatibility.
generater_optimizer_args:
  optimizer_cls: torch.optim.AdamW
  lr: 0.0001
  beta1: 0.8
  beta2: 0.99
  weight_decay: 0
# Warmup LR schedule (presumably applied to both optimizers — confirm in task_cls).
lr_scheduler_args:
  scheduler_cls: lr_scheduler.scheduler.WarmupLR
  warmup_steps: 5000
  min_lr: 0.00001
clip_grad_norm: null # gradient clipping disabled
#accumulate_grad_batches: 1
#sampler_frame_count_grid: 6
ds_workers: 4 # dataloader worker processes
dataloader_prefetch_factor: 2
batch_size: 3
num_valid_plots: 100
log_interval: 100
num_sanity_val_steps: 1 # steps of validation at the beginning
val_check_interval: 1000
num_ckpt_keep: 5
max_updates: 100000
# NOTE(review): permanent_ckpt_start (200000) exceeds max_updates (100000),
# so permanent checkpoints would never be written with these defaults —
# confirm this is intentional.
permanent_ckpt_start: 200000
permanent_ckpt_interval: 40000
###########
# pytorch lightning
# Read https://lightning.ai/docs/pytorch/stable/common/trainer.html#trainer-class-api for possible values
###########
pl_trainer_accelerator: 'auto'
pl_trainer_devices: 'auto'
pl_trainer_precision: '32-true' # full fp32; a bf16 alternative is commented below
#pl_trainer_precision: 'bf16'
pl_trainer_num_nodes: 1
# Distributed-strategy settings (relevant when training on multiple devices).
pl_trainer_strategy:
  name: auto
  process_group_backend: nccl
  find_unused_parameters: true
nccl_p2p: true
# Global RNG seed for reproducibility.
seed: 114514
###########
# finetune
###########
finetune_enabled: false # when true, initialize weights from finetune_ckpt_path
finetune_ckpt_path: null
finetune_ignored_params: [] # presumably parameter-name patterns to skip when loading — confirm in loader
finetune_strict_shapes: true # require checkpoint tensor shapes to match the model
freezing_enabled: false
frozen_params: [] # presumably parameter-name patterns to freeze — confirm in task code