# HiFivae.yaml
# preprocessing
base_config:
- configs/base.yaml
aug_min: 0.8
aug_max: 1.8
aug_num: 1
key_aug: true
key_aug_prob: 0.6
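# Assumption (not confirmed by this file alone): key_aug presumably applies a
# pitch/key augmentation to a training sample with probability key_aug_prob,
# with aug_min/aug_max above bounding the random scaling factor it draws.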
#lab_aux_loss: 45
lab_aux_loss: 2.5
lab_kl_loss: 0.02
RSSloss_stftmax: 2048
RSSloss_stftmin: 128
RSSloss_stftnum: 12
lab_wav_loss: 0
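# Assumption: the lab_* keys above are presumably scalar loss weights (lambdas)
# for the auxiliary, KL, and waveform loss terms, and the RSSloss_* keys would
# configure a random-scale STFT loss (enabled via use_rss_loss below) that
# samples RSSloss_stftnum FFT sizes in [RSSloss_stftmin, RSSloss_stftmax]
# instead of using one fixed set of resolutions.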
raw_data_dir: []
binary_data_dir: null
binarization_args:
  num_workers: 8
  shuffle: true
DataIndexPath: data
valid_set_name: valid
train_set_name: train
volume_aug: true
volume_aug_prob: 0.7
mel_vmin: -6.0
mel_vmax: 1.5
aux_step: 16000
audio_sample_rate: 44100
#audio_num_mel_bins: 128
audio_num_mel_bins: 192
hop_size: 512 # Hop size.
fft_size: 2048 # FFT size.
win_size: 2048 # Window size.
fmin: 40
fmax: 16000
fmax_for_loss: null
crop_mel_frames: 22
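# For reference, at audio_sample_rate 44100 and hop_size 512 this analysis
# setup yields 44100 / 512 ≈ 86.1 mel frames per second, i.e. about 11.6 ms
# per hop with a 2048-sample (≈ 46.4 ms) analysis window; crop_mel_frames 22
# then presumably corresponds to roughly 0.26 s of audio per training crop.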
# global constants
# neural networks
loss_fft_sizes: [2048, 2048, 4096, 1024, 512, 256, 128, 1024, 2048, 512]
loss_hop_sizes: [512, 240, 480, 100, 50, 25, 12, 120, 240, 50]
loss_win_lengths: [2048, 1200, 2400, 480, 240, 120, 60, 600, 1200, 240]
# fft_sizes=[1024, 2048, 512],
# hop_sizes=[120, 240, 50],
# win_lengths=[600, 1200, 240],
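# The three loss_* lists above are presumably consumed as parallel lists: the
# i-th fft_size/hop_size/win_length triple defines one resolution of a
# multi-resolution STFT loss, so all three must have the same length
# (10 resolutions here).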
#model_cls: training.nsf_HiFigan_task.nsf_HiFigan
model_args:
  upsample_rates: [ 8, 8, 2, 2, 2 ]
  upsample_kernel_sizes: [ 16, 16, 4, 4, 4 ]
  upsample_initial_channel: 512
  resblock_kernel_sizes: [ 3, 7, 11 ]
  resblock_dilation_sizes: [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ]
  discriminator_periods: [ 3, 5, 7, 11, 17, 23, 37 ]
  # discriminator_periods: [ 3, 5, 7, 11, 17 ]
  resblock: "1"
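# Sanity check: in HiFi-GAN-style generators the product of upsample_rates
# must equal hop_size so mel frames upsample exactly to waveform samples:
# 8 * 8 * 2 * 2 * 2 = 512 = hop_size. The discriminator_periods are all prime
# (3, 5, 7, 11, 17, 23, 37), the usual choice for a multi-period discriminator
# so the period branches see minimally overlapping sample sets.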
# training
task_cls: training.HiFivae_task.HiFivae_task
discriminate_optimizer_args:
  optimizer_cls: torch.optim.AdamW
  lr: 0.0001
  beta1: 0.8
  beta2: 0.99
  weight_decay: 0
generater_optimizer_args:
  optimizer_cls: torch.optim.AdamW
  lr: 0.0001
  beta1: 0.8
  beta2: 0.99
  weight_decay: 0
lr_scheduler_args:
  scheduler_cls: lr_scheduler.scheduler.WarmupLR
  warmup_steps: 5000
  min_lr: 0.00001
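# Assumed behavior of WarmupLR: the learning rate ramps up to the optimizer lr
# over the first warmup_steps (5000) updates and is floored at min_lr; the
# exact schedule shape is defined in lr_scheduler.scheduler, not here.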
clip_grad_norm: null
#accumulate_grad_batches: 1
#sampler_frame_count_grid: 6
ds_workers: 4
dataloader_prefetch_factor: 2
batch_size: 6
use_rss_loss: true
num_valid_plots: 100
log_interval: 100
num_sanity_val_steps: 1 # validation steps to run before training starts
val_check_interval: 4000
num_ckpt_keep: 5
max_updates: 5000000
permanent_ckpt_start: 200000
permanent_ckpt_interval: 40000
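# Checkpoint retention (as these keys suggest): the newest num_ckpt_keep (5)
# checkpoints are kept on a rolling basis, and from step 200000 onward one
# checkpoint is presumably kept permanently every 40000 steps.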
###########
# pytorch lightning
# Read https://lightning.ai/docs/pytorch/stable/common/trainer.html#trainer-class-api for possible values
###########
pl_trainer_accelerator: 'auto'
pl_trainer_devices: 'auto'
pl_trainer_precision: '32-true'
#pl_trainer_precision: 'bf16'
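# '32-true' is full fp32 in Lightning 2.x notation; switching to the commented
# 'bf16' option enables bfloat16 mixed precision on hardware that supports it.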
pl_trainer_num_nodes: 1
pl_trainer_strategy:
  name: auto
  process_group_backend: nccl
  find_unused_parameters: false
  nccl_p2p: true
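# With name: auto, Lightning picks the strategy itself (DDP when multiple
# devices are visible); process_group_backend, find_unused_parameters, and
# nccl_p2p then presumably only take effect in the distributed case.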
seed: 1234
###########
# finetune
###########
finetune_enabled: false
finetune_ckpt_path: null
finetune_ignored_params: []
finetune_strict_shapes: true
freezing_enabled: false
frozen_params: []