Skip to content

Commit

Permalink
student
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxMax2016 committed Feb 27, 2023
1 parent e3e1a4d commit baf379f
Show file tree
Hide file tree
Showing 7 changed files with 416 additions and 24 deletions.
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ put prosody_model.pt To ./bert/prosody_model.pt

put vits_bert_model.pth To ./vits_bert_model.pth

> python vits_infer.py
> python vits_infer.py --config ./configs/bert_vits.json --model vits_bert_model.pth
./vits_infer_out has the inferred waves, listen !!!

Expand All @@ -54,7 +54,21 @@ put 000001-010000.txt to ./data/000001-010000.txt
![bert_lose](https://user-images.githubusercontent.com/16432329/220883346-c382bea2-1d2f-4a16-b797-2f9e2d2fb639.png)

### Model compression based on knowledge distillation
Coming soon~~~
The student model is 3× as fast as the teacher model.

To train:

> python train.py -c configs/bert_vits_student.json -m bert_vits_student
To infer, download the pretrained student model: https://drive.google.com/file/d/1hTLWYEKH4GV9mQltrMyr3k2UKUo4chdp/view?usp=sharing

You can also get the student model from the release page.

> python vits_infer.py --config ./configs/bert_vits_student.json --model vits_bert_student.pth
You can use vits_istft as a student model too.

https://github.com/PlayVoice/vits_chinese/tree/vits_istft

### Another data Link
https://github.com/PlayVoice/HuaYan_TTS
Expand Down
6 changes: 4 additions & 2 deletions configs/bert_vits.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"train": {
"train_class": "models.SynthesizerTrn",
"eval_class": "models.SynthesizerEval",
"log_interval": 100,
"eval_interval": 10000,
"seed": 1234,
Expand All @@ -17,8 +19,8 @@
"c_kl": 1.0
},
"data": {
"training_files":"filelists/train.txt",
"validation_files":"filelists/valid.txt",
"training_files": "filelists/train.txt",
"validation_files": "filelists/valid.txt",
"max_wav_value": 32768.0,
"sampling_rate": 16000,
"filter_length": 1024,
Expand Down
53 changes: 53 additions & 0 deletions configs/bert_vits_student.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"train": {
"train_class": "models.SyntStudentTrn",
"eval_class": "models.SynthesizerEval",
"teacher": "./vits_bert_model.pth",
"log_interval": 100,
"eval_interval": 10000,
"seed": 1234,
"epochs": 20000,
"learning_rate": 1e-4,
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 8,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 12800,
"init_lr_ratio": 1,
"warmup_epochs": 0,
"c_mel": 45,
"c_kl": 1.0
},
"data": {
"training_files":"filelists/train.txt",
"validation_files":"filelists/valid.txt",
"max_wav_value": 32768.0,
"sampling_rate": 16000,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": null,
"add_blank": false,
"n_speakers": 0
},
"model": {
"inter_channels": 192,
"hidden_channels": 192,
"filter_channels": 512,
"n_heads": 2,
"n_layers": 5,
"kernel_size": 3,
"p_dropout": 0.1,
"resblock": "1",
"resblock_kernel_sizes": [3,7,11],
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
"upsample_rates": [8,8,2,2],
"upsample_initial_channel": 256,
"upsample_kernel_sizes": [16,16,4,4],
"n_layers_q": 3,
"use_spectral_norm": false
}
}
Loading

0 comments on commit baf379f

Please sign in to comment.