Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[tts] Update finetune #2430

Merged
merged 10 commits into from
Sep 23, 2022
52 changes: 42 additions & 10 deletions demos/speech_web/speech_server/src/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,55 @@ def __init__(self):
self.finetune_config = os.path.join("conf/tts3_finetune.yaml")

def finetune(self, input_dir, exp_dir='temp', epoch=100):
"""
use cmd follow examples/other/tts_finetune/tts3/run.sh
"""
newdir_name = "newdir"
new_dir = os.path.join(input_dir, newdir_name)
mfa_dir = os.path.join(exp_dir, 'mfa_result')
dump_dir = os.path.join(exp_dir, 'dump')
output_dir = os.path.join(exp_dir, 'exp')
lang = "zh"
ngpu = 1

cmd = f"""
python3 {self.PYTHONPATH}/finetune.py \
--input_dir={input_dir} \
--pretrained_model_dir={self.pretrained_model_dir} \
--mfa_dir={mfa_dir} \
--dump_dir={dump_dir} \
--output_dir={output_dir} \
--lang={lang} \
--ngpu={ngpu} \
--epoch={epoch} \
--finetune_config={self.finetune_config}
# check oov
python3 {self.PYTHONPATH}/local/check_oov.py \
--input_dir={input_dir} \
--pretrained_model_dir={self.pretrained_model_dir} \
--newdir_name={newdir_name} \
--lang={lang}

# get mfa result
python3 {self.PYTHONPATH}/local/get_mfa_result.py \
--input_dir={new_dir} \
--mfa_dir={mfa_dir} \
--lang={lang}

# generate durations.txt
python3 {self.PYTHONPATH}/local/generate_duration.py \
--mfa_dir={mfa_dir}

# extract feature
python3 {self.PYTHONPATH}/local/extract_feature.py \
--duration_file="./durations.txt" \
--input_dir={new_dir} \
--dump_dir={dump_dir} \
--pretrained_model_dir={self.pretrained_model_dir}

# create finetune env
python3 {self.PYTHONPATH}/local/prepare_env.py \
--pretrained_model_dir={self.pretrained_model_dir} \
--output_dir={output_dir}

# finetune
python3 {self.PYTHONPATH}/local/finetune.py \
--pretrained_model_dir={self.pretrained_model_dir} \
--dump_dir={dump_dir} \
--output_dir={output_dir} \
--ngpu={ngpu} \
--epoch=100 \
--finetune_config={self.finetune_config}
"""

print(cmd)
Expand Down
204 changes: 142 additions & 62 deletions examples/other/tts_finetune/tts3/README.md

Large diffs are not rendered by default.

216 changes: 216 additions & 0 deletions examples/other/tts_finetune/tts3/conf/fastspeech2_layers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
epoch
iteration
main_params
main_optimizer
spk_embedding_table.weight
encoder.embed.0.weight
encoder.embed.1.alpha
encoder.encoders.0.self_attn.linear_q.weight
encoder.encoders.0.self_attn.linear_q.bias
encoder.encoders.0.self_attn.linear_k.weight
encoder.encoders.0.self_attn.linear_k.bias
encoder.encoders.0.self_attn.linear_v.weight
encoder.encoders.0.self_attn.linear_v.bias
encoder.encoders.0.self_attn.linear_out.weight
encoder.encoders.0.self_attn.linear_out.bias
encoder.encoders.0.feed_forward.w_1.weight
encoder.encoders.0.feed_forward.w_1.bias
encoder.encoders.0.feed_forward.w_2.weight
encoder.encoders.0.feed_forward.w_2.bias
encoder.encoders.0.norm1.weight
encoder.encoders.0.norm1.bias
encoder.encoders.0.norm2.weight
encoder.encoders.0.norm2.bias
encoder.encoders.1.self_attn.linear_q.weight
encoder.encoders.1.self_attn.linear_q.bias
encoder.encoders.1.self_attn.linear_k.weight
encoder.encoders.1.self_attn.linear_k.bias
encoder.encoders.1.self_attn.linear_v.weight
encoder.encoders.1.self_attn.linear_v.bias
encoder.encoders.1.self_attn.linear_out.weight
encoder.encoders.1.self_attn.linear_out.bias
encoder.encoders.1.feed_forward.w_1.weight
encoder.encoders.1.feed_forward.w_1.bias
encoder.encoders.1.feed_forward.w_2.weight
encoder.encoders.1.feed_forward.w_2.bias
encoder.encoders.1.norm1.weight
encoder.encoders.1.norm1.bias
encoder.encoders.1.norm2.weight
encoder.encoders.1.norm2.bias
encoder.encoders.2.self_attn.linear_q.weight
encoder.encoders.2.self_attn.linear_q.bias
encoder.encoders.2.self_attn.linear_k.weight
encoder.encoders.2.self_attn.linear_k.bias
encoder.encoders.2.self_attn.linear_v.weight
encoder.encoders.2.self_attn.linear_v.bias
encoder.encoders.2.self_attn.linear_out.weight
encoder.encoders.2.self_attn.linear_out.bias
encoder.encoders.2.feed_forward.w_1.weight
encoder.encoders.2.feed_forward.w_1.bias
encoder.encoders.2.feed_forward.w_2.weight
encoder.encoders.2.feed_forward.w_2.bias
encoder.encoders.2.norm1.weight
encoder.encoders.2.norm1.bias
encoder.encoders.2.norm2.weight
encoder.encoders.2.norm2.bias
encoder.encoders.3.self_attn.linear_q.weight
encoder.encoders.3.self_attn.linear_q.bias
encoder.encoders.3.self_attn.linear_k.weight
encoder.encoders.3.self_attn.linear_k.bias
encoder.encoders.3.self_attn.linear_v.weight
encoder.encoders.3.self_attn.linear_v.bias
encoder.encoders.3.self_attn.linear_out.weight
encoder.encoders.3.self_attn.linear_out.bias
encoder.encoders.3.feed_forward.w_1.weight
encoder.encoders.3.feed_forward.w_1.bias
encoder.encoders.3.feed_forward.w_2.weight
encoder.encoders.3.feed_forward.w_2.bias
encoder.encoders.3.norm1.weight
encoder.encoders.3.norm1.bias
encoder.encoders.3.norm2.weight
encoder.encoders.3.norm2.bias
encoder.after_norm.weight
encoder.after_norm.bias
spk_projection.weight
spk_projection.bias
duration_predictor.conv.0.0.weight
duration_predictor.conv.0.0.bias
duration_predictor.conv.0.2.weight
duration_predictor.conv.0.2.bias
duration_predictor.conv.1.0.weight
duration_predictor.conv.1.0.bias
duration_predictor.conv.1.2.weight
duration_predictor.conv.1.2.bias
duration_predictor.linear.weight
duration_predictor.linear.bias
pitch_predictor.conv.0.0.weight
pitch_predictor.conv.0.0.bias
pitch_predictor.conv.0.2.weight
pitch_predictor.conv.0.2.bias
pitch_predictor.conv.1.0.weight
pitch_predictor.conv.1.0.bias
pitch_predictor.conv.1.2.weight
pitch_predictor.conv.1.2.bias
pitch_predictor.conv.2.0.weight
pitch_predictor.conv.2.0.bias
pitch_predictor.conv.2.2.weight
pitch_predictor.conv.2.2.bias
pitch_predictor.conv.3.0.weight
pitch_predictor.conv.3.0.bias
pitch_predictor.conv.3.2.weight
pitch_predictor.conv.3.2.bias
pitch_predictor.conv.4.0.weight
pitch_predictor.conv.4.0.bias
pitch_predictor.conv.4.2.weight
pitch_predictor.conv.4.2.bias
pitch_predictor.linear.weight
pitch_predictor.linear.bias
pitch_embed.0.weight
pitch_embed.0.bias
energy_predictor.conv.0.0.weight
energy_predictor.conv.0.0.bias
energy_predictor.conv.0.2.weight
energy_predictor.conv.0.2.bias
energy_predictor.conv.1.0.weight
energy_predictor.conv.1.0.bias
energy_predictor.conv.1.2.weight
energy_predictor.conv.1.2.bias
energy_predictor.linear.weight
energy_predictor.linear.bias
energy_embed.0.weight
energy_embed.0.bias
decoder.embed.0.alpha
decoder.encoders.0.self_attn.linear_q.weight
decoder.encoders.0.self_attn.linear_q.bias
decoder.encoders.0.self_attn.linear_k.weight
decoder.encoders.0.self_attn.linear_k.bias
decoder.encoders.0.self_attn.linear_v.weight
decoder.encoders.0.self_attn.linear_v.bias
decoder.encoders.0.self_attn.linear_out.weight
decoder.encoders.0.self_attn.linear_out.bias
decoder.encoders.0.feed_forward.w_1.weight
decoder.encoders.0.feed_forward.w_1.bias
decoder.encoders.0.feed_forward.w_2.weight
decoder.encoders.0.feed_forward.w_2.bias
decoder.encoders.0.norm1.weight
decoder.encoders.0.norm1.bias
decoder.encoders.0.norm2.weight
decoder.encoders.0.norm2.bias
decoder.encoders.1.self_attn.linear_q.weight
decoder.encoders.1.self_attn.linear_q.bias
decoder.encoders.1.self_attn.linear_k.weight
decoder.encoders.1.self_attn.linear_k.bias
decoder.encoders.1.self_attn.linear_v.weight
decoder.encoders.1.self_attn.linear_v.bias
decoder.encoders.1.self_attn.linear_out.weight
decoder.encoders.1.self_attn.linear_out.bias
decoder.encoders.1.feed_forward.w_1.weight
decoder.encoders.1.feed_forward.w_1.bias
decoder.encoders.1.feed_forward.w_2.weight
decoder.encoders.1.feed_forward.w_2.bias
decoder.encoders.1.norm1.weight
decoder.encoders.1.norm1.bias
decoder.encoders.1.norm2.weight
decoder.encoders.1.norm2.bias
decoder.encoders.2.self_attn.linear_q.weight
decoder.encoders.2.self_attn.linear_q.bias
decoder.encoders.2.self_attn.linear_k.weight
decoder.encoders.2.self_attn.linear_k.bias
decoder.encoders.2.self_attn.linear_v.weight
decoder.encoders.2.self_attn.linear_v.bias
decoder.encoders.2.self_attn.linear_out.weight
decoder.encoders.2.self_attn.linear_out.bias
decoder.encoders.2.feed_forward.w_1.weight
decoder.encoders.2.feed_forward.w_1.bias
decoder.encoders.2.feed_forward.w_2.weight
decoder.encoders.2.feed_forward.w_2.bias
decoder.encoders.2.norm1.weight
decoder.encoders.2.norm1.bias
decoder.encoders.2.norm2.weight
decoder.encoders.2.norm2.bias
decoder.encoders.3.self_attn.linear_q.weight
decoder.encoders.3.self_attn.linear_q.bias
decoder.encoders.3.self_attn.linear_k.weight
decoder.encoders.3.self_attn.linear_k.bias
decoder.encoders.3.self_attn.linear_v.weight
decoder.encoders.3.self_attn.linear_v.bias
decoder.encoders.3.self_attn.linear_out.weight
decoder.encoders.3.self_attn.linear_out.bias
decoder.encoders.3.feed_forward.w_1.weight
decoder.encoders.3.feed_forward.w_1.bias
decoder.encoders.3.feed_forward.w_2.weight
decoder.encoders.3.feed_forward.w_2.bias
decoder.encoders.3.norm1.weight
decoder.encoders.3.norm1.bias
decoder.encoders.3.norm2.weight
decoder.encoders.3.norm2.bias
decoder.after_norm.weight
decoder.after_norm.bias
feat_out.weight
feat_out.bias
postnet.postnet.0.0.weight
postnet.postnet.0.1.weight
postnet.postnet.0.1.bias
postnet.postnet.0.1._mean
postnet.postnet.0.1._variance
postnet.postnet.1.0.weight
postnet.postnet.1.1.weight
postnet.postnet.1.1.bias
postnet.postnet.1.1._mean
postnet.postnet.1.1._variance
postnet.postnet.2.0.weight
postnet.postnet.2.1.weight
postnet.postnet.2.1.bias
postnet.postnet.2.1._mean
postnet.postnet.2.1._variance
postnet.postnet.3.0.weight
postnet.postnet.3.1.weight
postnet.postnet.3.1.bias
postnet.postnet.3.1._mean
postnet.postnet.3.1._variance
postnet.postnet.4.0.weight
postnet.postnet.4.1.weight
postnet.postnet.4.1.bias
postnet.postnet.4.1._mean
postnet.postnet.4.1._variance

Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ num_snapshots: -1

# frozen_layers should be a list
# if you don't need to freeze, set frozen_layers to []
# fastspeech2 layers can be found on conf/fastspeech2_layers.txt
# example: frozen_layers: ["encoder", "duration_predictor"]
frozen_layers: ["encoder"]
Loading