diff --git a/paddlespeech/t2s/exps/fastspeech2/preprocess.py b/paddlespeech/t2s/exps/fastspeech2/preprocess.py index 5bda75451b0..db1842b2e89 100644 --- a/paddlespeech/t2s/exps/fastspeech2/preprocess.py +++ b/paddlespeech/t2s/exps/fastspeech2/preprocess.py @@ -86,6 +86,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None phones = sentences[utt_id][0] durations = sentences[utt_id][1] num_frames = logmel.shape[0] diff --git a/paddlespeech/t2s/exps/speedyspeech/preprocess.py b/paddlespeech/t2s/exps/speedyspeech/preprocess.py index 3f81c4e1475..e833d139405 100644 --- a/paddlespeech/t2s/exps/speedyspeech/preprocess.py +++ b/paddlespeech/t2s/exps/speedyspeech/preprocess.py @@ -79,6 +79,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None labels = sentences[utt_id][0] # extract phone and duration phones = [] diff --git a/paddlespeech/t2s/exps/tacotron2/preprocess.py b/paddlespeech/t2s/exps/tacotron2/preprocess.py index 7f41089ebf9..14a0d7eae22 100644 --- a/paddlespeech/t2s/exps/tacotron2/preprocess.py +++ b/paddlespeech/t2s/exps/tacotron2/preprocess.py @@ -82,6 +82,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None phones = sentences[utt_id][0] durations = sentences[utt_id][1] num_frames = logmel.shape[0]