Skip to content

Commit

Permalink
Bug fix in MP3 length on TTSDataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Edresson committed Oct 20, 2023
1 parent 747f688 commit 2d69a99
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
19 changes: 17 additions & 2 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy

from mutagen.mp3 import MP3

# to prevent too many open files error as suggested here
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
torch.multiprocessing.set_sharing_strategy("file_system")
Expand Down Expand Up @@ -42,6 +44,19 @@ def string2filename(string):
return filename


def get_audio_size(audiopath):
    """Estimate the length of an audio file, in samples, without decoding it.

    The file type is chosen from the file extension, compared
    case-insensitively so that names such as ``clip.MP3`` or ``clip.WAV``
    are accepted.

    Args:
        audiopath: Path to the audio file, as a ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        int: For ``.mp3`` files, the duration reported by the MP3 header
        multiplied by its sample rate. For ``.wav`` and ``.flac`` files, a
        rough estimate derived from the file size on disk
        (``size_in_bytes / 16 * 8``, i.e. two bytes per sample).

    Raises:
        RuntimeError: If the extension is not mp3, wav or flac.
    """
    # Normalise path-like objects to a plain path so the extension can be
    # inspected and the error message below can be built from it.
    audiopath = os.fspath(audiopath)
    extension = os.path.splitext(audiopath)[1].lower()

    if extension == ".mp3":
        # MP3 is compressed, so the file size says little about duration;
        # read duration and sample rate from the stream info instead.
        audio_info = MP3(audiopath).info
        return int(audio_info.length * audio_info.sample_rate)

    if extension in (".wav", ".flac"):
        compress_factor = 8
        bitrate = 16  # assuming 16bit audio
        # NOTE(review): this is only a size-based proxy; for compressed
        # FLAC it presumably underestimates the true sample count.
        return int(os.path.getsize(audiopath) / bitrate * compress_factor)

    audio_format = audiopath.split(".")[-1]
    raise RuntimeError(f"The audio format {audio_format} is not supported, please convert the audio files for mp3, flac or wav format!")


class TTSDataset(Dataset):
def __init__(
self,
Expand Down Expand Up @@ -176,7 +191,7 @@ def lengths(self):
lens = []
for item in self.samples:
_, wav_file, *_ = _parse_sample(item)
audio_len = os.path.getsize(wav_file) / 16 * 8 # assuming 16bit audio
audio_len = get_audio_size(wav_file)
lens.append(audio_len)
return lens

Expand Down Expand Up @@ -295,7 +310,7 @@ def load_data(self, idx):
def _compute_lengths(samples):
new_samples = []
for item in samples:
audio_length = os.path.getsize(item["audio_file"]) / 16 * 8 # assuming 16bit audio
audio_length = get_audio_size(item["audio_file"])
text_lenght = len(item["text"])
item["audio_length"] = audio_length
item["text_length"] = text_lenght
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pyyaml==6.*
fsspec==2023.6.0 # <= 2023.9.1 makes aux tests fail
aiohttp==3.8.*
packaging==23.1
mutagen==1.47.0
# deps for examples
flask==2.*
# deps for inference
Expand Down

0 comments on commit 2d69a99

Please sign in to comment.