Skip to content

Commit

Permalink
Update synthesizer.py
Browse files: browse the repository at this point in the history
Configurable verbose output
  • Loading branch information
davidmartinrius authored Mar 5, 2024
1 parent 64fdd0e commit 275229a
Showing 1 changed file with 16 additions and 9 deletions.
25 changes: 16 additions & 9 deletions TTS/utils/synthesizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -264,6 +264,7 @@ def tts(
style_text=None,
reference_wav=None,
reference_speaker_name=None,
verbose: bool = True,
split_sentences: bool = True,
**kwargs,
) -> List[int]:
Expand All @@ -278,6 +279,7 @@ def tts(
style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None.
reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None.
reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None.
verbose (bool, optional): print verbose output. Defaults to True.
split_sentences (bool, optional): split the input text into sentences. Defaults to True.
**kwargs: additional arguments to pass to the TTS model.
Returns:
Expand All @@ -294,9 +296,11 @@ def tts(
if text:
sens = [text]
if split_sentences:
print(" > Text splitted to sentences.")
if verbose:
print(" > Text splitted to sentences.")
sens = self.split_into_sentences(text)
print(sens)
if verbose:
print(sens)

# handle multi-speaker
if "voice_dir" in kwargs:
Expand Down Expand Up @@ -420,7 +424,8 @@ def tts(
self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
]
if scale_factor[1] != 1:
print(" > interpolating tts model output.")
if verbose:
print(" > interpolating tts model output.")
vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
else:
vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable
Expand Down Expand Up @@ -484,7 +489,8 @@ def tts(
self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
]
if scale_factor[1] != 1:
print(" > interpolating tts model output.")
if verbose:
print(" > interpolating tts model output.")
vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
else:
vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable
Expand All @@ -497,9 +503,10 @@ def tts(
waveform = waveform.numpy()
wavs = waveform.squeeze()

# compute stats
process_time = time.time() - start_time
audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
print(f" > Processing time: {process_time}")
print(f" > Real-time factor: {process_time / audio_time}")
if verbose:
# compute stats
process_time = time.time() - start_time
audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
print(f" > Processing time: {process_time}")
print(f" > Real-time factor: {process_time / audio_time}")
return wavs

0 comments on commit 275229a

Please sign in to comment.