Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tests for stream mode. #366

Merged
merged 5 commits into from
Jun 22, 2024
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions examples/cmd/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
import wave
import ChatTTS
from IPython.display import Audio
import numpy as np
import argparse


def save_wav_file(wav, index):
wav_filename = f"output_audio_{index}.wav"
Expand All @@ -24,28 +27,44 @@ def save_wav_file(wav, index):
wf.writeframes(wav_bytes)
print(f"Audio saved to {wav_filename}")

def main(text="<YOUR TEXT HERE>", stream=False):
    """Synthesize ``text`` with ChatTTS, save the audio, and return a player.

    Args:
        text: The sentence to convert to speech.
        stream: When true, ``chat.infer`` is asked for a generator of audio
            chunks, which are stitched into a single waveform. When false,
            the value returned by ``chat.infer`` is used as the list of
            waveforms directly.

    Returns:
        An ``IPython.display.Audio`` object wrapping the first waveform at
        24 kHz with autoplay enabled.
    """
    print(f"{stream=} Received text input: {text}")

    chat = ChatTTS.Chat()
    print("Initializing ChatTTS...")
    # if using macbook(M1), I suggest you set `device='cpu', compile=False`
    chat.load_models()
    print("Models loaded successfully.")

    texts = [text]
    print("Text prepared for inference:", texts)

    wavs_gen = chat.infer(texts, use_decoder=True, stream=stream)

    if stream:
        print('generate with stream mode ..')
        # Collect the chunks and join them once at the end; re-stacking the
        # growing waveform on every chunk copies all earlier samples each time.
        chunks = []
        for gen in wavs_gen:
            print('got new chunk', gen)
            # play chunk or combine into one complete audio;
            chunks.append(np.array(gen[0]))
        # Fall back to an empty (1, 0) waveform if the stream yielded nothing.
        # NOTE(review): assumes every chunk has the same rank so that hstack
        # can join them along the sample axis -- confirm against chat.infer.
        wavs = [np.hstack(chunks) if chunks else np.array([[]])]
    else:
        print('generate without stream mode ..')
        wavs = wavs_gen

    # Report completion only after the stream (if any) has been fully
    # consumed; before that point no audio has actually been produced.
    print("Inference completed. Audio generation successful.")

    # Save each generated wav file to a local file
    for index, wav in enumerate(wavs):
        save_wav_file(wav, index)

    return Audio(wavs[0], rate=24_000, autoplay=True)

if __name__ == "__main__":
    # Command-line entry point: parse the text and the --stream flag, then
    # run a single synthesis pass through main().
    print("Starting the TTS application...")

    parser = argparse.ArgumentParser(description='ChatTTS cmd demo', usage="--stream hello, my name is bob.")
    parser.add_argument("--stream", action="store_true", default=False, help="Use stream infer")
    # With nargs='*', argparse hands back the default object unchanged when no
    # positional words are given. The default must therefore be a list;
    # a bare string would be joined character by character below.
    parser.add_argument("text", help="Original text", default=['YOUR TEXT HERE'], nargs='*')
    args = parser.parse_args()
    main(text=' '.join(args.text), stream=args.stream)
    print("TTS application finished.")