coqui-ai · erogol · Oct 16, 2023 · Oct 3, 2023 · Oct 3, 2023 · Oct 3, 2023
diff --git a/README.md b/README.md
@@ -347,6 +347,18 @@ If you don't specify any models, then it uses LJSpeech based English model.
   $ tts --text "Text for TTS" --out_path output/path/speech.wav
   ```
 
+- Run TTS and pipe the generated wav file data to stdout:
+
+  ```
+  $ tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay
+  ```
+
+- Run TTS and define speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0:
+
+  ```
+  $ tts --text "Text for TTS" --model_name "coqui_studio/<language>/<dataset>/<model_name>" --speed 1.2 --out_path output/path/speech.wav
+  ```
+
 - Run a TTS model with its default vocoder model:
 
   ```

diff --git a/TTS/api.py b/TTS/api.py
@@ -112,7 +112,6 @@ def is_multi_lingual(self):
             return self.synthesizer.tts_model.language_manager.num_languages > 1
         return False
 
-
     @property
     def speakers(self):
         if not self.is_multi_speaker:
@@ -265,6 +264,7 @@ def tts_coqui_studio(
         language: str = None,
         emotion: str = None,
         speed: float = 1.0,
+        pipe_out = None,
         file_path: str = None,
     ) -> Union[np.ndarray, str]:
         """Convert text to speech using Coqui Studio models. Use `CS_API` class if you are only interested in the API.
@@ -281,6 +281,8 @@ def tts_coqui_studio(
                 with "V1" model. Defaults to None.
             speed (float, optional):
                 Speed of the speech. Defaults to 1.0.
+            pipe_out (BytesIO, optional):
+                Flag to write the generated TTS wav file to stdout, for use in a shell pipe.
             file_path (str, optional):
                 Path to save the output file. When None it returns the `np.ndarray` of waveform. Defaults to None.
 
@@ -294,6 +296,7 @@ def tts_coqui_studio(
                 speaker_name=speaker_name,
                 language=language,
                 speed=speed,
+                pipe_out=pipe_out,
                 emotion=emotion,
                 file_path=file_path,
             )[0]
@@ -356,6 +359,7 @@ def tts_to_file(
         speaker_wav: str = None,
         emotion: str = None,
         speed: float = 1.0,
+        pipe_out = None,
         file_path: str = "output.wav",
         **kwargs,
     ):
@@ -377,6 +381,8 @@ def tts_to_file(
                 Emotion to use for 🐸Coqui Studio models. Defaults to "Neutral".
             speed (float, optional):
                 Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None.
+            pipe_out (BytesIO, optional):
+                Flag to write the generated TTS wav file to stdout, for use in a shell pipe.
             file_path (str, optional):
                 Output file path. Defaults to "output.wav".
             kwargs (dict, optional):
@@ -386,10 +392,16 @@ def tts_to_file(
 
         if self.csapi is not None:
             return self.tts_coqui_studio(
-                text=text, speaker_name=speaker, language=language, emotion=emotion, speed=speed, file_path=file_path
+                text=text,
+                speaker_name=speaker,
+                language=language,
+                emotion=emotion,
+                speed=speed,
+                file_path=file_path,
+                pipe_out=pipe_out,
             )
         wav = self.tts(text=text, speaker=speaker, language=language, speaker_wav=speaker_wav, **kwargs)
-        self.synthesizer.save_wav(wav=wav, path=file_path)
+        self.synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out)
         return file_path
 
     def voice_conversion(