coqui-ai · Edresson · Oct 6, 2023 · Oct 6, 2023
diff --git a/TTS/.models.json b/TTS/.models.json
@@ -4,6 +4,19 @@
             "multi-dataset": {
                 "xtts_v1": {
                     "description": "XTTS-v1 by Coqui with 13 languages and cross-language voice cloning.",
+                    "hf_url": [
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/model.pth",
+                        "https://huggingface.co/coqui/XTTS-v1/resolve/xtts_v1/config.json",
+                        "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/vocab.json"
+                    ],
+                    "default_vocoder": null,
+                    "commit": "e5140314",
+                    "license": "CPML",
+                    "contact": "info@coqui.ai",
+                    "tos_required": true
+                },
+                "xtts_v1.1": {
+                    "description": "XTTS-v1.1 by Coqui with 13 languages and cross-language voice cloning with faster inference and streaming support.",
                     "hf_url": [
                         "https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth",
                         "https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/config.json",

diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py
@@ -203,7 +203,7 @@ class XttsArgs(Coqpit):
         clvp_checkpoint (str, optional): The checkpoint for the ConditionalLatentVariablePerseq model. Defaults to None.
         decoder_checkpoint (str, optional): The checkpoint for the DiffTTS model. Defaults to None.
         num_chars (int, optional): The maximum number of characters to generate. Defaults to 255.
-        use_hifigan (bool, optional): Whether to use hifigan or diffusion + univnet as a decoder. Defaults to True.
+        use_hifigan (bool, optional): If True, use HiFi-GAN as the decoder; if False, use diffusion + UnivNet. Defaults to False.
 
         For GPT model:
         ar_max_audio_tokens (int, optional): The maximum mel tokens for the autoregressive model. Defaults to 604.
@@ -238,7 +238,7 @@ class XttsArgs(Coqpit):
     clvp_checkpoint: str = None
     decoder_checkpoint: str = None
     num_chars: int = 255
-    use_hifigan: bool = True
+    use_hifigan: bool = False
 
     # XTTS GPT Encoder params
     tokenizer_file: str = ""

diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
@@ -364,7 +364,7 @@ def download_model(self, model_name):
         output_model_path = output_path
         output_config_path = None
         if (
-            model not in ["tortoise-v2", "bark", "xtts_v1"] and "fairseq" not in model_name
+            model not in ["tortoise-v2", "bark", "xtts_v1", "xtts_v1.1"] and "fairseq" not in model_name
         ):  # TODO:This is stupid but don't care for now.
             output_model_path, output_config_path = self._find_files(output_path)
         # update paths in the config.json

diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md
@@ -33,7 +33,7 @@ You can also mail us at info@coqui.ai.
 
 ```python
 from TTS.api import TTS
-tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1.1", gpu=True)
 
 # generate speech by cloning a voice using default settings
 tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
@@ -45,7 +45,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t
 #### 🐸TTS Command line
 
 ```console
- tts --model_name tts_models/multilingual/multi-dataset/xtts_v1 \
+ tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 \
      --text "Bugün okula gitmek istemiyorum." \
      --speaker_wav /path/to/target/speaker.wav \
      --language_idx tr \

diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py
@@ -14,7 +14,7 @@
 MODELS_WITH_SEP_TESTS = [
     "tts_models/multilingual/multi-dataset/bark",
     "tts_models/en/multi-dataset/tortoise-v2",
-    "tts_models/multilingual/multi-dataset/xtts_v1",
+    "tts_models/multilingual/multi-dataset/xtts_v1"
 ]