merge dec into dev-capacitron

a-froghyar · May 25, 2021 · 7030dff · 7030dff
2 parents 64cff14 + 19f1652
commit 7030dff
Show file tree

Hide file tree

Showing 207 changed files with 106,464 additions and 6,156 deletions.
diff --git a/.compute b/.compute
@@ -1,7 +1,6 @@
 #!/bin/bash
 yes | apt-get install sox
 yes | apt-get install ffmpeg
-yes | apt-get install espeak
 yes | apt-get install tmux
 yes | apt-get install zsh
 sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -30,6 +30,7 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
+          architecture: x64
       - name: check OS
         run: cat /etc/os-release
       - name: Install dependencies
@@ -39,11 +40,10 @@ jobs:
           sudo apt install -y python3-wheel gcc
           make system-deps
       - name: Upgrade pip
-        # so we can take advantage of pyproject.toml build-dependency support
         run: python3 -m pip install --upgrade pip
       - name: Install TTS
         run: |
-          python3 -m pip install .
+          python3 -m pip install .[all]
           python3 setup.py egg_info
       - name: Lint check
         run: |

diff --git a/.github/workflows/models.yml b/.github/workflows/models.yml
@@ -0,0 +1,49 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [opened, synchronize, reopened]
+jobs:
+  check_skip:
+    runs-on: ubuntu-latest
+    if: "! contains(github.event.head_commit.message, '[ci skip]')"
+    steps:
+      - run: echo "${{ github.event.head_commit.message }}"
+
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.8]
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/cache@v1
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: check OS
+        run: cat /etc/os-release
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y git make
+          sudo apt install -y python3-wheel gcc
+          make system-deps
+      - name: Upgrade pip
+        run: python3 -m pip install --upgrade pip
+      - name: Install TTS
+        run: |
+          python3 -m pip install .[all]
+          python3 setup.py egg_info
+      - name: Check models
+        run: |
+          nosetests tests.model_manager --nocapture
diff --git a/.gitignore b/.gitignore
@@ -136,4 +136,3 @@ temp_build/*
 playground.ipynb
 .vscode-upload.json
 temp_build/*
-recipes/*
diff --git a/.pylintrc b/.pylintrc
@@ -563,7 +563,7 @@ max-branches=12
 max-locals=15
 
 # Maximum number of parents for a class (see R0901).
-max-parents=7
+max-parents=15
 
 # Maximum number of public methods for a class (see R0904).
 max-public-methods=20

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,6 +1,6 @@
 include README.md
 include LICENSE.txt
-include requirements.txt
+include requirements.*.txt
 recursive-include TTS *.json
 recursive-include TTS *.html
 recursive-include TTS *.png

diff --git a/Makefile b/Makefile
@@ -1,22 +1,28 @@
 .DEFAULT_GOAL := help
-.PHONY: test deps style lint install help
+.PHONY: test system-deps dev-deps deps style lint install help
 
 help:
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
 
 target_dirs := tests TTS notebooks
 
 system-deps:	## install linux system deps
-	sudo apt-get install -y espeak-ng
 	sudo apt-get install -y libsndfile1-dev
 
+dev-deps:  ## install development deps
+	pip install -r requirements.dev.txt
+	pip install -r requirements.tf.txt
+
 deps:	## install 🐸 requirements.
 	pip install -r requirements.txt
 
 test:	## run tests.
-	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests
+	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests --nologcapture --with-id
 	./run_bash_tests.sh
 
+test_failed:  ## only run tests that failed the last time.
+	nosetests -x --with-cov -cov  --cover-erase --cover-package TTS tests --nologcapture --failed
+
 style:	## update code style.
 	black ${target_dirs}
 	isort ${target_dirs}
@@ -25,4 +31,4 @@ lint:	## run pylint linter.
 	pylint ${target_dirs}
 
 install:	## install 🐸 TTS for development.
-	pip install -e .
+	pip install -e .[all]
diff --git a/README.md b/README.md
@@ -45,9 +45,9 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 | 📌 **Road Map**                   | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378)
 | 👩🏾‍🏫 **Tutorials and Examples**     | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/%F0%9F%90%B8-TTS-Notebooks,-Examples-and-Tutorials) |
 | 🚀 **Released Models**            | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
-| 💻 **Docker Image**               | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-coqui-aitts)|
 | 🖥️ **Demo Server**                | [TTS/server](https://github.com/coqui-ai/TTS/tree/master/TTS/server)|
 | 🤖 **Synthesize speech**          | [TTS/README.md](https://github.com/coqui-ai/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|
+| 🛠️ **Implementing a New Model**   | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/Implementing-a-New-Model-in-%F0%9F%90%B8TTS)|
 
 ## 🥇 TTS Performance
 <p align="center"><img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/TTS-performance.png" width="800" /></p>
@@ -110,11 +110,17 @@ If you are only interested in [synthesizing speech](https://github.com/coqui-ai/
 pip install TTS
 ```
 
+By default this only installs the requirements for PyTorch. To install the tensorflow dependencies as well, use the `tf` extra.
+
+```bash
+pip install TTS[tf]
+```
+
 If you plan to code or train models, clone 🐸TTS and install it locally.
 
 ```bash
 git clone https://github.com/coqui-ai/TTS
-pip install -e .
+pip install -e .[all,dev,notebooks,tf]  # Select the relevant extras
 ```
 
 We use ```espeak-ng``` to convert graphemes to phonemes. You might need to install separately.
@@ -176,6 +182,7 @@ Some of the public datasets that we successfully applied 🐸TTS:
 - [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01
 
 ## Example: Synthesizing Speech on Terminal Using the Released Models.
+<img src="images/tts_cli.gif"/>
 
 After the installation, 🐸TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under 🐸TTS.
 
@@ -185,7 +192,15 @@ Listing released 🐸TTS models.
 tts --list_models
 ```
 
-Run a tts and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.)
+Run a TTS model from the released models list with its default vocoder. (Simply copy and paste the full model names from the list as arguments for the command below.)
+
+```bash
+tts --text "Text for TTS" \
+    --model_name "<type>/<language>/<dataset>/<model_name>" \
+    --out_path folder/to/save/output.wav
+```
+
+Run a tts and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model.
 
 ```bash
 tts --text "Text for TTS" \
@@ -214,8 +229,46 @@ tts --text "Text for TTS" \
     --vocoder_config_path path/to/vocoder_config.json
 ```
 
+Run a multi-speaker TTS model from the released models list.
+
+```bash
+tts --model_name "<type>/<language>/<dataset>/<model_name>"  --list_speaker_idxs  # list the possible speaker IDs.
+tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<type>/<language>/<dataset>/<model_name>"  --speaker_idx "<speaker_id>"
+```
+
 **Note:** You can use ```./TTS/bin/synthesize.py``` if you prefer running ```tts``` from the TTS project folder.
 
+## Example: Using the Demo Server for Synthesizing Speech
+
+ <!-- <img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/demo_server.gif" height="56"/> -->
+ <img src="images/demo_server.gif"/>
+
+You can boot up a demo 🐸TTS server to run inference with your models. Note that the server is not optimized for performance
+but gives you an easy way to interact with the models.
+
+The demo server provides pretty much the same interface as the CLI command.
+
+```bash
+tts-server -h # see the help
+tts-server --list_models  # list the available models.
+```
+
+Run a TTS model from the released models list with its default vocoder.
+If the model you choose is a multi-speaker TTS model, you can select different speakers on the Web interface and synthesize
+speech.
+
+```bash
+tts-server --model_name "<type>/<language>/<dataset>/<model_name>"
+```
+
+Run a TTS and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model.
+
+```bash
+tts-server --model_name "<type>/<language>/<dataset>/<model_name>" \
+           --vocoder_name "<type>/<language>/<dataset>/<model_name>"
+```
+
+
 ## Example: Training and Fine-tuning LJ-Speech Dataset
 Here you can find a [CoLab](https://gist.github.com/erogol/97516ad65b44dbddb8cd694953187c5b) notebook for a hands-on example, training LJSpeech. Or you can manually follow the guideline below.
 

diff --git a/TTS/.models.json b/TTS/.models.json
@@ -6,7 +6,8 @@
                     "description": "EK1 en-rp tacotron2 by NMStoker",
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
                     "default_vocoder": "vocoder_models/en/ek1/wavegrad",
-                    "commit": "c802255"
+                    "commit": "c802255",
+                    "needs_phonemizer": true
                 }
             },
             "ljspeech":{
@@ -17,7 +18,8 @@
                     "commit": "bae2ad0f",
                     "author": "Eren Gölge @erogol",
                     "license": "",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": false
                 },
                 "glow-tts":{
                     "description": "",
@@ -27,7 +29,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 },
                 "tacotron2-DCA": {
                     "description": "",
@@ -36,7 +39,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 },
                 "speedy-speech-wn":{
                     "description": "Speedy Speech model with wavenet decoder.",
@@ -45,7 +49,34 @@
                     "commit": "77b6145",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
+                }
+            },
+            "vctk":{
+                "sc-glow-tts": {
+                    "description": "Multi-Speaker Transformers based SC-Glow model from https://arxiv.org/abs/2104.05557.",
+                    "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/tts_models--en--vctk--sc-glowtts-transformer.zip",
+                    "default_vocoder": null,
+                    "commit": "b531fa69",
+                    "author": "Edresson Casanova",
+                    "license": "",
+                    "contact":"",
+                    "needs_phonemizer": true
+
+
+                }
+            },
+            "sam":{
+                "tacotron-DDC": {
+                    "description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
+                    "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
+                    "default_vocoder": "vocoder_models/en/sam/hifigan_v2",
+                    "commit": "bae2ad0f",
+                    "author": "Eren Gölge @erogol",
+                    "license": "",
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -57,7 +88,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -69,7 +101,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -89,7 +122,8 @@
                     "author": "@r-dh",
                     "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
                     "stats_file": null,
-                    "commit": "540d811"
+                    "commit": "540d811",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -100,7 +134,8 @@
                     "author": "@erogol",
                     "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
                     "license":"",
-                    "contact": "egolge@coqui.com"
+                    "contact": "egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -110,7 +145,8 @@
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
                     "default_vocoder": "vocoder_models/de/thorsten/wavegrad",
                     "author": "@thorstenMueller",
-                    "commit": "unknown"
+                    "commit": "unknown",
+                    "needs_phonemizer": true
                 }
             }
         }
@@ -158,6 +194,26 @@
                     "license": "",
                     "contact": "egolge@coqui.ai"
                 }
+            },
+            "vctk":{
+                "hifigan_v2":{
+                    "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
+                    "github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
+                    "commit": "2f07160",
+                    "author": "Edresson Casanova",
+                    "license": "",
+                    "contact": ""
+                }
+            },
+            "sam": {
+                "hifigan_v2":{
+                    "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron-DDC",
+                    "github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
+                    "commit": "2f07160",
+                    "author": "Eren Gölge @erogol",
+                    "license": "",
+                    "contact": "egolge@coqui.ai"
+                }
             }
         },
         "nl":{

diff --git a/TTS/__init__.py b/TTS/__init__.py
@@ -0,0 +1 @@
+from ._version import __version__
diff --git a/TTS/_version.py b/TTS/_version.py
@@ -0,0 +1 @@
+__version__ = "0.0.14"