Skip to content

Commit

Permalink
merge dec into dev-capacitron
Browse files Browse the repository at this point in the history
  • Loading branch information
a-froghyar committed May 25, 2021
2 parents 64cff14 + 19f1652 commit 7030dff
Show file tree
Hide file tree
Showing 207 changed files with 106,464 additions and 6,156 deletions.
1 change: 0 additions & 1 deletion .compute
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash
yes | apt-get install sox
yes | apt-get install ffmpeg
yes | apt-get install espeak
yes | apt-get install tmux
yes | apt-get install zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
architecture: x64
- name: check OS
run: cat /etc/os-release
- name: Install dependencies
Expand All @@ -39,11 +40,10 @@ jobs:
sudo apt install -y python3-wheel gcc
make system-deps
- name: Upgrade pip
# so we can take advantage of pyproject.toml build-dependency support
run: python3 -m pip install --upgrade pip
- name: Install TTS
run: |
python3 -m pip install .
python3 -m pip install .[all]
python3 setup.py egg_info
- name: Lint check
run: |
Expand Down
49 changes: 49 additions & 0 deletions .github/workflows/models.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Workflow that installs TTS with the `all` extra and runs the `tests.model_manager` nose tests.
name: CI

# Triggered by pushes to `main` and by pull requests that are opened, synchronized, or reopened.
on:
push:
branches:
- main
pull_request:
types: [opened, synchronize, reopened]
jobs:
# Runs only when the head commit message does not contain '[ci skip]'; its single step echoes that message.
# NOTE(review): no `needs:` key is visible on the `test` job, so this job does not appear to gate it — confirm intent.
# NOTE(review): the commit message is expanded directly into the shell command line; consider passing it via `env:` instead.
check_skip:
runs-on: ubuntu-latest
if: "! contains(github.event.head_commit.message, '[ci skip]')"
steps:
- run: echo "${{ github.event.head_commit.message }}"

# Runs the model checks on ubuntu-latest, once per Python version in the matrix (only 3.8 is listed).
test:
runs-on: ubuntu-latest
strategy:
# Do not cancel the remaining matrix jobs when one of them fails.
fail-fast: false
matrix:
python-version: [3.8]

steps:
- uses: actions/checkout@v2
# Cache ~/.cache/pip, keyed on runner OS, Python version, and the hash of every setup.py in the tree.
- uses: actions/cache@v1
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
# Print the runner's OS release information into the job log.
- name: check OS
run: cat /etc/os-release
# Install git, make, python3-wheel and gcc via apt, then run the Makefile's `system-deps` target.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y git make
sudo apt install -y python3-wheel gcc
make system-deps
- name: Upgrade pip
run: python3 -m pip install --upgrade pip
# Install the checked-out package with its `all` extra, then run `setup.py egg_info`.
- name: Install TTS
run: |
python3 -m pip install .[all]
python3 setup.py egg_info
# Run only the `tests.model_manager` tests; `--nocapture` leaves their stdout visible in the log.
- name: Check models
run: |
nosetests tests.model_manager --nocapture
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,3 @@ temp_build/*
playground.ipynb
.vscode-upload.json
temp_build/*
recipes/*
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ max-branches=12
max-locals=15

# Maximum number of parents for a class (see R0901).
max-parents=7
max-parents=15

# Maximum number of public methods for a class (see R0904).
max-public-methods=20
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
include README.md
include LICENSE.txt
include requirements.txt
include requirements.*.txt
recursive-include TTS *.json
recursive-include TTS *.html
recursive-include TTS *.png
Expand Down
14 changes: 10 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
.DEFAULT_GOAL := help
.PHONY: test deps style lint install help
# Every command-style target is declared phony so that a file of the same name
# can never make it look up to date; this includes test_failed.
.PHONY: test test_failed system-deps dev-deps deps style lint install help

help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

target_dirs := tests TTS notebooks

system-deps: ## install linux system deps
sudo apt-get install -y espeak-ng
sudo apt-get install -y libsndfile1-dev

dev-deps: ## install development deps
pip install -r requirements.dev.txt
pip install -r requirements.tf.txt

deps: ## install 🐸 requirements.
pip install -r requirements.txt

test: ## run tests.
nosetests -x --with-cov -cov --cover-erase --cover-package TTS tests
nosetests -x --with-cov -cov --cover-erase --cover-package TTS tests --nologcapture --with-id
./run_bash_tests.sh

test_failed: ## only run tests failed the last time.
nosetests -x --with-cov -cov --cover-erase --cover-package TTS tests --nologcapture --failed

style: ## update code style.
black ${target_dirs}
isort ${target_dirs}
Expand All @@ -25,4 +31,4 @@ lint: ## run pylint linter.
pylint ${target_dirs}

install: ## install 🐸 TTS for development.
pip install -e .
pip install -e .[all]
59 changes: 56 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ Please use our dedicated channels for questions and discussion. Help is much mor
| 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378)
| 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/%F0%9F%90%B8-TTS-Notebooks,-Examples-and-Tutorials) |
| 🚀 **Released Models** | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
| 💻 **Docker Image** | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-coqui-aitts)|
| 🖥️ **Demo Server** | [TTS/server](https://github.com/coqui-ai/TTS/tree/master/TTS/server)|
| 🤖 **Synthesize speech** | [TTS/README.md](https://github.com/coqui-ai/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|
| 🛠️ **Implementing a New Model** | [TTS/Wiki](https://github.com/coqui-ai/TTS/wiki/Implementing-a-New-Model-in-%F0%9F%90%B8TTS)|

## 🥇 TTS Performance
<p align="center"><img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/TTS-performance.png" width="800" /></p>
Expand Down Expand Up @@ -110,11 +110,17 @@ If you are only interested in [synthesizing speech](https://github.com/coqui-ai/
pip install TTS
```

By default, this installs only the PyTorch requirements. To install the TensorFlow dependencies as well, use the `tf` extra.

```bash
pip install TTS[tf]
```

If you plan to code or train models, clone 🐸TTS and install it locally.

```bash
git clone https://github.com/coqui-ai/TTS
pip install -e .
pip install -e .[all,dev,notebooks,tf] # Select the relevant extras
```

We use ```espeak-ng``` to convert graphemes to phonemes. You might need to install it separately.
Expand Down Expand Up @@ -176,6 +182,7 @@ Some of the public datasets that we successfully applied 🐸TTS:
- [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01

## Example: Synthesizing Speech on Terminal Using the Released Models.
<img src="images/tts_cli.gif"/>

After installation, 🐸TTS provides a CLI for synthesizing speech using pre-trained models. You can use either your own model or the released models under 🐸TTS.

Expand All @@ -185,7 +192,15 @@ Listing released 🐸TTS models.
tts --list_models
```

Run a tts and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.)
Run a TTS model from the released models list with its default vocoder. (Simply copy and paste the full model name from the list as the argument for the command below.)

```bash
tts --text "Text for TTS" \
--model_name "<type>/<language>/<dataset>/<model_name>" \
--out_path folder/to/save/output.wav
```

Run a tts and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model.

```bash
tts --text "Text for TTS" \
Expand Down Expand Up @@ -214,8 +229,46 @@ tts --text "Text for TTS" \
--vocoder_config_path path/to/vocoder_config.json
```

Run a multi-speaker TTS model from the released models list.

```bash
tts --model_name "<type>/<language>/<dataset>/<model_name>" --list_speaker_idxs # list the possible speaker IDs.
tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<type>/<language>/<dataset>/<model_name>" --speaker_idx "<speaker_id>"
```

**Note:** You can use ```./TTS/bin/synthesize.py``` if you prefer running ```tts``` from the TTS project folder.

## Example: Using the Demo Server for Synthesizing Speech

<!-- <img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/demo_server.gif" height="56"/> -->
<img src="images/demo_server.gif"/>

You can boot up a demo 🐸TTS server to run inference with your models. Note that the server is not optimized for performance
but gives you an easy way to interact with the models.

The demo server provides pretty much the same interface as the CLI command.

```bash
tts-server -h # see the help
tts-server --list_models # list the available models.
```

Run a TTS model from the released models list with its default vocoder.
If the model you choose is a multi-speaker TTS model, you can select different speakers on the Web interface and synthesize
speech.

```bash
tts-server --model_name "<type>/<language>/<dataset>/<model_name>"
```

Run a TTS and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model.

```bash
tts-server --model_name "<type>/<language>/<dataset>/<model_name>" \
--vocoder_name "<type>/<language>/<dataset>/<model_name>"
```


## Example: Training and Fine-tuning LJ-Speech Dataset
Here you can find a [CoLab](https://gist.github.com/erogol/97516ad65b44dbddb8cd694953187c5b) notebook for a hands-on example, training LJSpeech. Or you can manually follow the guideline below.

Expand Down
76 changes: 66 additions & 10 deletions TTS/.models.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"description": "EK1 en-rp tacotron2 by NMStoker",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255"
"commit": "c802255",
"needs_phonemizer": true
}
},
"ljspeech":{
Expand All @@ -17,7 +18,8 @@
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": false
},
"glow-tts":{
"description": "",
Expand All @@ -27,7 +29,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
},
"tacotron2-DCA": {
"description": "",
Expand All @@ -36,7 +39,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
},
"speedy-speech-wn":{
"description": "Speedy Speech model with wavenet decoder.",
Expand All @@ -45,7 +49,34 @@
"commit": "77b6145",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
},
"vctk":{
"sc-glow-tts": {
"description": "Multi-Speaker Transformers based SC-Glow model from https://arxiv.org/abs/2104.05557.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/tts_models--en--vctk--sc-glowtts-transformer.zip",
"default_vocoder": null,
"commit": "b531fa69",
"author": "Edresson Casanova",
"license": "",
"contact":"",
"needs_phonemizer": true


}
},
"sam":{
"tacotron-DDC": {
"description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
"default_vocoder": "vocoder_models/en/sam/hifigan_v2",
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "",
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
Expand All @@ -57,7 +88,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
Expand All @@ -69,7 +101,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
Expand All @@ -89,7 +122,8 @@
"author": "@r-dh",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null,
"commit": "540d811"
"commit": "540d811",
"needs_phonemizer": true
}
}
},
Expand All @@ -100,7 +134,8 @@
"author": "@erogol",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"license":"",
"contact": "egolge@coqui.com"
"contact": "egolge@coqui.com",
"needs_phonemizer": true
}
}
},
Expand All @@ -110,7 +145,8 @@
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/wavegrad",
"author": "@thorstenMueller",
"commit": "unknown"
"commit": "unknown",
"needs_phonemizer": true
}
}
}
Expand Down Expand Up @@ -158,6 +194,26 @@
"license": "",
"contact": "egolge@coqui.ai"
}
},
"vctk":{
"hifigan_v2":{
"description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
"github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
"commit": "2f07160",
"author": "Edresson Casanova",
"license": "",
"contact": ""
}
},
"sam": {
"hifigan_v2":{
"description": "Finetuned and intended to be used with tts_models/en/sam/tacotron-DDC",
"github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
"commit": "2f07160",
"author": "Eren Gölge @erogol",
"license": "",
"contact": "egolge@coqui.ai"
}
}
},
"nl":{
Expand Down
1 change: 1 addition & 0 deletions TTS/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._version import __version__
1 change: 1 addition & 0 deletions TTS/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.0.14"
Loading

0 comments on commit 7030dff

Please sign in to comment.