diff --git a/.cardboardlint.yml b/.cardboardlint.yml index 4a115a37cd..fafb88dcbe 100644 --- a/.cardboardlint.yml +++ b/.cardboardlint.yml @@ -2,4 +2,4 @@ linters: - pylint: # pylintrc: pylintrc filefilter: ['- test_*.py', '+ *.py', '- *.npy'] - # exclude: \ No newline at end of file + # exclude: diff --git a/.dockerignore b/.dockerignore index 8d8ad918c9..5b28aa99dc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,4 +6,4 @@ TTS.egg-info/ tests/outputs/* tests/train_outputs/* __pycache__/ -*.pyc \ No newline at end of file +*.pyc diff --git a/.github/stale.yml b/.github/stale.yml index e05eaf0b57..dd45bf098f 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -15,4 +15,3 @@ markComment: > for your contributions. You might also look our discussion channels. # Comment to post when closing a stale issue. Set to `false` to disable closeComment: false - diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index c167f7ca44..287fa6bd88 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -44,4 +44,4 @@ jobs: python3 setup.py egg_info # - name: Lint check # run: | - # make lint \ No newline at end of file + # make lint diff --git a/CITATION.cff b/CITATION.cff index 6b0c8f19af..deeac09799 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -17,4 +17,4 @@ keywords: - deep learning - artificial intelligence - text to speech - - TTS \ No newline at end of file + - TTS diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b80639d63c..9c83ebcf12 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -119,11 +119,11 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. -Community Impact Guidelines were inspired by +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. For answers to common questions about this code of conduct, see the FAQ at -[https://www.contributor-covenant.org/faq][FAQ]. Translations are available +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at [https://www.contributor-covenant.org/translations][translations]. [homepage]: https://www.contributor-covenant.org diff --git a/LICENSE.txt b/LICENSE.txt index 14e2f777f6..a612ad9813 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0 means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" diff --git a/TTS/.models.json b/TTS/.models.json index 1eaaab713c..f8b458e979 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -894,4 +894,4 @@ } } } -} \ No newline at end of file +} diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html index 51c9ed85a8..85ff959591 100644 --- a/TTS/server/templates/details.html +++ b/TTS/server/templates/details.html @@ -128,4 +128,4 @@ - \ No newline at end of file + diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html index 6354d3919d..0becca97b5 100644 --- a/TTS/server/templates/index.html +++ b/TTS/server/templates/index.html @@ -151,4 +151,4 @@ - \ No newline at end of file + diff --git a/TTS/tts/utils/assets/tortoise/tokenizer.json b/TTS/tts/utils/assets/tortoise/tokenizer.json index a128f27305..c2fb44a729 100644 --- a/TTS/tts/utils/assets/tortoise/tokenizer.json +++ b/TTS/tts/utils/assets/tortoise/tokenizer.json @@ -1 +1 @@ -{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}} \ No newline at end of file +{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}} diff --git a/TTS/vc/modules/freevc/wavlm/config.json b/TTS/vc/modules/freevc/wavlm/config.json index c6f851b93d..c2e414cf0b 100644 --- a/TTS/vc/modules/freevc/wavlm/config.json +++ b/TTS/vc/modules/freevc/wavlm/config.json @@ -96,4 +96,4 @@ "transformers_version": "4.15.0.dev0", "use_weighted_layer_sum": false, "vocab_size": 32 - } \ No newline at end of file + } diff --git a/docs/requirements.txt b/docs/requirements.txt index efbefec44b..86ccae9cca 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,4 @@ myst-parser == 2.0.0 sphinx == 7.2.5 sphinx_inline_tabs sphinx_copybutton -linkify-it-py \ No newline at end of file +linkify-it-py diff --git a/docs/source/conf.py b/docs/source/conf.py index b85324fd40..8d392f8a1a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,15 +13,15 @@ import os import sys -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath("../..")) # mock deps with system level requirements. autodoc_mock_imports = ["soundfile"] # -- Project information ----------------------------------------------------- -project = 'TTS' +project = "TTS" copyright = "2021 Coqui GmbH, 2020 TTS authors" -author = 'Coqui GmbH' +author = "Coqui GmbH" with open("../../TTS/VERSION", "r") as ver: version = ver.read().strip() @@ -40,32 +40,34 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.autosectionlabel', - 'myst_parser', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "myst_parser", "sphinx_copybutton", "sphinx_inline_tabs", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'TODO/*'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "TODO/*"] source_suffix = [".rst", ".md"] -myst_enable_extensions = ['linkify',] +myst_enable_extensions = [ + "linkify", +] # 'sphinxcontrib.katex', # 'sphinx.ext.autosectionlabel', @@ -76,17 +78,17 @@ # duplicated section names that are in different documents. autosectionlabel_prefix_document = True -language = 'en' +language = "en" autodoc_inherit_docstrings = False # Disable displaying type annotations, these can be very verbose -autodoc_typehints = 'none' +autodoc_typehints = "none" # Enable overriding of function signatures in the first line of the docstring. autodoc_docstring_signature = True -napoleon_custom_sections = [('Shapes', 'shape')] +napoleon_custom_sections = [("Shapes", "shape")] # -- Options for HTML output ------------------------------------------------- @@ -94,7 +96,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'furo' +html_theme = "furo" html_tite = "TTS" html_theme_options = { "light_logo": "logo.png", @@ -103,18 +105,18 @@ } html_sidebars = { - '**': [ - "sidebar/scroll-start.html", - "sidebar/brand.html", - "sidebar/search.html", - "sidebar/navigation.html", - "sidebar/ethical-ads.html", - "sidebar/scroll-end.html", - ] - } + "**": [ + "sidebar/scroll-start.html", + "sidebar/brand.html", + "sidebar/search.html", + "sidebar/navigation.html", + "sidebar/ethical-ads.html", + "sidebar/scroll-end.html", + ] +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/docs/source/configuration.md b/docs/source/configuration.md index cde7e073e9..b69fd84337 100644 --- a/docs/source/configuration.md +++ b/docs/source/configuration.md @@ -56,4 +56,4 @@ ModelConfig() In the example above, ```ModelConfig()``` is the final configuration that the model receives and it has all the fields necessary for the model. -We host pre-defined model configurations under ```TTS//configs/```.Although we recommend a unified config class, you can decompose it as you like as for your custom models as long as all the fields for the trainer, model, and inference APIs are provided. \ No newline at end of file +We host pre-defined model configurations under ```TTS//configs/```.Although we recommend a unified config class, you can decompose it as you like as for your custom models as long as all the fields for the trainer, model, and inference APIs are provided. diff --git a/docs/source/docker_images.md b/docs/source/docker_images.md index d08a55837d..8df5185505 100644 --- a/docs/source/docker_images.md +++ b/docs/source/docker_images.md @@ -53,4 +53,4 @@ python3 TTS/server/server.py --list_models #To get the list of available models python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true ``` -Click [there](http://[::1]:5002/) and have fun with the server! \ No newline at end of file +Click [there](http://[::1]:5002/) and have fun with the server! diff --git a/docs/source/finetuning.md b/docs/source/finetuning.md index c236260d0c..6b58771a21 100644 --- a/docs/source/finetuning.md +++ b/docs/source/finetuning.md @@ -111,4 +111,3 @@ them and fine-tune it for your own dataset. This will help you in two main ways: --coqpit.run_name "glow-tts-finetune" \ --coqpit.lr 0.00001 ``` - diff --git a/docs/source/implementing_a_new_language_frontend.md b/docs/source/implementing_a_new_language_frontend.md index f4f6a04a5f..4d5348305c 100644 --- a/docs/source/implementing_a_new_language_frontend.md +++ b/docs/source/implementing_a_new_language_frontend.md @@ -9,4 +9,4 @@ from the previous step and used to convert the text to phonemes or graphemes for map the language code in the model config - `config.phoneme_language` - to the phonemizer class and initiate the phonemizer automatically. - You should also add tests to `tests/text_tests` if you want to make a PR. -We suggest you to check the available implementations as reference. Good luck! \ No newline at end of file +We suggest you to check the available implementations as reference. Good luck! diff --git a/docs/source/inference.md b/docs/source/inference.md index 3071be4f4d..b1d620dc28 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -232,4 +232,4 @@ api.tts_with_vc_to_file( speaker_wav="target/speaker.wav", file_path="ouptut.wav" ) -``` \ No newline at end of file +``` diff --git a/docs/source/installation.md b/docs/source/installation.md index c4d05361f4..4fecedcddd 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -30,4 +30,4 @@ make install ``` ## On Windows -If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/ \ No newline at end of file +If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/ diff --git a/docs/source/main_classes/audio_processor.md b/docs/source/main_classes/audio_processor.md index 600b0db582..98e94a8789 100644 --- a/docs/source/main_classes/audio_processor.md +++ b/docs/source/main_classes/audio_processor.md @@ -22,4 +22,4 @@ also must inherit or initiate `BaseAudioConfig`. ```{eval-rst} .. autoclass:: TTS.config.shared_configs.BaseAudioConfig :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/dataset.md b/docs/source/main_classes/dataset.md index 92d381aca5..1566488194 100644 --- a/docs/source/main_classes/dataset.md +++ b/docs/source/main_classes/dataset.md @@ -22,4 +22,4 @@ ```{eval-rst} .. autoclass:: TTS.vocoder.datasets.wavernn_dataset.WaveRNNDataset :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/gan.md b/docs/source/main_classes/gan.md index 4524b4b5c5..e143f6431e 100644 --- a/docs/source/main_classes/gan.md +++ b/docs/source/main_classes/gan.md @@ -9,4 +9,4 @@ to do its ✨️. ```{eval-rst} .. autoclass:: TTS.vocoder.models.gan.GAN :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/model_api.md b/docs/source/main_classes/model_api.md index 0e6f2d9427..71b3d41640 100644 --- a/docs/source/main_classes/model_api.md +++ b/docs/source/main_classes/model_api.md @@ -21,4 +21,4 @@ Model API provides you a set of functions that easily make your model compatible ```{eval-rst} .. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/speaker_manager.md b/docs/source/main_classes/speaker_manager.md index ba4b55dc78..fe98823956 100644 --- a/docs/source/main_classes/speaker_manager.md +++ b/docs/source/main_classes/speaker_manager.md @@ -8,4 +8,4 @@ especially useful for multi-speaker models. ```{eval-rst} .. automodule:: TTS.tts.utils.speakers :members: -``` \ No newline at end of file +``` diff --git a/docs/source/marytts.md b/docs/source/marytts.md index 81d547107d..4d016e69da 100644 --- a/docs/source/marytts.md +++ b/docs/source/marytts.md @@ -40,4 +40,4 @@ You can enter the same URLs in your browser and check-out the results there as w ### How it works and limitations A classic Mary-TTS server would usually show all installed locales and voices via the corresponding endpoints and accept the parameters `LOCALE` and `VOICE` for processing. For Coqui-TTS we usually start the server with one specific locale and model and thus cannot return all available options. Instead we return the active locale and use the model name as "voice". Since we only have one active model and always want to return a WAV-file, we currently ignore all other processing parameters except `INPUT_TEXT`. Since the gender is not defined for models in Coqui-TTS we always return `u` (undefined). -We think that this is an acceptable compromise, since users are often only interested in one specific voice anyways, but the API might get extended in the future to support multiple languages and voices at the same time. \ No newline at end of file +We think that this is an acceptable compromise, since users are often only interested in one specific voice anyways, but the API might get extended in the future to support multiple languages and voices at the same time. diff --git a/docs/source/models/forward_tts.md b/docs/source/models/forward_tts.md index 4739496770..67af222471 100644 --- a/docs/source/models/forward_tts.md +++ b/docs/source/models/forward_tts.md @@ -61,5 +61,3 @@ Currently we provide the following pre-configured architectures: .. autoclass:: TTS.tts.configs.fast_speech_config.FastSpeechConfig :members: ``` - - diff --git a/docs/source/models/overflow.md b/docs/source/models/overflow.md index 09e270eae5..042ad47474 100644 --- a/docs/source/models/overflow.md +++ b/docs/source/models/overflow.md @@ -33,4 +33,4 @@ are available at https://shivammehta25.github.io/OverFlow/. ```{eval-rst} .. autoclass:: TTS.tts.models.overflow.Overflow :members: -``` \ No newline at end of file +``` diff --git a/docs/source/models/tacotron1-2.md b/docs/source/models/tacotron1-2.md index 25721eba4c..f35cfeca4c 100644 --- a/docs/source/models/tacotron1-2.md +++ b/docs/source/models/tacotron1-2.md @@ -59,5 +59,3 @@ If you have a limited VRAM, then you can try using the Guided Attention Loss or .. autoclass:: TTS.tts.configs.tacotron2_config.Tacotron2Config :members: ``` - - diff --git a/docs/source/what_makes_a_good_dataset.md b/docs/source/what_makes_a_good_dataset.md index 18c87453f7..44a93a39da 100644 --- a/docs/source/what_makes_a_good_dataset.md +++ b/docs/source/what_makes_a_good_dataset.md @@ -17,4 +17,4 @@ If you like to use a bespoken dataset, you might like to perform a couple of qua * **CheckSpectrograms** is to measure the noise level of the clips and find good audio processing parameters. The noise level might be observed by checking spectrograms. If spectrograms look cluttered, especially in silent parts, this dataset might not be a good candidate for a TTS project. If your voice clips are too noisy in the background, it makes things harder for your model to learn the alignment, and the final result might be different than the voice you are given. If the spectrograms look good, then the next step is to find a good set of audio processing parameters, defined in ```config.json```. In the notebook, you can compare different sets of parameters and see the resynthesis results in relation to the given ground-truth. Find the best parameters that give the best possible synthesis performance. -Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050. \ No newline at end of file +Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050. diff --git a/hubconf.py b/hubconf.py index 0c9c5930fc..6e10928265 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,15 +1,11 @@ -dependencies = [ - 'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite' -] +dependencies = ["torch", "gdown", "pysbd", "gruut", "anyascii", "pypinyin", "coqpit", "mecab-python3", "unidic-lite"] import torch from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer -def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', - vocoder_name=None, - use_cuda=False): +def tts(model_name="tts_models/en/ljspeech/tacotron2-DCA", vocoder_name=None, use_cuda=False): """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a give text. Example: @@ -28,19 +24,20 @@ def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', manager = ModelManager() model_path, config_path, model_item = manager.download_model(model_name) - vocoder_name = model_item[ - 'default_vocoder'] if vocoder_name is None else vocoder_name + vocoder_name = model_item["default_vocoder"] if vocoder_name is None else vocoder_name vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name) # create synthesizer - synt = Synthesizer(tts_checkpoint=model_path, - tts_config_path=config_path, - vocoder_checkpoint=vocoder_path, - vocoder_config=vocoder_config_path, - use_cuda=use_cuda) + synt = Synthesizer( + tts_checkpoint=model_path, + tts_config_path=config_path, + vocoder_checkpoint=vocoder_path, + vocoder_config=vocoder_config_path, + use_cuda=use_cuda, + ) return synt -if __name__ == '__main__': - synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github') +if __name__ == "__main__": + synthesizer = torch.hub.load("coqui-ai/TTS:dev", "tts", source="github") synthesizer.tts("This is a test!") diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb index 65edf98ca4..d85ca1035a 100644 --- a/notebooks/TestAttention.ipynb +++ b/notebooks/TestAttention.ipynb @@ -185,4 +185,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/dataset_analysis/CheckPitch.ipynb b/notebooks/dataset_analysis/CheckPitch.ipynb index 72afbc64a1..ebdac87378 100644 --- a/notebooks/dataset_analysis/CheckPitch.ipynb +++ b/notebooks/dataset_analysis/CheckPitch.ipynb @@ -176,4 +176,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebooks/dataset_analysis/README.md b/notebooks/dataset_analysis/README.md index 79faf52159..9fe40d01a4 100644 --- a/notebooks/dataset_analysis/README.md +++ b/notebooks/dataset_analysis/README.md @@ -2,6 +2,6 @@ By the use of this notebook, you can easily analyze a brand new dataset, find exceptional cases and define your training set. -What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. +What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. This notebook is inspired from https://github.com/MycroftAI/mimic2 diff --git a/recipes/README.md b/recipes/README.md index 21a6727d8b..fcc4719aaa 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -19,4 +19,4 @@ python TTS/bin/resample.py --input_dir recipes/vctk/VCTK/wav48_silence_trimmed - If you train a new model using TTS, feel free to share your training to expand the list of recipes. -You can also open a new discussion and share your progress with the 🐸 community. \ No newline at end of file +You can also open a new discussion and share your progress with the 🐸 community. diff --git a/recipes/blizzard2013/README.md b/recipes/blizzard2013/README.md index 9dcb739728..75f17a5513 100644 --- a/recipes/blizzard2013/README.md +++ b/recipes/blizzard2013/README.md @@ -9,4 +9,4 @@ To get a license and download link for this dataset, you need to visit the [webs You get access to the raw dataset in a couple of days. There are a few preprocessing steps you need to do to be able to use the high fidelity dataset. 1. Get the forced time alignments for the blizzard dataset from [here](https://github.com/mueller91/tts_alignments). -2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation). \ No newline at end of file +2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation). diff --git a/recipes/kokoro/tacotron2-DDC/run.sh b/recipes/kokoro/tacotron2-DDC/run.sh index 69800cf7b4..3f18f2c3fb 100644 --- a/recipes/kokoro/tacotron2-DDC/run.sh +++ b/recipes/kokoro/tacotron2-DDC/run.sh @@ -20,4 +20,4 @@ CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py --config_path $RUN_DIR/taco --coqpit.output_path $RUN_DIR \ --coqpit.datasets.0.path $RUN_DIR/$CORPUS \ --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \ - --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \ \ No newline at end of file + --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \ diff --git a/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json b/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json index c2e526f46c..f422203a31 100644 --- a/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json +++ b/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json @@ -122,4 +122,4 @@ "use_gst": false, "use_external_speaker_embedding_file": false, "external_speaker_embedding_file": "../../speakers-vctk-en.json" -} \ No newline at end of file +} diff --git a/recipes/ljspeech/download_ljspeech.sh b/recipes/ljspeech/download_ljspeech.sh index 9468988a99..21c3e0e2d7 100644 --- a/recipes/ljspeech/download_ljspeech.sh +++ b/recipes/ljspeech/download_ljspeech.sh @@ -11,4 +11,4 @@ shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv mv LJSpeech-1.1 $RUN_DIR/recipes/ljspeech/ -rm LJSpeech-1.1.tar.bz2 \ No newline at end of file +rm LJSpeech-1.1.tar.bz2 diff --git a/requirements.notebooks.txt b/requirements.notebooks.txt index 65d3f642c9..c55cfdea0e 100644 --- a/requirements.notebooks.txt +++ b/requirements.notebooks.txt @@ -1 +1 @@ -bokeh==1.4.0 \ No newline at end of file +bokeh==1.4.0 diff --git a/setup.py b/setup.py index df14b41adc..b01b655877 100644 --- a/setup.py +++ b/setup.py @@ -23,12 +23,12 @@ import os import subprocess import sys -from packaging.version import Version import numpy import setuptools.command.build_py import setuptools.command.develop from Cython.Build import cythonize +from packaging.version import Version from setuptools import Extension, find_packages, setup python_version = sys.version.split()[0] diff --git a/tests/bash_tests/test_compute_statistics.sh b/tests/bash_tests/test_compute_statistics.sh index d7f0ab9d4c..721777f852 100755 --- a/tests/bash_tests/test_compute_statistics.sh +++ b/tests/bash_tests/test_compute_statistics.sh @@ -4,4 +4,3 @@ BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy - diff --git a/tests/data/dummy_speakers.json b/tests/data/dummy_speakers.json index 233533b796..507b57b5a5 100644 --- a/tests/data/dummy_speakers.json +++ b/tests/data/dummy_speakers.json @@ -100222,5 +100222,5 @@ 0.04999300092458725, -0.12125937640666962 ] - } + } } diff --git a/tests/inputs/common_voice.tsv b/tests/inputs/common_voice.tsv index 39fc4190ac..b4351d6739 100644 --- a/tests/inputs/common_voice.tsv +++ b/tests/inputs/common_voice.tsv @@ -1,6 +1,6 @@ client_id path sentence up_votes down_votes age gender accent locale segment -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en -954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en -954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en diff --git a/tests/inputs/dummy_model_config.json b/tests/inputs/dummy_model_config.json index b51bb3a871..3f64c7f3df 100644 --- a/tests/inputs/dummy_model_config.json +++ b/tests/inputs/dummy_model_config.json @@ -98,5 +98,3 @@ "gst_style_tokens": 10 } } - - diff --git a/tests/inputs/language_ids.json b/tests/inputs/language_ids.json index 27bb15206f..80833d8058 100644 --- a/tests/inputs/language_ids.json +++ b/tests/inputs/language_ids.json @@ -2,4 +2,4 @@ "en": 0, "fr-fr": 1, "pt-br": 2 -} \ No newline at end of file +} diff --git a/tests/inputs/test_align_tts.json b/tests/inputs/test_align_tts.json index 3f928c7e92..80721346d5 100644 --- a/tests/inputs/test_align_tts.json +++ b/tests/inputs/test_align_tts.json @@ -155,4 +155,4 @@ "meta_file_attn_mask": null } ] -} \ No newline at end of file +} diff --git a/tests/inputs/test_speaker_encoder_config.json b/tests/inputs/test_speaker_encoder_config.json index bfcc17ab0e..ae125f1327 100644 --- a/tests/inputs/test_speaker_encoder_config.json +++ b/tests/inputs/test_speaker_encoder_config.json @@ -58,4 +58,4 @@ "storage_size": 15 // the size of the in-memory storage with respect to a single batch }, "datasets":null -} \ No newline at end of file +} diff --git a/tests/inputs/test_speedy_speech.json b/tests/inputs/test_speedy_speech.json index 4a7eea5ded..93e4790ca3 100644 --- a/tests/inputs/test_speedy_speech.json +++ b/tests/inputs/test_speedy_speech.json @@ -152,4 +152,4 @@ "meta_file_attn_mask": "tests/data/ljspeech/metadata_attn_mask.txt" } ] -} \ No newline at end of file +} diff --git a/tests/inputs/test_vocoder_audio_config.json b/tests/inputs/test_vocoder_audio_config.json index 08acc48cd3..cdf347c4eb 100644 --- a/tests/inputs/test_vocoder_audio_config.json +++ b/tests/inputs/test_vocoder_audio_config.json @@ -21,4 +21,3 @@ "do_trim_silence": false } } - diff --git a/tests/inputs/test_vocoder_multiband_melgan_config.json b/tests/inputs/test_vocoder_multiband_melgan_config.json index 82afc97727..2b6cc9e4cd 100644 --- a/tests/inputs/test_vocoder_multiband_melgan_config.json +++ b/tests/inputs/test_vocoder_multiband_melgan_config.json @@ -163,4 +163,3 @@ // PATHS "output_path": "tests/train_outputs/" } - diff --git a/tests/inputs/test_vocoder_wavegrad.json b/tests/inputs/test_vocoder_wavegrad.json index 6378c07a6d..bb06bf2448 100644 --- a/tests/inputs/test_vocoder_wavegrad.json +++ b/tests/inputs/test_vocoder_wavegrad.json @@ -113,4 +113,3 @@ // PATHS "output_path": "tests/train_outputs/" } - diff --git a/tests/inputs/test_vocoder_wavernn_config.json b/tests/inputs/test_vocoder_wavernn_config.json index ee4e5f8e42..1dd8a229f2 100644 --- a/tests/inputs/test_vocoder_wavernn_config.json +++ b/tests/inputs/test_vocoder_wavernn_config.json @@ -109,4 +109,3 @@ // PATHS "output_path": "tests/train_outputs/" } -