Skip to content

Commit

Permalink
Merge pull request coqui-ai#3 from idiap/logging
Browse files Browse the repository at this point in the history
Use Python logging instead of print()
  • Loading branch information
eginhard authored Apr 11, 2024
2 parents e626a29 + e689fd1 commit dfbe016
Show file tree
Hide file tree
Showing 86 changed files with 711 additions and 476 deletions.
7 changes: 5 additions & 2 deletions TTS/api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import tempfile
import warnings
from pathlib import Path
Expand All @@ -9,6 +10,8 @@
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

logger = logging.getLogger(__name__)


class TTS(nn.Module):
"""TODO: Add voice conversion and Capacitron support."""
Expand Down Expand Up @@ -59,7 +62,7 @@ def __init__(
gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
"""
super().__init__()
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar)
self.config = load_config(config_path) if config_path else None
self.synthesizer = None
self.voice_converter = None
Expand Down Expand Up @@ -122,7 +125,7 @@ def get_models_file_path():

@staticmethod
def list_models():
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models()
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models()

def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name)
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_attention_masks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import importlib
import logging
import os
from argparse import RawTextHelpFormatter

Expand All @@ -13,9 +14,12 @@
from TTS.tts.models import setup_model
from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.io import load_checkpoint

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
description="""Extract attention masks from trained Tacotron/Tacotron2 models.
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import logging
import os
from argparse import RawTextHelpFormatter

Expand All @@ -10,6 +11,7 @@
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.managers import save_file
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_embeddings(
Expand Down Expand Up @@ -100,6 +102,8 @@ def compute_embeddings(


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
"""
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import argparse
import glob
import logging
import os

import numpy as np
Expand All @@ -12,10 +13,13 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def main():
"""Run preprocessing process."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
parser.add_argument("out_path", type=str, help="save path (directory and filename).")
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/eval_encoder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import logging
from argparse import RawTextHelpFormatter

import torch
Expand All @@ -7,6 +8,7 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_encoder_accuracy(dataset_items, encoder_manager):
Expand Down Expand Up @@ -51,6 +53,8 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute the accuracy of the encoder.\n\n"""
"""
Expand Down
9 changes: 6 additions & 3 deletions TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""Extract Mel spectrograms with teacher forcing."""

import argparse
import logging
import os

import numpy as np
Expand All @@ -17,11 +18,12 @@
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import quantize
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

use_cuda = torch.cuda.is_available()


def setup_loader(ap, r, verbose=False):
def setup_loader(ap, r):
tokenizer, _ = TTSTokenizer.init_from_config(c)
dataset = TTSDataset(
outputs_per_step=r,
Expand All @@ -37,7 +39,6 @@ def setup_loader(ap, r, verbose=False):
phoneme_cache_path=c.phoneme_cache_path,
precompute_num_workers=0,
use_noise_augment=False,
verbose=verbose,
speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None,
d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None,
)
Expand Down Expand Up @@ -257,7 +258,7 @@ def main(args): # pylint: disable=redefined-outer-name
print("\n > Model has {} parameters".format(num_params), flush=True)
# set r
r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
own_loader = setup_loader(ap, r, verbose=True)
own_loader = setup_loader(ap, r)

extract_spectrograms(
own_loader,
Expand All @@ -272,6 +273,8 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
parser.add_argument("--checkpoint_path", type=str, help="Model file to be restored.", required=True)
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/find_unique_chars.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""Find all the unique characters in a dataset"""

import argparse
import logging
from argparse import RawTextHelpFormatter

from TTS.config import load_config
from TTS.tts.datasets import find_unique_chars, load_tts_samples
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
description="""Find all the unique characters or phonemes in a dataset.\n\n"""
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/find_unique_phonemes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Find all the unique characters in a dataset"""

import argparse
import logging
import multiprocessing
from argparse import RawTextHelpFormatter

Expand All @@ -9,6 +10,7 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.text.phonemizers import Gruut
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_phonemes(item):
Expand All @@ -18,6 +20,8 @@ def compute_phonemes(item):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=W0601
global c, phonemizer
# pylint: disable=bad-option-value
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/remove_silence_using_vad.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import argparse
import glob
import logging
import multiprocessing
import os
import pathlib

import torch
from tqdm import tqdm

from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.vad import get_vad_model_and_utils, remove_silence

torch.set_num_threads(1)
Expand Down Expand Up @@ -75,6 +77,8 @@ def preprocess_audios():


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
)
Expand Down
33 changes: 23 additions & 10 deletions TTS/bin/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@

import argparse
import contextlib
import logging
import sys
from argparse import RawTextHelpFormatter

# pylint: disable=redefined-outer-name, unused-argument
from pathlib import Path

from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

logger = logging.getLogger(__name__)

description = """
Synthesize speech on command line.
Expand Down Expand Up @@ -142,6 +147,8 @@ def str2bool(v):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description=description.replace(" ```\n", ""),
formatter_class=RawTextHelpFormatter,
Expand Down Expand Up @@ -435,31 +442,37 @@ def main():

# query speaker ids of a multi-speaker model.
if args.list_speaker_idxs:
print(
" > Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model."
if synthesizer.tts_model.speaker_manager is None:
logger.info("Model only has a single speaker.")
return
logger.info(
"Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model."
)
print(synthesizer.tts_model.speaker_manager.name_to_id)
logger.info(synthesizer.tts_model.speaker_manager.name_to_id)
return

# query language ids of a multi-lingual model.
if args.list_language_idxs:
print(
" > Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model."
if synthesizer.tts_model.language_manager is None:
logger.info("Monolingual model.")
return
logger.info(
"Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model."
)
print(synthesizer.tts_model.language_manager.name_to_id)
logger.info(synthesizer.tts_model.language_manager.name_to_id)
return

# check the arguments against a multi-speaker model.
if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav):
print(
" [!] Looks like you use a multi-speaker model. Define `--speaker_idx` to "
logger.error(
"Looks like you use a multi-speaker model. Define `--speaker_idx` to "
"select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`."
)
return

# RUN THE SYNTHESIS
if args.text:
print(" > Text: {}".format(args.text))
logger.info("Text: %s", args.text)

# kick it
if tts_path is not None:
Expand All @@ -484,8 +497,8 @@ def main():
)

# save the results
print(" > Saving output to {}".format(args.out_path))
synthesizer.save_wav(wav, args.out_path, pipe_out=pipe_out)
logger.info("Saved output to %s", args.out_path)


if __name__ == "__main__":
Expand Down
11 changes: 7 additions & 4 deletions TTS/bin/train_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import os
import sys
import time
Expand All @@ -19,6 +20,7 @@
from TTS.encoder.utils.visual import plot_embeddings
from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.samplers import PerfectBatchSampler
from TTS.utils.training import check_update

Expand All @@ -31,7 +33,7 @@
print(" > Number of GPUs: ", num_gpus)


def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False):
def setup_loader(ap: AudioProcessor, is_val: bool = False):
num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class
num_classes_in_batch = c.num_classes_in_batch if not is_val else c.eval_num_classes_in_batch

Expand All @@ -42,7 +44,6 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False
voice_len=c.voice_len,
num_utter_per_class=num_utter_per_class,
num_classes_in_batch=num_classes_in_batch,
verbose=verbose,
augmentation_config=c.audio_augmentation if not is_val else None,
use_torch_spec=c.model_params.get("use_torch_spec", False),
)
Expand Down Expand Up @@ -278,9 +279,9 @@ def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=redefined-outer-name
meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True)

train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True)
train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False)
if c.run_eval:
eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True)
eval_data_loader, _, _ = setup_loader(ap, is_val=True)
else:
eval_data_loader = None

Expand Down Expand Up @@ -316,6 +317,8 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()

try:
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/train_tts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
from dataclasses import dataclass, field

Expand All @@ -6,6 +7,7 @@
from TTS.config import load_config, register_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models import setup_model
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


@dataclass
Expand All @@ -15,6 +17,8 @@ class TrainTTSArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainTTSArgs()
parser = train_args.init_argparse(arg_prefix="")
Expand Down
Loading

0 comments on commit dfbe016

Please sign in to comment.