Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Python logging instead of print() #3

Merged
merged 6 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions TTS/api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import tempfile
import warnings
from pathlib import Path
Expand All @@ -9,6 +10,8 @@
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

logger = logging.getLogger(__name__)


class TTS(nn.Module):
"""TODO: Add voice conversion and Capacitron support."""
Expand Down Expand Up @@ -59,7 +62,7 @@ def __init__(
gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
"""
super().__init__()
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar)
self.config = load_config(config_path) if config_path else None
self.synthesizer = None
self.voice_converter = None
Expand Down Expand Up @@ -122,7 +125,7 @@ def get_models_file_path():

@staticmethod
def list_models():
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models()
return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models()

def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name)
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_attention_masks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import importlib
import logging
import os
from argparse import RawTextHelpFormatter

Expand All @@ -13,9 +14,12 @@
from TTS.tts.models import setup_model
from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.io import load_checkpoint

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
description="""Extract attention masks from trained Tacotron/Tacotron2 models.
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import logging
import os
from argparse import RawTextHelpFormatter

Expand All @@ -10,6 +11,7 @@
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.managers import save_file
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_embeddings(
Expand Down Expand Up @@ -100,6 +102,8 @@ def compute_embeddings(


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
"""
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/compute_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import argparse
import glob
import logging
import os

import numpy as np
Expand All @@ -12,10 +13,13 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def main():
"""Run preprocessing process."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
parser.add_argument("out_path", type=str, help="save path (directory and filename).")
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/eval_encoder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import logging
from argparse import RawTextHelpFormatter

import torch
Expand All @@ -7,6 +8,7 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_encoder_accuracy(dataset_items, encoder_manager):
Expand Down Expand Up @@ -51,6 +53,8 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute the accuracy of the encoder.\n\n"""
"""
Expand Down
9 changes: 6 additions & 3 deletions TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""Extract Mel spectrograms with teacher forcing."""

import argparse
import logging
import os

import numpy as np
Expand All @@ -17,11 +18,12 @@
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import quantize
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

use_cuda = torch.cuda.is_available()


def setup_loader(ap, r, verbose=False):
def setup_loader(ap, r):
tokenizer, _ = TTSTokenizer.init_from_config(c)
dataset = TTSDataset(
outputs_per_step=r,
Expand All @@ -37,7 +39,6 @@ def setup_loader(ap, r, verbose=False):
phoneme_cache_path=c.phoneme_cache_path,
precompute_num_workers=0,
use_noise_augment=False,
verbose=verbose,
speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None,
d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None,
)
Expand Down Expand Up @@ -257,7 +258,7 @@ def main(args): # pylint: disable=redefined-outer-name
print("\n > Model has {} parameters".format(num_params), flush=True)
# set r
r = 1 if c.model.lower() == "glow_tts" else model.decoder.r
own_loader = setup_loader(ap, r, verbose=True)
own_loader = setup_loader(ap, r)

extract_spectrograms(
own_loader,
Expand All @@ -272,6 +273,8 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
parser.add_argument("--checkpoint_path", type=str, help="Model file to be restored.", required=True)
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/find_unique_chars.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
"""Find all the unique characters in a dataset"""

import argparse
import logging
from argparse import RawTextHelpFormatter

from TTS.config import load_config
from TTS.tts.datasets import find_unique_chars, load_tts_samples
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
description="""Find all the unique characters or phonemes in a dataset.\n\n"""
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/find_unique_phonemes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Find all the unique characters in a dataset"""

import argparse
import logging
import multiprocessing
from argparse import RawTextHelpFormatter

Expand All @@ -9,6 +10,7 @@
from TTS.config import load_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.text.phonemizers import Gruut
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


def compute_phonemes(item):
Expand All @@ -18,6 +20,8 @@ def compute_phonemes(item):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# pylint: disable=W0601
global c, phonemizer
# pylint: disable=bad-option-value
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/remove_silence_using_vad.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import argparse
import glob
import logging
import multiprocessing
import os
import pathlib

import torch
from tqdm import tqdm

from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.vad import get_vad_model_and_utils, remove_silence

torch.set_num_threads(1)
Expand Down Expand Up @@ -75,6 +77,8 @@ def preprocess_audios():


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
)
Expand Down
33 changes: 23 additions & 10 deletions TTS/bin/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@

import argparse
import contextlib
import logging
import sys
from argparse import RawTextHelpFormatter

# pylint: disable=redefined-outer-name, unused-argument
from pathlib import Path

from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

logger = logging.getLogger(__name__)

description = """
Synthesize speech on command line.

Expand Down Expand Up @@ -142,6 +147,8 @@ def str2bool(v):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description=description.replace(" ```\n", ""),
formatter_class=RawTextHelpFormatter,
Expand Down Expand Up @@ -435,31 +442,37 @@ def main():

# query speaker ids of a multi-speaker model.
if args.list_speaker_idxs:
print(
" > Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model."
if synthesizer.tts_model.speaker_manager is None:
logger.info("Model only has a single speaker.")
return
logger.info(
"Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model."
)
print(synthesizer.tts_model.speaker_manager.name_to_id)
logger.info(synthesizer.tts_model.speaker_manager.name_to_id)
return

# query langauge ids of a multi-lingual model.
if args.list_language_idxs:
print(
" > Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model."
if synthesizer.tts_model.language_manager is None:
logger.info("Monolingual model.")
return
logger.info(
"Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model."
)
print(synthesizer.tts_model.language_manager.name_to_id)
logger.info(synthesizer.tts_model.language_manager.name_to_id)
return

# check the arguments against a multi-speaker model.
if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav):
print(
" [!] Looks like you use a multi-speaker model. Define `--speaker_idx` to "
logger.error(
"Looks like you use a multi-speaker model. Define `--speaker_idx` to "
"select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`."
)
return

# RUN THE SYNTHESIS
if args.text:
print(" > Text: {}".format(args.text))
logger.info("Text: %s", args.text)

# kick it
if tts_path is not None:
Expand All @@ -484,8 +497,8 @@ def main():
)

# save the results
print(" > Saving output to {}".format(args.out_path))
synthesizer.save_wav(wav, args.out_path, pipe_out=pipe_out)
logger.info("Saved output to %s", args.out_path)


if __name__ == "__main__":
Expand Down
11 changes: 7 additions & 4 deletions TTS/bin/train_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import os
import sys
import time
Expand All @@ -19,6 +20,7 @@
from TTS.encoder.utils.visual import plot_embeddings
from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
from TTS.utils.samplers import PerfectBatchSampler
from TTS.utils.training import check_update

Expand All @@ -31,7 +33,7 @@
print(" > Number of GPUs: ", num_gpus)


def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False):
def setup_loader(ap: AudioProcessor, is_val: bool = False):
num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class
num_classes_in_batch = c.num_classes_in_batch if not is_val else c.eval_num_classes_in_batch

Expand All @@ -42,7 +44,6 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False
voice_len=c.voice_len,
num_utter_per_class=num_utter_per_class,
num_classes_in_batch=num_classes_in_batch,
verbose=verbose,
augmentation_config=c.audio_augmentation if not is_val else None,
use_torch_spec=c.model_params.get("use_torch_spec", False),
)
Expand Down Expand Up @@ -278,9 +279,9 @@ def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=redefined-outer-name
meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True)

train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True)
train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False)
if c.run_eval:
eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True)
eval_data_loader, _, _ = setup_loader(ap, is_val=True)
else:
eval_data_loader = None

Expand Down Expand Up @@ -316,6 +317,8 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()

try:
Expand Down
4 changes: 4 additions & 0 deletions TTS/bin/train_tts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
from dataclasses import dataclass, field

Expand All @@ -6,6 +7,7 @@
from TTS.config import load_config, register_config
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models import setup_model
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger


@dataclass
Expand All @@ -15,6 +17,8 @@ class TrainTTSArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainTTSArgs()
parser = train_args.init_argparse(arg_prefix="")
Expand Down
Loading
Loading