From 668c8e1f18cefb0ebd2fb2f1d6572ce4d37d1102 Mon Sep 17 00:00:00 2001 From: 34j <55338215+34j@users.noreply.github.com> Date: Sun, 19 Mar 2023 13:17:27 +0900 Subject: [PATCH] perf(preprocessing): better performance (#12) Co-authored-by: gconway --- .gitignore | 5 +- src/so_vits_svc_fork/__main__.py | 25 +++++- .../preprocess_flist_config.py | 2 +- src/so_vits_svc_fork/preprocess_hubert_f0.py | 89 ++++++++++++------- src/so_vits_svc_fork/preprocess_resample.py | 19 +++- src/so_vits_svc_fork/utils.py | 5 +- 6 files changed, 106 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index f19c12fb..1d6c2d24 100644 --- a/.gitignore +++ b/.gitignore @@ -141,12 +141,15 @@ cython_debug/ # additional files tests/**/*.wav -!tests/dataset_raw/**/*.wav +!tests/dataset_raw/34j/*.wav tests/**/*.npy tests/**/*.pt tests/**/*.txt tests/**/*.json tests/**/*.pth tests/**/*.download +tests/**/*.lab +tests/**/*.pdf +tests/**/*.csv *.tfevents.* *.pt diff --git a/src/so_vits_svc_fork/__main__.py b/src/so_vits_svc_fork/__main__.py index d3bda208..57b14e2e 100644 --- a/src/so_vits_svc_fork/__main__.py +++ b/src/so_vits_svc_fork/__main__.py @@ -437,14 +437,35 @@ def pre_config( help="path to config", default=Path("./configs/44k/config.json"), ) -def pre_hubert(input_dir: Path, config_path: Path) -> None: +@click.option( + "-n", + "--n_jobs", + type=int, + default=4, + help="number of jobs (optimal value may depend on your VRAM capacity and audio duration per file)", +) +@click.option( + "-f", + "--force_rebuild", + type=bool, + default=True, + help="force rebuild existing preprocessed files", +) +def pre_hubert( + input_dir: Path, config_path: Path, n_jobs: bool, force_rebuild: bool +) -> None: """Preprocessing part 3: hubert If the HuBERT model is not found, it will be downloaded automatically.""" from .preprocess_hubert_f0 import preprocess_hubert_f0 input_dir = Path(input_dir) config_path = Path(config_path) - preprocess_hubert_f0(input_dir=input_dir, config_path=config_path) + preprocess_hubert_f0( + input_dir=input_dir, + config_path=config_path, + n_jobs=n_jobs, + force_rebuild=force_rebuild, + ) @cli.command diff --git a/src/so_vits_svc_fork/preprocess_flist_config.py b/src/so_vits_svc_fork/preprocess_flist_config.py index edec1a0f..b7d79596 100644 --- a/src/so_vits_svc_fork/preprocess_flist_config.py +++ b/src/so_vits_svc_fork/preprocess_flist_config.py @@ -55,7 +55,7 @@ def preprocess_config( shuffle(paths) if len(paths) <= 4: raise ValueError( - f"too few files in {input_dir / speaker} (expected at least 4)." + f"too few files in {input_dir / speaker} (expected at least 5)." ) train += paths[2:-2] val += paths[:2] diff --git a/src/so_vits_svc_fork/preprocess_hubert_f0.py b/src/so_vits_svc_fork/preprocess_hubert_f0.py index 304cb3ff..870d96fb 100644 --- a/src/so_vits_svc_fork/preprocess_hubert_f0.py +++ b/src/so_vits_svc_fork/preprocess_hubert_f0.py @@ -17,41 +17,70 @@ LOG = getLogger(__name__) -def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str): +def _process_one( + filepath: Path, + hubert_model, + sampling_rate: int, + hop_length: int, + device: Literal["cuda", "cpu"] = "cuda", + force_rebuild: bool = False, +): + wav, sr = librosa.load(filepath, sr=sampling_rate) + soft_path = filepath.parent / (filepath.name + ".soft.pt") + if not soft_path.exists() or force_rebuild: + wav16k = librosa.resample( + wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE + ) + wav16k = torch.from_numpy(wav16k).to(device) + c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k) + torch.save(c.cpu(), soft_path) + else: + LOG.info(f"Skip {filepath} because {soft_path} exists.") + f0_path = filepath.parent / (filepath.name + ".f0.npy") + if not f0_path.exists() or force_rebuild: + f0 = utils.compute_f0_dio( + wav, sampling_rate=sampling_rate, hop_length=hop_length + ) + np.save(f0_path, f0) + else: + LOG.info(f"Skip {filepath} because {f0_path} exists.") + torch.cuda.empty_cache() + + +def _process_batch( + filepaths: Iterable[Path], + sampling_rate: int, + hop_length: int, + pbar_position: int, + force_rebuild: bool = False, +): + device = "cuda" if torch.cuda.is_available() else "cpu" + hubert_model = utils.get_hubert_model().to(device) + + for filepath in tqdm(filepaths, position=pbar_position): + _process_one(filepath, hubert_model, sampling_rate, hop_length, device) + + +def preprocess_hubert_f0( + input_dir: Path | str, + config_path: Path | str, + n_jobs: int = 4, + force_rebuild: bool = False, +): input_dir = Path(input_dir) config_path = Path(config_path) - utils.get_hubert_model() + utils.ensure_hubert_model() hps = utils.get_hparams_from_file(config_path) sampling_rate = hps.data.sampling_rate hop_length = hps.data.hop_length - def _process_one(filepath: Path, hmodel, device: Literal["cuda", "cpu"] = "cuda"): - wav, sr = librosa.load(filepath, sr=sampling_rate) - soft_path = filepath.parent / (filepath.name + ".soft.pt") - if not soft_path.exists(): - wav16k = librosa.resample( - wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE - ) - wav16k = torch.from_numpy(wav16k).to(device) - c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k) - torch.save(c.cpu(), soft_path) - f0_path = filepath.parent / (filepath.name + ".f0.npy") - if not f0_path.exists(): - f0 = utils.compute_f0_dio( - wav, sampling_rate=sampling_rate, hop_length=hop_length - ) - np.save(f0_path, f0) - - def _process_batch(filepaths: Iterable[Path]): - LOG.info("Loading hubert model...") - device = "cuda" if torch.cuda.is_available() else "cpu" - hmodel = utils.get_hubert_model().to(device) - LOG.info("Hubert model loaded.") - for filepath in tqdm(filepaths): - _process_one(filepath, hmodel, device) - - filepaths = list(input_dir.glob("**/*.wav")) - n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 8) + filepaths = list(input_dir.rglob("*.wav")) + n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, n_jobs) shuffle(filepaths) filepath_chunks = np.array_split(filepaths, n_jobs) - Parallel(n_jobs=n_jobs)(delayed(_process_batch)(chunk) for chunk in filepath_chunks) + Parallel(n_jobs=n_jobs)( + delayed(_process_batch)( + chunk, sampling_rate, hop_length, pbar_position, force_rebuild + ) + for (pbar_position, chunk) in enumerate(filepath_chunks) + ) diff --git a/src/so_vits_svc_fork/preprocess_resample.py b/src/so_vits_svc_fork/preprocess_resample.py index c3ad6163..24a1082a 100644 --- a/src/so_vits_svc_fork/preprocess_resample.py +++ b/src/so_vits_svc_fork/preprocess_resample.py @@ -1,13 +1,17 @@ from __future__ import annotations +from logging import getLogger from pathlib import Path +import audioread.exceptions import librosa import numpy as np import soundfile from joblib import Parallel, delayed from tqdm_joblib import tqdm_joblib +LOG = getLogger(__name__) + # input_dir and output_dir exists. # write code to convert input dir audio files to output dir audio files, # without changing folder structure. Use joblib to parallelize. @@ -27,7 +31,15 @@ def preprocess_resample( def preprocess_one(input_path: Path, output_path: Path) -> None: """Preprocess one audio file.""" - audio, sr = librosa.load(input_path) + + try: + audio, sr = librosa.load(input_path) + + # Audioread is the last backend it will attempt, so this is the exception thrown on failure + except audioread.exceptions.NoBackendError as e: + # Failure due to attempting to load a file that is not audio, so return early + LOG.warning(f"Failed to load {input_path} due to {e}") + return # Trim silence audio, _ = librosa.effects.trim(audio, top_db=20) @@ -43,9 +55,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None: soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16") in_and_out_paths = [] - for in_path in input_dir.rglob("*.wav"): - out_path = output_dir / in_path.relative_to(input_dir) + for in_path in input_dir.rglob("*.*"): + out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav") out_path.parent.mkdir(parents=True, exist_ok=True) in_and_out_paths.append((in_path, out_path)) + with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)): Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths) diff --git a/src/so_vits_svc_fork/utils.py b/src/so_vits_svc_fork/utils.py index 361c4c49..d91864eb 100644 --- a/src/so_vits_svc_fork/utils.py +++ b/src/so_vits_svc_fork/utils.py @@ -238,8 +238,9 @@ def ensure_pretrained_model(folder_path: Path) -> None: download_file(model_url, model_path, desc=f"Downloading {model_path.name}") -def ensure_hurbert_model() -> Path: +def ensure_hubert_model() -> Path: vec_path = Path("checkpoint_best_legacy_500.pt") + vec_path.parent.mkdir(parents=True, exist_ok=True) if not vec_path.exists(): # url = "http://obs.cstcloud.cn/share/obs/sankagenkeshi/checkpoint_best_legacy_500.pt" # url = "https://huggingface.co/innnky/contentvec/resolve/main/checkpoint_best_legacy_500.pt" @@ -249,7 +250,7 @@ def ensure_hurbert_model() -> Path: def get_hubert_model(): - vec_path = ensure_hurbert_model() + vec_path = ensure_hubert_model() from fairseq import checkpoint_utils models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(