Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(preprocessing): better performance #12

Merged
merged 11 commits into from
Mar 19, 2023
Merged
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,15 @@ cython_debug/

# additional files
tests/**/*.wav
!tests/dataset_raw/**/*.wav
!tests/dataset_raw/34j/*.wav
tests/**/*.npy
tests/**/*.pt
tests/**/*.txt
tests/**/*.json
tests/**/*.pth
tests/**/*.download
tests/**/*.lab
tests/**/*.pdf
tests/**/*.csv
*.tfevents.*
*.pt
25 changes: 23 additions & 2 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,35 @@ def pre_config(
help="path to config",
default=Path("./configs/44k/config.json"),
)
def pre_hubert(input_dir: Path, config_path: Path) -> None:
@click.option(
"-n",
"--n_jobs",
type=int,
default=4,
help="number of jobs (optimal value may depend on your VRAM capacity and audio duration per file)",
)
@click.option(
"-f",
"--force_rebuild",
type=bool,
default=True,
help="force rebuild existing preprocessed files",
)
def pre_hubert(
    input_dir: Path, config_path: Path, n_jobs: int, force_rebuild: bool
) -> None:
    """Preprocessing part 3: hubert
    If the HuBERT model is not found, it will be downloaded automatically."""
    # NOTE: the docstring above is kept verbatim because click shows it as the
    # command's help text.
    # Local import, as in the original command body (presumably to keep CLI
    # start-up light -- TODO confirm).
    from .preprocess_hubert_f0 import preprocess_hubert_f0

    # click may hand over plain strings depending on the option type, so
    # normalise both paths before passing them on.
    input_dir = Path(input_dir)
    config_path = Path(config_path)

    # Single call carrying the new options; the pre-change two-argument call
    # is superseded by this one and must not run in addition to it.
    preprocess_hubert_f0(
        input_dir=input_dir,
        config_path=config_path,
        n_jobs=n_jobs,
        force_rebuild=force_rebuild,
    )


@cli.command
Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/preprocess_flist_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def preprocess_config(
shuffle(paths)
if len(paths) <= 4:
raise ValueError(
f"too few files in {input_dir / speaker} (expected at least 4)."
f"too few files in {input_dir / speaker} (expected at least 5)."
)
train += paths[2:-2]
val += paths[:2]
Expand Down
89 changes: 59 additions & 30 deletions src/so_vits_svc_fork/preprocess_hubert_f0.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,70 @@
LOG = getLogger(__name__)


def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str):
def _process_one(
    filepath: Path,
    hubert_model,
    sampling_rate: int,
    hop_length: int,
    device: Literal["cuda", "cpu"] = "cuda",
    force_rebuild: bool = False,
):
    """Write the HuBERT content features and f0 contour for one audio file.

    Two files are created next to ``filepath``:

    * ``<filename>.soft.pt`` -- result of ``utils.get_hubert_content`` moved
      to CPU and stored with ``torch.save``.
    * ``<filename>.f0.npy`` -- result of ``utils.compute_f0_dio`` stored with
      ``np.save``.

    Args:
        filepath: Audio file to process.
        hubert_model: Model object passed through to
            ``utils.get_hubert_content``.
        sampling_rate: Rate the audio is loaded at and the rate given to the
            f0 computation.
        hop_length: Hop length forwarded to ``utils.compute_f0_dio``.
        device: Device the resampled waveform tensor is moved to before the
            HuBERT forward pass.
        force_rebuild: When true, both outputs are regenerated even if they
            already exist; otherwise existing outputs are left untouched.
    """
    soft_path = filepath.parent / (filepath.name + ".soft.pt")
    f0_path = filepath.parent / (filepath.name + ".f0.npy")
    need_soft = force_rebuild or not soft_path.exists()
    need_f0 = force_rebuild or not f0_path.exists()

    if not need_soft:
        LOG.info(f"Skip {filepath} because {soft_path} exists.")
    if not need_f0:
        LOG.info(f"Skip {filepath} because {f0_path} exists.")
    if not (need_soft or need_f0):
        # Both outputs are already present: do not decode the audio at all.
        return

    # Decode only once we know at least one output has to be produced.
    wav, _ = librosa.load(filepath, sr=sampling_rate)

    if need_soft:
        # The content encoder is fed audio at HUBERT_SAMPLING_RATE, so the
        # waveform is resampled from the dataset rate first.
        wav16k = librosa.resample(
            wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
        )
        wav16k = torch.from_numpy(wav16k).to(device)
        c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k)
        # Save a CPU copy so the file can be loaded without the original device.
        torch.save(c.cpu(), soft_path)

    if need_f0:
        f0 = utils.compute_f0_dio(
            wav, sampling_rate=sampling_rate, hop_length=hop_length
        )
        np.save(f0_path, f0)

    # Hand cached GPU blocks back between files so that several parallel
    # workers sharing one GPU are less likely to run out of memory.
    torch.cuda.empty_cache()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. Thank you for finding that.



def _process_batch(
    filepaths: Iterable[Path],
    sampling_rate: int,
    hop_length: int,
    pbar_position: int,
    force_rebuild: bool = False,
):
    """Process a chunk of audio files with one HuBERT model instance.

    The model is loaded once per call and reused for every file in
    ``filepaths``.

    Args:
        filepaths: Audio files handled by this call.
        sampling_rate: Forwarded to ``_process_one``.
        hop_length: Forwarded to ``_process_one``.
        pbar_position: ``position`` argument of the tqdm progress bar, so
            that bars of concurrently running batches do not overwrite each
            other.
        force_rebuild: Forwarded to ``_process_one``; regenerate outputs that
            already exist.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    hubert_model = utils.get_hubert_model().to(device)

    for filepath in tqdm(filepaths, position=pbar_position):
        # force_rebuild must be passed on explicitly; otherwise _process_one
        # falls back to its default and the caller's choice is ignored.
        _process_one(
            filepath,
            hubert_model,
            sampling_rate,
            hop_length,
            device=device,
            force_rebuild=force_rebuild,
        )


def preprocess_hubert_f0(
    input_dir: Path | str,
    config_path: Path | str,
    n_jobs: int = 4,
    force_rebuild: bool = False,
):
    """Generate HuBERT content and f0 files for every ``.wav`` under ``input_dir``.

    The files are shuffled, split into ``n_jobs`` chunks and each chunk is
    handed to ``_process_batch`` through joblib.

    Args:
        input_dir: Directory searched recursively for ``*.wav`` files.
        config_path: Hyper-parameter file; ``data.sampling_rate`` and
            ``data.hop_length`` are read from it.
        n_jobs: Upper bound on the number of parallel workers. The effective
            value is additionally capped by the CPU count and by
            ``len(files) // 32 + 1``.
        force_rebuild: Regenerate outputs even when they already exist.
    """
    input_dir = Path(input_dir)
    config_path = Path(config_path)

    # Make sure the checkpoint is on disk before any worker starts; every
    # worker loads it again through utils.get_hubert_model().
    utils.ensure_hubert_model()

    hps = utils.get_hparams_from_file(config_path)
    sampling_rate = hps.data.sampling_rate
    hop_length = hps.data.hop_length

    filepaths = list(input_dir.rglob("*.wav"))
    if not filepaths:
        # Without this guard a single worker would still load the HuBERT
        # model only to iterate over an empty chunk.
        LOG.warning(f"No .wav files found in {input_dir}; nothing to preprocess.")
        return

    n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, n_jobs)
    # Shuffle before splitting (presumably to even out per-chunk workload --
    # TODO confirm).
    shuffle(filepaths)
    filepath_chunks = np.array_split(filepaths, n_jobs)
    Parallel(n_jobs=n_jobs)(
        delayed(_process_batch)(
            chunk, sampling_rate, hop_length, pbar_position, force_rebuild
        )
        for (pbar_position, chunk) in enumerate(filepath_chunks)
    )
19 changes: 16 additions & 3 deletions src/so_vits_svc_fork/preprocess_resample.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from __future__ import annotations

from logging import getLogger
from pathlib import Path

import audioread.exceptions
import librosa
import numpy as np
import soundfile
from joblib import Parallel, delayed
from tqdm_joblib import tqdm_joblib

LOG = getLogger(__name__)

# input_dir and output_dir exist.
# Write code to convert input dir audio files to output dir audio files,
# without changing folder structure. Use joblib to parallelize.
Expand All @@ -27,7 +31,15 @@ def preprocess_resample(

def preprocess_one(input_path: Path, output_path: Path) -> None:
"""Preprocess one audio file."""
audio, sr = librosa.load(input_path)

try:
audio, sr = librosa.load(input_path)

# Audioread is the last backend it will attempt, so this is the exception thrown on failure
except audioread.exceptions.NoBackendError as e:
# Failure due to attempting to load a file that is not audio, so return early
LOG.warning(f"Failed to load {input_path} due to {e}")
return

# Trim silence
audio, _ = librosa.effects.trim(audio, top_db=20)
Expand All @@ -43,9 +55,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

in_and_out_paths = []
for in_path in input_dir.rglob("*.wav"):
out_path = output_dir / in_path.relative_to(input_dir)
for in_path in input_dir.rglob("*.*"):
out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
out_path.parent.mkdir(parents=True, exist_ok=True)
in_and_out_paths.append((in_path, out_path))

with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
5 changes: 3 additions & 2 deletions src/so_vits_svc_fork/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,9 @@ def ensure_pretrained_model(folder_path: Path) -> None:
download_file(model_url, model_path, desc=f"Downloading {model_path.name}")


def ensure_hurbert_model() -> Path:
def ensure_hubert_model() -> Path:
vec_path = Path("checkpoint_best_legacy_500.pt")
vec_path.parent.mkdir(parents=True, exist_ok=True)
if not vec_path.exists():
# url = "http://obs.cstcloud.cn/share/obs/sankagenkeshi/checkpoint_best_legacy_500.pt"
# url = "https://huggingface.co/innnky/contentvec/resolve/main/checkpoint_best_legacy_500.pt"
Expand All @@ -249,7 +250,7 @@ def ensure_hurbert_model() -> Path:


def get_hubert_model():
vec_path = ensure_hurbert_model()
vec_path = ensure_hubert_model()
from fairseq import checkpoint_utils

models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
Expand Down