Skip to content

Commit

Permalink
perf(preprocessing): better performance (#12)
Browse files Browse the repository at this point in the history
Co-authored-by: gconway <gconway@g.hmc.edu>
  • Loading branch information
34j and GarrettConway authored Mar 19, 2023
1 parent 9d3910d commit 668c8e1
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 39 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,15 @@ cython_debug/

# additional files
tests/**/*.wav
!tests/dataset_raw/**/*.wav
!tests/dataset_raw/34j/*.wav
tests/**/*.npy
tests/**/*.pt
tests/**/*.txt
tests/**/*.json
tests/**/*.pth
tests/**/*.download
tests/**/*.lab
tests/**/*.pdf
tests/**/*.csv
*.tfevents.*
*.pt
25 changes: 23 additions & 2 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,35 @@ def pre_config(
help="path to config",
default=Path("./configs/44k/config.json"),
)
@click.option(
    "-n",
    "--n_jobs",
    type=int,
    default=4,
    help="number of jobs (optimal value may depend on your VRAM capacity and audio duration per file)",
)
@click.option(
    "-f",
    "--force_rebuild",
    type=bool,
    default=True,
    help="force rebuild existing preprocessed files",
)
def pre_hubert(
    input_dir: Path, config_path: Path, n_jobs: int, force_rebuild: bool
) -> None:
    """Preprocessing part 3: hubert
    If the HuBERT model is not found, it will be downloaded automatically."""
    # NOTE: the docstring above is left untouched on purpose; click shows it
    # as the command's help text, so it is user-facing output.
    #
    # n_jobs        -- value of -n/--n_jobs (an int; CLI default 4).
    # force_rebuild -- value of -f/--force_rebuild (CLI default True).
    # Both are forwarded unchanged to preprocess_hubert_f0.
    from .preprocess_hubert_f0 import preprocess_hubert_f0

    # Normalise to Path objects regardless of what click handed over.
    input_dir = Path(input_dir)
    config_path = Path(config_path)
    preprocess_hubert_f0(
        input_dir=input_dir,
        config_path=config_path,
        n_jobs=n_jobs,
        force_rebuild=force_rebuild,
    )


@cli.command
Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/preprocess_flist_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def preprocess_config(
shuffle(paths)
if len(paths) <= 4:
raise ValueError(
f"too few files in {input_dir / speaker} (expected at least 4)."
f"too few files in {input_dir / speaker} (expected at least 5)."
)
train += paths[2:-2]
val += paths[:2]
Expand Down
89 changes: 59 additions & 30 deletions src/so_vits_svc_fork/preprocess_hubert_f0.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,70 @@
LOG = getLogger(__name__)


def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str):
def _process_one(
    filepath: Path,
    hubert_model,
    sampling_rate: int,
    hop_length: int,
    device: Literal["cuda", "cpu"] = "cuda",
    force_rebuild: bool = False,
):
    """Write the HuBERT content and f0 files for a single audio file.

    Two files are produced next to ``filepath``:

    * ``<name>.soft.pt`` -- result of ``utils.get_hubert_content`` on the
      audio resampled to ``HUBERT_SAMPLING_RATE``, saved with ``torch.save``.
    * ``<name>.f0.npy`` -- result of ``utils.compute_f0_dio``, saved with
      ``np.save``.

    Args:
        filepath: Audio file to process.
        hubert_model: Model passed to ``utils.get_hubert_content``.
        sampling_rate: Rate at which the audio is loaded and f0 is computed.
        hop_length: Hop length passed to ``utils.compute_f0_dio``.
        device: Device the resampled waveform is moved to before inference.
        force_rebuild: When true, regenerate outputs even if they exist.
    """
    soft_path = filepath.parent / (filepath.name + ".soft.pt")
    f0_path = filepath.parent / (filepath.name + ".f0.npy")
    need_soft = force_rebuild or not soft_path.exists()
    need_f0 = force_rebuild or not f0_path.exists()

    if not need_soft:
        LOG.info(f"Skip {filepath} because {soft_path} exists.")
    if not need_f0:
        LOG.info(f"Skip {filepath} because {f0_path} exists.")
    if not (need_soft or need_f0):
        # Nothing to compute: avoid decoding the audio file at all.
        return

    wav, _ = librosa.load(filepath, sr=sampling_rate)

    if need_soft:
        wav16k = librosa.resample(
            wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
        )
        wav16k = torch.from_numpy(wav16k).to(device)
        c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k)
        # Move to CPU before saving so the file does not depend on the device.
        torch.save(c.cpu(), soft_path)

    if need_f0:
        f0 = utils.compute_f0_dio(
            wav, sampling_rate=sampling_rate, hop_length=hop_length
        )
        np.save(f0_path, f0)

    torch.cuda.empty_cache()


def _process_batch(
    filepaths: Iterable[Path],
    sampling_rate: int,
    hop_length: int,
    pbar_position: int,
    force_rebuild: bool = False,
):
    """Process a chunk of audio files with one HuBERT model instance.

    The model is loaded once per batch and moved to CUDA when available,
    otherwise to the CPU; each file is then handed to ``_process_one``.

    Args:
        filepaths: Audio files belonging to this batch.
        sampling_rate: Forwarded to ``_process_one``.
        hop_length: Forwarded to ``_process_one``.
        pbar_position: ``position`` argument for this batch's tqdm bar.
        force_rebuild: Forwarded to ``_process_one``; when true, existing
            outputs are regenerated.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    hubert_model = utils.get_hubert_model().to(device)

    for filepath in tqdm(filepaths, position=pbar_position):
        _process_one(
            filepath,
            hubert_model,
            sampling_rate,
            hop_length,
            device,
            # Previously dropped here, which made the flag a no-op.
            force_rebuild=force_rebuild,
        )


def preprocess_hubert_f0(
    input_dir: Path | str,
    config_path: Path | str,
    n_jobs: int = 4,
    force_rebuild: bool = False,
):
    """Generate HuBERT content and f0 files for every ``*.wav`` under a directory.

    Files are found recursively, shuffled, split into ``n_jobs`` chunks and
    processed in parallel with joblib; each chunk is handled by
    ``_process_batch``.

    Args:
        input_dir: Root directory searched recursively for ``*.wav`` files.
        config_path: Hyper-parameter file; ``data.sampling_rate`` and
            ``data.hop_length`` are read from it.
        n_jobs: Upper bound on the number of parallel jobs. The effective
            value is also capped by ``cpu_count()`` and by one job per 32
            files (plus one), and is never lower than 1.
        force_rebuild: Regenerate outputs even when they already exist.
    """
    input_dir = Path(input_dir)
    config_path = Path(config_path)
    # Ensure the checkpoint is available before the workers start; each
    # worker loads its own model instance in _process_batch.
    utils.ensure_hubert_model()
    hps = utils.get_hparams_from_file(config_path)
    sampling_rate = hps.data.sampling_rate
    hop_length = hps.data.hop_length

    filepaths = list(input_dir.rglob("*.wav"))
    # Clamp to >= 1: np.array_split rejects a section count of 0 or less.
    n_jobs = max(1, min(cpu_count(), len(filepaths) // 32 + 1, n_jobs))
    shuffle(filepaths)
    filepath_chunks = np.array_split(filepaths, n_jobs)
    Parallel(n_jobs=n_jobs)(
        delayed(_process_batch)(
            chunk, sampling_rate, hop_length, pbar_position, force_rebuild
        )
        for (pbar_position, chunk) in enumerate(filepath_chunks)
    )
19 changes: 16 additions & 3 deletions src/so_vits_svc_fork/preprocess_resample.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from __future__ import annotations

from logging import getLogger
from pathlib import Path

import audioread.exceptions
import librosa
import numpy as np
import soundfile
from joblib import Parallel, delayed
from tqdm_joblib import tqdm_joblib

LOG = getLogger(__name__)

# input_dir and output_dir exist.
# Convert the audio files in input_dir to audio files in output_dir
# without changing the folder structure. Use joblib to parallelize.
Expand All @@ -27,7 +31,15 @@ def preprocess_resample(

def preprocess_one(input_path: Path, output_path: Path) -> None:
"""Preprocess one audio file."""
audio, sr = librosa.load(input_path)

try:
audio, sr = librosa.load(input_path)

# Audioread is the last backend it will attempt, so this is the exception thrown on failure
except audioread.exceptions.NoBackendError as e:
# Failure due to attempting to load a file that is not audio, so return early
LOG.warning(f"Failed to load {input_path} due to {e}")
return

# Trim silence
audio, _ = librosa.effects.trim(audio, top_db=20)
Expand All @@ -43,9 +55,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

in_and_out_paths = []
for in_path in input_dir.rglob("*.wav"):
out_path = output_dir / in_path.relative_to(input_dir)
for in_path in input_dir.rglob("*.*"):
out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
out_path.parent.mkdir(parents=True, exist_ok=True)
in_and_out_paths.append((in_path, out_path))

with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
5 changes: 3 additions & 2 deletions src/so_vits_svc_fork/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,9 @@ def ensure_pretrained_model(folder_path: Path) -> None:
download_file(model_url, model_path, desc=f"Downloading {model_path.name}")


def ensure_hurbert_model() -> Path:
def ensure_hubert_model() -> Path:
vec_path = Path("checkpoint_best_legacy_500.pt")
vec_path.parent.mkdir(parents=True, exist_ok=True)
if not vec_path.exists():
# url = "http://obs.cstcloud.cn/share/obs/sankagenkeshi/checkpoint_best_legacy_500.pt"
# url = "https://huggingface.co/innnky/contentvec/resolve/main/checkpoint_best_legacy_500.pt"
Expand All @@ -249,7 +250,7 @@ def ensure_hurbert_model() -> Path:


def get_hubert_model():
vec_path = ensure_hurbert_model()
vec_path = ensure_hubert_model()
from fairseq import checkpoint_utils

models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
Expand Down

0 comments on commit 668c8e1

Please sign in to comment.