
Commit

chore(preprocessing): small tweaks and fixes to preprocessing for training data
GarrettConway committed Mar 18, 2023
1 parent b988101 commit 0e1d3ef
Showing 3 changed files with 37 additions and 31 deletions.
src/so_vits_svc_fork/preprocess_flist_config.py (2 changes: 1 addition & 1 deletion)
@@ -53,7 +53,7 @@ def preprocess_config(
continue
paths.append(path)
shuffle(paths)
- if len(paths) <= 4:
+ if len(paths) < 4:
raise ValueError(
f"too few files in {input_dir / speaker} (expected at least 4)."
)
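
Note on the hunk above: the old check len(paths) <= 4 raised even when a speaker directory contained exactly 4 files, contradicting the "(expected at least 4)" message; the new len(paths) < 4 only rejects 3 or fewer. A minimal sketch of the boundary case (hypothetical values, not repository code):

    paths = ["a.wav", "b.wav", "c.wav", "d.wav"]  # exactly 4 files
    len(paths) <= 4  # True  -> old code raised despite promising "at least 4"
    len(paths) < 4   # False -> new code accepts the 4 files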
src/so_vits_svc_fork/preprocess_hubert_f0.py (60 changes: 32 additions & 28 deletions)
@@ -17,41 +17,45 @@
LOG = getLogger(__name__)


+ def _process_one(filepath: Path, hubert_model, sampling_rate: int, hop_length: int,
+                  device: Literal["cuda", "cpu"] = "cuda"):
+     wav, sr = librosa.load(filepath, sr=sampling_rate)
+     soft_path = filepath.parent / (filepath.name + ".soft.pt")
+     if not soft_path.exists():
+         wav16k = librosa.resample(
+             wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
+         )
+         wav16k = torch.from_numpy(wav16k).to(device)
+         c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k)
+         torch.save(c.cpu(), soft_path)
+     f0_path = filepath.parent / (filepath.name + ".f0.npy")
+     if not f0_path.exists():
+         f0 = utils.compute_f0_dio(
+             wav, sampling_rate=sampling_rate, hop_length=hop_length
+         )
+         np.save(f0_path, f0)
+
+
+ def _process_batch(filepaths: Iterable[Path], sampling_rate: int, hop_length: int, pos: int):
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     hubert_model = utils.get_hubert_model().to(device)
+
+     for filepath in tqdm(filepaths, position=pos):
+         _process_one(filepath, hubert_model, sampling_rate, hop_length, device)


def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str):
input_dir = Path(input_dir)
config_path = Path(config_path)
utils.get_hubert_model()
hps = utils.get_hparams_from_file(config_path)
sampling_rate = hps.data.sampling_rate
hop_length = hps.data.hop_length

-     def _process_one(filepath: Path, hmodel, device: Literal["cuda", "cpu"] = "cuda"):
-         wav, sr = librosa.load(filepath, sr=sampling_rate)
-         soft_path = filepath.parent / (filepath.name + ".soft.pt")
-         if not soft_path.exists():
-             wav16k = librosa.resample(
-                 wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
-             )
-             wav16k = torch.from_numpy(wav16k).to(device)
-             c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
-             torch.save(c.cpu(), soft_path)
-         f0_path = filepath.parent / (filepath.name + ".f0.npy")
-         if not f0_path.exists():
-             f0 = utils.compute_f0_dio(
-                 wav, sampling_rate=sampling_rate, hop_length=hop_length
-             )
-             np.save(f0_path, f0)
-
-     def _process_batch(filepaths: Iterable[Path]):
-         LOG.info("Loading hubert model...")
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         hmodel = utils.get_hubert_model().to(device)
-         LOG.info("Hubert model loaded.")
-         for filepath in tqdm(filepaths):
-             _process_one(filepath, hmodel, device)

filepaths = list(input_dir.glob("**/*.wav"))
-     n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 8)
+     # Dual threading this until I can determine why this causes memory usage to explode and leak
+     n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 2)
shuffle(filepaths)
filepath_chunks = np.array_split(filepaths, n_jobs)
-     Parallel(n_jobs=n_jobs)(delayed(_process_batch)(chunk) for chunk in filepath_chunks)
+     Parallel(n_jobs=n_jobs)(
+         delayed(_process_batch)(chunk, sampling_rate, hop_length, pos) for (pos, chunk) in enumerate(filepath_chunks)
+     )
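
Note on the hunk above: _process_one and _process_batch move from closures inside preprocess_hubert_f0 to module level and now receive sampling_rate, hop_length, and a pos index explicitly instead of closing over the outer function's locals; each parallel worker loads its own HuBERT model and draws its own tqdm bar at position=pos, and n_jobs is capped at 2 as a workaround for the memory blow-up mentioned in the comment. A minimal, self-contained sketch of the same chunk-per-worker pattern (run and _process_chunk are hypothetical stand-ins, not the repository's code):

    from pathlib import Path

    import numpy as np
    from joblib import Parallel, delayed
    from tqdm import tqdm


    def _process_chunk(filepaths: list, pos: int) -> None:
        # A real worker would load its model (e.g. HuBERT on CUDA/CPU) once here
        # and reuse it for every file in the chunk.
        for filepath in tqdm(filepaths, position=pos):
            pass  # stand-in for per-file feature extraction (.soft.pt / .f0.npy)


    def run(input_dir: Path) -> None:
        filepaths = list(input_dir.glob("**/*.wav"))
        # Cap the worker count at 2, mirroring the commit's workaround.
        n_jobs = min(2, len(filepaths) // 32 + 1)
        chunks = np.array_split(filepaths, n_jobs)
        Parallel(n_jobs=n_jobs)(
            delayed(_process_chunk)(list(chunk), pos) for pos, chunk in enumerate(chunks)
        )

As in the diff itself, the real _process_one skips any file whose .soft.pt or .f0.npy output already exists, so the step can be re-run incrementally.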
src/so_vits_svc_fork/preprocess_resample.py (6 changes: 4 additions & 2 deletions)
@@ -1,5 +1,6 @@
from __future__ import annotations

+ import itertools
from pathlib import Path

import librosa
@@ -43,9 +44,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

in_and_out_paths = []
- for in_path in input_dir.rglob("*.wav"):
- out_path = output_dir / in_path.relative_to(input_dir)
+ for in_path in itertools.chain(input_dir.rglob("*.wav"), input_dir.rglob("*.flac")):
+ out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
out_path.parent.mkdir(parents=True, exist_ok=True)
in_and_out_paths.append((in_path, out_path))

with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
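
Note on the hunk above: resampling now also picks up .flac inputs via itertools.chain, and the output path always gets a .wav suffix so the mirrored tree under output_dir contains only 16-bit PCM .wav files. A small sketch of that path mapping (collect_pairs is a hypothetical helper, not repository code):

    import itertools
    from pathlib import Path


    def collect_pairs(input_dir: Path, output_dir: Path) -> list:
        # Mirror every .wav/.flac under input_dir to a .wav path under output_dir.
        pairs = []
        for in_path in itertools.chain(input_dir.rglob("*.wav"), input_dir.rglob("*.flac")):
            out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
            out_path.parent.mkdir(parents=True, exist_ok=True)
            pairs.append((in_path, out_path))
        return pairs

For an input like speaker1/take01.flac this yields output_dir/speaker1/take01.wav, so downstream preprocessing can assume a uniform format.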
