chore: small tweaks to the preprocessing #7

Closed · wants to merge 3 commits (showing changes from 1 commit)
src/so_vits_svc_fork/preprocess_flist_config.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -53,7 +53,7 @@ def preprocess_config(
             continue
         paths.append(path)
     shuffle(paths)
-    if len(paths) <= 4:
+    if len(paths) < 4:
         raise ValueError(
             f"too few files in {input_dir / speaker} (expected at least 4)."
         )
```
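For clarity on the one-character change: the old comparison rejected a speaker directory holding exactly four files even though the error message promises "at least 4". A minimal illustration (hypothetical file names):

```python
paths = ["1.wav", "2.wav", "3.wav", "4.wav"]  # a speaker with exactly 4 files

len(paths) <= 4  # True  -> the old check raised "expected at least 4"
len(paths) < 4   # False -> the new check accepts the advertised minimum
```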
src/so_vits_svc_fork/preprocess_hubert_f0.py (60 changes: 32 additions & 28 deletions)
```diff
@@ -17,41 +17,45 @@
 LOG = getLogger(__name__)


+def _process_one(filepath: Path, hubert_model, sampling_rate: int, hop_length: int,
+                 device: Literal["cuda", "cpu"] = "cuda"):
+    wav, sr = librosa.load(filepath, sr=sampling_rate)
+    soft_path = filepath.parent / (filepath.name + ".soft.pt")
+    if not soft_path.exists():
+        wav16k = librosa.resample(
+            wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
+        )
+        wav16k = torch.from_numpy(wav16k).to(device)
+        c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k)
+        torch.save(c.cpu(), soft_path)
+    f0_path = filepath.parent / (filepath.name + ".f0.npy")
+    if not f0_path.exists():
+        f0 = utils.compute_f0_dio(
+            wav, sampling_rate=sampling_rate, hop_length=hop_length
+        )
+        np.save(f0_path, f0)
+
+
+def _process_batch(filepaths: Iterable[Path], sampling_rate: int, hop_length: int, pos: int):
```
Review thread on the new `_process_batch` definition:

GarrettConway (Collaborator, Author) commented on Mar 18, 2023:

I am not sure why pulling these functions out to the top level increases performance, but it runs much faster.
EDIT: It is probably something to do with how joblib serializes the functions to pass them to Parallel.

A collaborator replied:

Like not being able to use LokyBackend?

GarrettConway replied on Mar 18, 2023:

After some quick testing of the memory consumption of the backends with my small/medium dataset:

  • The multiprocessing backend is the worst: 2 threads will very quickly max out a 3090 and crash it.
  • loky will struggle along but complete with 2 threads. More threads will crash it.
  • The threading backend is the slowest, but by far the best on memory consumption.

In all three cases, memory was not released after the run until the Python instance shut down. I tried some things to get it to release memory, but didn't have any luck. I'm going to swap in the threading backend, but we should probably open an Issue to track this and fix it so it does not break larger datasets.

I'm not too familiar with Python memory management myself, but these docs may help: https://joblib.readthedocs.io/en/latest/parallel.html#serialization-and-processes
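As a reference for the backend swap discussed in this thread, here is a minimal, self-contained sketch of how a joblib backend is selected per `Parallel` call; the chunk data and worker function are illustrative stand-ins, not the PR's actual code:

```python
from joblib import Parallel, delayed

def process_batch(chunk):
    # stand-in for the real per-file work
    return [name.upper() for name in chunk]

chunks = [["a.wav", "b.wav"], ["c.wav", "d.wav"]]

# backend="threading" keeps every worker in a single process, which matched
# the lowest memory footprint in the tests above; "loky" (the default) and
# "multiprocessing" each spawn separate worker processes instead.
results = Parallel(n_jobs=2, backend="threading")(
    delayed(process_batch)(chunk) for chunk in chunks
)
print(results)
```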

The hunk continues with the body of `_process_batch` and the updated `preprocess_hubert_f0`:

```diff
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    hubert_model = utils.get_hubert_model().to(device)
+
+    for filepath in tqdm(filepaths, position=pos):
+        _process_one(filepath, hubert_model, sampling_rate, hop_length, device)
+
+
 def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str):
     input_dir = Path(input_dir)
     config_path = Path(config_path)
-    utils.get_hubert_model()
     hps = utils.get_hparams_from_file(config_path)
     sampling_rate = hps.data.sampling_rate
     hop_length = hps.data.hop_length

-    def _process_one(filepath: Path, hmodel, device: Literal["cuda", "cpu"] = "cuda"):
-        wav, sr = librosa.load(filepath, sr=sampling_rate)
-        soft_path = filepath.parent / (filepath.name + ".soft.pt")
-        if not soft_path.exists():
-            wav16k = librosa.resample(
-                wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
-            )
-            wav16k = torch.from_numpy(wav16k).to(device)
-            c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
-            torch.save(c.cpu(), soft_path)
-        f0_path = filepath.parent / (filepath.name + ".f0.npy")
-        if not f0_path.exists():
-            f0 = utils.compute_f0_dio(
-                wav, sampling_rate=sampling_rate, hop_length=hop_length
-            )
-            np.save(f0_path, f0)
-
-    def _process_batch(filepaths: Iterable[Path]):
-        LOG.info("Loading hubert model...")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        hmodel = utils.get_hubert_model().to(device)
-        LOG.info("Hubert model loaded.")
-        for filepath in tqdm(filepaths):
-            _process_one(filepath, hmodel, device)
-
     filepaths = list(input_dir.glob("**/*.wav"))
-    n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 8)
+    # Dual threading this until I can determine why this causes memory usage to explode and leak
+    n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 2)
     shuffle(filepaths)
     filepath_chunks = np.array_split(filepaths, n_jobs)
-    Parallel(n_jobs=n_jobs)(delayed(_process_batch)(chunk) for chunk in filepath_chunks)
+    Parallel(n_jobs=n_jobs)(
+        delayed(_process_batch)(chunk, sampling_rate, hop_length, pos) for (pos, chunk) in enumerate(filepath_chunks)
+    )
```
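The new `pos` argument exists so each worker can pass a distinct `position` to tqdm, keeping the concurrent progress bars on separate terminal rows. A standalone sketch of the chunk-enumerate-dispatch pattern used above (illustrative names and data, assuming the threading backend mentioned in the review thread):

```python
import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm

def work(chunk, pos):
    # position=pos pins this worker's progress bar to its own row so the
    # concurrent bars do not overwrite each other
    for _ in tqdm(chunk, position=pos):
        pass  # per-item work goes here

items = list(range(100))
n_jobs = 2
chunks = np.array_split(items, n_jobs)  # n_jobs roughly equal chunks
Parallel(n_jobs=n_jobs, backend="threading")(
    delayed(work)(chunk, pos) for pos, chunk in enumerate(chunks)
)
```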
src/so_vits_svc_fork/preprocess_resample.py (6 changes: 4 additions & 2 deletions)
```diff
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import itertools
 from pathlib import Path

 import librosa
@@ -43,9 +44,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
         soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

     in_and_out_paths = []
-    for in_path in input_dir.rglob("*.wav"):
-        out_path = output_dir / in_path.relative_to(input_dir)
+    for in_path in itertools.chain(input_dir.rglob("*.wav"), input_dir.rglob("*.flac")):
+        out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
         out_path.parent.mkdir(parents=True, exist_ok=True)
         in_and_out_paths.append((in_path, out_path))

     with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
         Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
```
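The changed loop now also picks up .flac inputs and maps every output to a .wav path while preserving the directory layout, since `with_suffix(".wav")` only rewrites the extension. A small sketch of the resulting path mapping (hypothetical directories and file names):

```python
from pathlib import Path

input_dir = Path("dataset_raw")  # hypothetical input tree
output_dir = Path("dataset")     # hypothetical output tree

in_path = input_dir / "speaker1" / "take01.flac"
# relative_to() keeps the speaker subdirectory; with_suffix() swaps the extension
out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
print(out_path)  # dataset/speaker1/take01.wav
```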