Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(preprocessing): allow nested dataset #19

Merged
merged 7 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ cython_debug/

# additional files
tests/**/*.wav
!tests/dataset_raw/34j/*.wav
!tests/dataset_raw/44k/34j/**/*.wav
tests/**/*.npy
tests/**/*.pt
tests/**/*.txt
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ svc --model-path <model-path> source.wav

#### Local

Place your dataset like `dataset_raw/{speaker_id}/{wav_file}.wav` and run:
Place your dataset like `dataset_raw/{speaker_id}/**/{wav_file}.{any_format}` (subfolders are acceptable) and run:

```shell
svc pre-resample
Expand Down
5 changes: 3 additions & 2 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def wrap_text(
def cli():
"""so-vits-svc allows any folder structure for training data.
However, the following folder structure is recommended.\n
When training: dataset_raw/{speaker_name}/{wav_name}.wav\n
When training: dataset_raw/{speaker_name}/**/{wav_name}.{any_format}\n
When inference: configs/44k/config.json, logs/44k/G_XXXX.pth\n
If the folder structure is followed, you DO NOT NEED TO SPECIFY model path, config path, etc.
(The latest model will be automatically loaded.)\n
Expand Down Expand Up @@ -476,7 +476,8 @@ def clean():
folders = ["dataset", "filelists", "logs"]
if pyip.inputYesNo(f"Are you sure you want to delete files in {folders}?") == "yes":
for folder in folders:
shutil.rmtree(folder)
if Path(folder).exists():
shutil.rmtree(folder)
LOG.info("Cleaned up files")
else:
LOG.info("Aborted")
Expand Down
29 changes: 26 additions & 3 deletions src/so_vits_svc_fork/preprocess_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from logging import getLogger
from pathlib import Path
from typing import Iterable

import audioread.exceptions
import librosa
Expand All @@ -22,6 +23,18 @@
# - save as 16-bit wav file


def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
"""Return a unique path by appending a number to the original path."""
if path not in existing_paths:
return path
i = 1
while True:
new_path = path.parent / f"{path.stem}_{i}{path.suffix}"
if new_path not in existing_paths:
return new_path
i += 1


def preprocess_resample(
input_dir: Path | str, output_dir: Path | str, sampling_rate: int
) -> None:
Expand Down Expand Up @@ -54,11 +67,21 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
audio /= max(audio.max(), -audio.min())
soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

in_and_out_paths = []
in_paths = []
out_paths = []
for in_path in input_dir.rglob("*.*"):
out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
in_path_relative = in_path.relative_to(input_dir)
if len(in_path_relative.parts) < 2:
continue
speaker_name = in_path_relative.parts[0]
file_name = in_path_relative.with_suffix(".wav").name
out_path = output_dir / speaker_name / file_name
out_path = _get_unique_filename(out_path, out_paths)
out_path.parent.mkdir(parents=True, exist_ok=True)
in_and_out_paths.append((in_path, out_path))
in_paths.append(in_path)
out_paths.append(out_path)

in_and_out_paths = list(zip(in_paths, out_paths))

with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
Binary file added tests/dataset_raw/44k/34j/nested/1.wav
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added tests/dataset_raw/44k/34j/nested2/1.wav
Binary file not shown.