diff --git a/.gitignore b/.gitignore index 1d6c2d24..f6e4cf2b 100644 --- a/.gitignore +++ b/.gitignore @@ -141,7 +141,7 @@ cython_debug/ # additional files tests/**/*.wav -!tests/dataset_raw/34j/*.wav +!tests/dataset_raw/44k/34j/**/*.wav tests/**/*.npy tests/**/*.pt tests/**/*.txt diff --git a/README.md b/README.md index 64d48e45..bb8b7812 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ svc --model-path source.wav #### Local -Place your dataset like `dataset_raw/{speaker_id}/{wav_file}.wav` and run: +Place your dataset like `dataset_raw/{speaker_id}/**/{wav_file}.{any_format}` (subfolders are acceptable) and run: ```shell svc pre-resample diff --git a/src/so_vits_svc_fork/__main__.py b/src/so_vits_svc_fork/__main__.py index 57b14e2e..0a38cfba 100644 --- a/src/so_vits_svc_fork/__main__.py +++ b/src/so_vits_svc_fork/__main__.py @@ -84,7 +84,7 @@ def wrap_text( def cli(): """so-vits-svc allows any folder structure for training data. However, the following folder structure is recommended.\n - When training: dataset_raw/{speaker_name}/{wav_name}.wav\n + When training: dataset_raw/{speaker_name}/**/{wav_name}.{any_format}\n When inference: configs/44k/config.json, logs/44k/G_XXXX.pth\n If the folder structure is followed, you DO NOT NEED TO SPECIFY model path, config path, etc. 
def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
    """Return ``path``, or a numbered variant of it that is not already taken.

    If ``path`` is not among ``existing_paths`` it is returned unchanged.
    Otherwise ``_1``, ``_2``, ... is appended to the stem (the directory and
    the suffix are kept) until a name that is not in ``existing_paths`` is
    found. Only ``existing_paths`` is consulted; the filesystem is never
    checked.

    Args:
        path: The desired path.
        existing_paths: Paths that are already taken. Any iterable is
            accepted, including one-shot iterators and generators.

    Returns:
        ``path`` itself, or the first free ``{stem}_{i}{suffix}`` sibling.
    """
    # Snapshot into a set unless the caller already passed one. Repeated
    # ``in`` tests on a one-shot iterator would exhaust it on the first check
    # and then report every candidate as free; a set also makes each lookup
    # O(1) instead of a linear scan.
    if isinstance(existing_paths, (set, frozenset)):
        taken = existing_paths
    else:
        taken = set(existing_paths)

    if path not in taken:
        return path

    i = 1
    while True:
        candidate = path.parent / f"{path.stem}_{i}{path.suffix}"
        if candidate not in taken:
            return candidate
        i += 1
in_and_out_paths.append((in_path, out_path)) + in_paths.append(in_path) + out_paths.append(out_path) + + in_and_out_paths = list(zip(in_paths, out_paths)) with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)): Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths) diff --git a/tests/dataset_raw/44k/34j/nested/1.wav b/tests/dataset_raw/44k/34j/nested/1.wav new file mode 100644 index 00000000..1c12ee00 Binary files /dev/null and b/tests/dataset_raw/44k/34j/nested/1.wav differ diff --git a/tests/dataset_raw/44k/34j/10.wav b/tests/dataset_raw/44k/34j/nested/10.wav similarity index 100% rename from tests/dataset_raw/44k/34j/10.wav rename to tests/dataset_raw/44k/34j/nested/10.wav diff --git a/tests/dataset_raw/44k/34j/2.wav b/tests/dataset_raw/44k/34j/nested/2.wav similarity index 100% rename from tests/dataset_raw/44k/34j/2.wav rename to tests/dataset_raw/44k/34j/nested/2.wav diff --git a/tests/dataset_raw/44k/34j/3.wav b/tests/dataset_raw/44k/34j/nested/3.wav similarity index 100% rename from tests/dataset_raw/44k/34j/3.wav rename to tests/dataset_raw/44k/34j/nested/3.wav diff --git a/tests/dataset_raw/44k/34j/4.wav b/tests/dataset_raw/44k/34j/nested/4.wav similarity index 100% rename from tests/dataset_raw/44k/34j/4.wav rename to tests/dataset_raw/44k/34j/nested/4.wav diff --git a/tests/dataset_raw/44k/34j/5.wav b/tests/dataset_raw/44k/34j/nested/5.wav similarity index 100% rename from tests/dataset_raw/44k/34j/5.wav rename to tests/dataset_raw/44k/34j/nested/5.wav diff --git a/tests/dataset_raw/44k/34j/6.wav b/tests/dataset_raw/44k/34j/nested/6.wav similarity index 100% rename from tests/dataset_raw/44k/34j/6.wav rename to tests/dataset_raw/44k/34j/nested/6.wav diff --git a/tests/dataset_raw/44k/34j/7.wav b/tests/dataset_raw/44k/34j/nested/7.wav similarity index 100% rename from tests/dataset_raw/44k/34j/7.wav rename to tests/dataset_raw/44k/34j/nested/7.wav diff --git a/tests/dataset_raw/44k/34j/8.wav 
b/tests/dataset_raw/44k/34j/nested/8.wav similarity index 100% rename from tests/dataset_raw/44k/34j/8.wav rename to tests/dataset_raw/44k/34j/nested/8.wav diff --git a/tests/dataset_raw/44k/34j/9.wav b/tests/dataset_raw/44k/34j/nested/9.wav similarity index 100% rename from tests/dataset_raw/44k/34j/9.wav rename to tests/dataset_raw/44k/34j/nested/9.wav diff --git a/tests/dataset_raw/44k/34j/nested2/1.wav b/tests/dataset_raw/44k/34j/nested2/1.wav new file mode 100644 index 00000000..1c12ee00 Binary files /dev/null and b/tests/dataset_raw/44k/34j/nested2/1.wav differ