chore: small tweaks to the preprocessing #7

Closed · wants to merge 3 commits (showing changes from 1 commit)
src/so_vits_svc_fork/preprocess_flist_config.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -53,7 +53,7 @@ def preprocess_config(
             continue
         paths.append(path)
     shuffle(paths)
-    if len(paths) <= 4:
+    if len(paths) < 4:
         raise ValueError(
             f"too few files in {input_dir / speaker} (expected at least 4)."
         )
```
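For clarity on the one-character change: the old comparison rejected a speaker directory holding exactly four files even though the error message promises "at least 4". A minimal illustration (hypothetical file names):

```python
paths = ["1.wav", "2.wav", "3.wav", "4.wav"]  # a speaker with exactly 4 files

len(paths) <= 4  # True  -> the old check raised "expected at least 4"
len(paths) < 4   # False -> the new check accepts the advertised minimum
```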
src/so_vits_svc_fork/preprocess_hubert_f0.py (60 changes: 32 additions & 28 deletions)
```diff
@@ -17,41 +17,45 @@
 LOG = getLogger(__name__)


+def _process_one(filepath: Path, hubert_model, sampling_rate: int, hop_length: int,
+                 device: Literal["cuda", "cpu"] = "cuda"):
+    wav, sr = librosa.load(filepath, sr=sampling_rate)
+    soft_path = filepath.parent / (filepath.name + ".soft.pt")
+    if not soft_path.exists():
+        wav16k = librosa.resample(
+            wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
+        )
+        wav16k = torch.from_numpy(wav16k).to(device)
+        c = utils.get_hubert_content(hubert_model, wav_16k_tensor=wav16k)
+        torch.save(c.cpu(), soft_path)
+    f0_path = filepath.parent / (filepath.name + ".f0.npy")
+    if not f0_path.exists():
+        f0 = utils.compute_f0_dio(
+            wav, sampling_rate=sampling_rate, hop_length=hop_length
+        )
+        np.save(f0_path, f0)
+
+
+def _process_batch(filepaths: Iterable[Path], sampling_rate: int, hop_length: int, pos: int):
```
Review thread on the new `_process_batch` definition:

GarrettConway (Collaborator, Author) commented on Mar 18, 2023:

I am not sure why pulling these functions out to the top level increases performance, but it runs much faster.
EDIT: It is probably something to do with how joblib serializes the functions to pass them to Parallel.

A collaborator replied:

Like not being able to use LokyBackend?

GarrettConway replied on Mar 18, 2023:

After some quick testing of the memory consumption of the backends with my small/medium dataset:

  • The multiprocessing backend is the worst: 2 threads will very quickly max out a 3090 and crash it.
  • loky will struggle along but complete with 2 threads. More threads will crash it.
  • The threading backend is the slowest, but by far the best on memory consumption.

In all three cases, memory was not released after the run until the Python instance shut down. I tried some things to get it to release memory, but didn't have any luck. I'm going to swap in the threading backend, but we should probably open an Issue to track this and fix it so it does not break larger datasets.

I'm not too familiar with Python memory management myself, but these docs may help: https://joblib.readthedocs.io/en/latest/parallel.html#serialization-and-processes
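As a reference for the backend swap discussed in this thread, here is a minimal, self-contained sketch of how a joblib backend is selected per `Parallel` call; the chunk data and worker function are illustrative stand-ins, not the PR's actual code:

```python
from joblib import Parallel, delayed

def process_batch(chunk):
    # stand-in for the real per-file work
    return [name.upper() for name in chunk]

chunks = [["a.wav", "b.wav"], ["c.wav", "d.wav"]]

# backend="threading" keeps every worker in a single process, which matched
# the lowest memory footprint in the tests above; "loky" (the default) and
# "multiprocessing" each spawn separate worker processes instead.
results = Parallel(n_jobs=2, backend="threading")(
    delayed(process_batch)(chunk) for chunk in chunks
)
print(results)
```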

The hunk continues with the body of `_process_batch` and the updated `preprocess_hubert_f0`:

```diff
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    hubert_model = utils.get_hubert_model().to(device)
+
+    for filepath in tqdm(filepaths, position=pos):
+        _process_one(filepath, hubert_model, sampling_rate, hop_length, device)
+
+
 def preprocess_hubert_f0(input_dir: Path | str, config_path: Path | str):
     input_dir = Path(input_dir)
     config_path = Path(config_path)
-    utils.get_hubert_model()
     hps = utils.get_hparams_from_file(config_path)
     sampling_rate = hps.data.sampling_rate
     hop_length = hps.data.hop_length

-    def _process_one(filepath: Path, hmodel, device: Literal["cuda", "cpu"] = "cuda"):
-        wav, sr = librosa.load(filepath, sr=sampling_rate)
-        soft_path = filepath.parent / (filepath.name + ".soft.pt")
-        if not soft_path.exists():
-            wav16k = librosa.resample(
-                wav, orig_sr=sampling_rate, target_sr=HUBERT_SAMPLING_RATE
-            )
-            wav16k = torch.from_numpy(wav16k).to(device)
-            c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
-            torch.save(c.cpu(), soft_path)
-        f0_path = filepath.parent / (filepath.name + ".f0.npy")
-        if not f0_path.exists():
-            f0 = utils.compute_f0_dio(
-                wav, sampling_rate=sampling_rate, hop_length=hop_length
-            )
-            np.save(f0_path, f0)
-
-    def _process_batch(filepaths: Iterable[Path]):
-        LOG.info("Loading hubert model...")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        hmodel = utils.get_hubert_model().to(device)
-        LOG.info("Hubert model loaded.")
-        for filepath in tqdm(filepaths):
-            _process_one(filepath, hmodel, device)
-
     filepaths = list(input_dir.glob("**/*.wav"))
-    n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 8)
+    # Dual threading this until I can determine why this causes memory usage to explode and leak
+    n_jobs = min(cpu_count(), len(filepaths) // 32 + 1, 2)
     shuffle(filepaths)
     filepath_chunks = np.array_split(filepaths, n_jobs)
-    Parallel(n_jobs=n_jobs)(delayed(_process_batch)(chunk) for chunk in filepath_chunks)
+    Parallel(n_jobs=n_jobs)(
+        delayed(_process_batch)(chunk, sampling_rate, hop_length, pos) for (pos, chunk) in enumerate(filepath_chunks)
+    )
```
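The new `pos` argument exists so each worker can pass a distinct `position` to tqdm, keeping the concurrent progress bars on separate terminal rows. A standalone sketch of the chunk-enumerate-dispatch pattern used above (illustrative names and data, assuming the threading backend mentioned in the review thread):

```python
import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm

def work(chunk, pos):
    # position=pos pins this worker's progress bar to its own row so the
    # concurrent bars do not overwrite each other
    for _ in tqdm(chunk, position=pos):
        pass  # per-item work goes here

items = list(range(100))
n_jobs = 2
chunks = np.array_split(items, n_jobs)  # n_jobs roughly equal chunks
Parallel(n_jobs=n_jobs, backend="threading")(
    delayed(work)(chunk, pos) for pos, chunk in enumerate(chunks)
)
```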
src/so_vits_svc_fork/preprocess_resample.py (6 changes: 4 additions & 2 deletions)
```diff
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import itertools
 from pathlib import Path

 import librosa
@@ -43,9 +44,10 @@ def preprocess_one(input_path: Path, output_path: Path) -> None:
         soundfile.write(output_path, audio, samplerate=sampling_rate, subtype="PCM_16")

     in_and_out_paths = []
-    for in_path in input_dir.rglob("*.wav"):
-        out_path = output_dir / in_path.relative_to(input_dir)
+    for in_path in itertools.chain(input_dir.rglob("*.wav"), input_dir.rglob("*.flac")):
+        out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
         out_path.parent.mkdir(parents=True, exist_ok=True)
         in_and_out_paths.append((in_path, out_path))

     with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
         Parallel(n_jobs=-1)(delayed(preprocess_one)(*args) for args in in_and_out_paths)
```
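The changed loop now also picks up .flac inputs and maps every output to a .wav path while preserving the directory layout, since `with_suffix(".wav")` only rewrites the extension. A small sketch of the resulting path mapping (hypothetical directories and file names):

```python
from pathlib import Path

input_dir = Path("dataset_raw")  # hypothetical input tree
output_dir = Path("dataset")     # hypothetical output tree

in_path = input_dir / "speaker1" / "take01.flac"
# relative_to() keeps the speaker subdirectory; with_suffix() swaps the extension
out_path = output_dir / in_path.relative_to(input_dir).with_suffix(".wav")
print(out_path)  # dataset/speaker1/take01.wav
```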