diff --git a/src/so_vits_svc_fork/__main__.py b/src/so_vits_svc_fork/__main__.py index e45e81ec..933b39a5 100644 --- a/src/so_vits_svc_fork/__main__.py +++ b/src/so_vits_svc_fork/__main__.py @@ -271,7 +271,7 @@ def infer( @click.option( "-fm", "--f0-method", - type=click.Choice(["crepe", "parselmouth", "dio", "harvest"]), + type=click.Choice(["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]), default="crepe", help="f0 prediction method", ) @@ -321,7 +321,7 @@ def vc( auto_predict_f0: bool, cluster_infer_ratio: float, noise_scale: float, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"], + f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], # slice config db_thresh: int, pad_seconds: float, @@ -485,7 +485,7 @@ def pre_config( @click.option( "-fm", "--f0-method", - type=click.Choice(["crepe", "parselmouth", "dio", "harvest"]), + type=click.Choice(["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"]), default="crepe", ) def pre_hubert( @@ -493,7 +493,7 @@ def pre_hubert( config_path: Path, n_jobs: bool, force_rebuild: bool, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"], + f0_method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], ) -> None: """Preprocessing part 3: hubert If the HuBERT model is not found, it will be downloaded automatically.""" diff --git a/src/so_vits_svc_fork/gui.py b/src/so_vits_svc_fork/gui.py index bf7c8316..6dc6884b 100644 --- a/src/so_vits_svc_fork/gui.py +++ b/src/so_vits_svc_fork/gui.py @@ -129,7 +129,7 @@ def main(): [ sg.Text("F0 prediction method"), sg.Combo( - ["crepe", "parselmouth", "dio", "harvest"], + ["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"], key="f0_method", default_value="crepe", ), @@ -318,7 +318,7 @@ def update_combo() -> None: update_combo() if event.endswith("_path"): for name in window.AllKeysDict: - if name.endswith("_browse"): + if str(name).endswith("_browse"): browser = window[name] if isinstance(browser, sg.Button): LOG.info( diff --git a/src/so_vits_svc_fork/inference/infer_tool.py b/src/so_vits_svc_fork/inference/infer_tool.py index 0b327d49..03a1fd0c 100644 --- a/src/so_vits_svc_fork/inference/infer_tool.py +++ b/src/so_vits_svc_fork/inference/infer_tool.py @@ -128,7 +128,9 @@ def get_unit_f0( tran: int, cluster_infer_ratio: float, speaker: int | str, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", ): f0 = utils.compute_f0( audio, @@ -168,7 +170,9 @@ def infer( cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", ) -> tuple[torch.Tensor, int]: audio = audio.astype(np.float32) # get speaker id @@ -225,7 +229,9 @@ def infer_silence( auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", # slice config db_thresh: int = -40, pad_seconds: float = 0.5, @@ -442,7 +448,9 @@ def infer( cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", # slice config db_thresh: int = -40, pad_seconds: float = 0.5, @@ -500,7 +508,9 @@ def process( cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", # slice config db_thresh: int = -40, chunk_seconds: float = 0.5, diff --git a/src/so_vits_svc_fork/inference_main.py b/src/so_vits_svc_fork/inference_main.py index fe319014..cd92ff58 100644 --- a/src/so_vits_svc_fork/inference_main.py +++ b/src/so_vits_svc_fork/inference_main.py @@ -29,7 +29,9 @@ def infer( auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", # slice config db_thresh: int = -40, pad_seconds: float = 0.5, @@ -81,7 +83,9 @@ def realtime( auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", # slice config db_thresh: int = -40, pad_seconds: float = 0.5, diff --git a/src/so_vits_svc_fork/preprocess_hubert_f0.py b/src/so_vits_svc_fork/preprocess_hubert_f0.py index f7b60ab4..cefb6bbf 100644 --- a/src/so_vits_svc_fork/preprocess_hubert_f0.py +++ b/src/so_vits_svc_fork/preprocess_hubert_f0.py @@ -23,7 +23,9 @@ def _process_one( sampling_rate: int, hop_length: int, device: Literal["cuda", "cpu"] = "cuda", - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", force_rebuild: bool = False, ): wav, sr = librosa.load(filepath, sr=sampling_rate) @@ -57,7 +59,9 @@ def _process_batch( sampling_rate: int, hop_length: int, pbar_position: int, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", force_rebuild: bool = False, ): device = "cuda" if torch.cuda.is_available() else "cpu" @@ -79,7 +83,9 @@ def preprocess_hubert_f0( input_dir: Path | str, config_path: Path | str, n_jobs: int = 4, - f0_method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + f0_method: Literal[ + "crepe", "crepe-tiny", "parselmouth", "dio", "harvest" + ] = "crepe", force_rebuild: bool = False, ): input_dir = Path(input_dir) diff --git a/src/so_vits_svc_fork/utils.py b/src/so_vits_svc_fork/utils.py index 8e4208b7..132b815b 100644 --- a/src/so_vits_svc_fork/utils.py +++ b/src/so_vits_svc_fork/utils.py @@ -242,7 +242,7 @@ def compute_f0( p_len: None | int = None, sampling_rate: int = 44100, hop_length: int = 512, - method: Literal["crepe", "parselmouth", "dio", "harvest"] = "crepe", + method: Literal["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"] = "crepe", **kwargs, ): wav_numpy = wav_numpy.astype(np.float32) @@ -251,6 +251,10 @@ def compute_f0( return compute_f0_pyworld(wav_numpy, p_len, sampling_rate, hop_length, method) elif method == "crepe": return compute_f0_crepe(wav_numpy, p_len, sampling_rate, hop_length, **kwargs) + elif method == "crepe-tiny": + return compute_f0_crepe( + wav_numpy, p_len, sampling_rate, hop_length, model="tiny", **kwargs + ) elif method == "parselmouth": return compute_f0_parselmouth(wav_numpy, p_len, sampling_rate, hop_length) else: