Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modularize Audiocraft #64

Merged
merged 2 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,32 @@
https://rsxdalv.github.io/bark-speaker-directory/

## Changelog
July 5:
* Improved v5 installer - faster and more reliable

July 2:
* Upgrade bark settings #59
* Upgrade bark settings https://github.com/rsxdalv/tts-generation-webui/pull/59

July 1:
* Studio-tab #58
* Studio-tab https://github.com/rsxdalv/tts-generation-webui/pull/58

Jun 29:
* Tortoise new params #54
* Tortoise new params https://github.com/rsxdalv/tts-generation-webui/pull/54

Jun 27:
* Fix eager loading errors, refactor #50
* Fix eager loading errors, refactor https://github.com/rsxdalv/tts-generation-webui/pull/50

Jun 20:
* Tortoise: proper long form generation files #46
* Tortoise: proper long form generation files https://github.com/rsxdalv/tts-generation-webui/pull/46

Jun 19:
* Tortoise-upgrade #45
* Tortoise-upgrade https://github.com/rsxdalv/tts-generation-webui/pull/45

June 18:
* Update to newest audiocraft, add longer generations

Jun 14:
* add vocos wav tab #42
* add vocos wav tab https://github.com/rsxdalv/tts-generation-webui/pull/42

June 5:
* Fix "Save to Favorites" button on bark generation page, clean up console (v4.1.1)
Expand Down
2 changes: 0 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,5 @@ python-dotenv==1.0.0
soundfile==0.12.1; sys_platform == 'win32' # torchaudio
# sox # torchaudio for linux
suno-bark @ git+https://github.com/suno-ai/bark@7280e4e#egg=suno-bark # MIT License
# audiocraft==0.0.1 # MIT License
audiocraft @ git+https://git@github.com/facebookresearch/audiocraft@d874966#egg=audiocraft # MIT License
vocos==0.0.2 # MIT License
tortoise @ git+https://github.com/rsxdalv/tortoise-tts@f04f95e30245c85996be3af068cba4890952b1a1#egg=tortoise # Apache 2.0
2 changes: 2 additions & 0 deletions requirements_audiocraft.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# audiocraft==0.0.1 # MIT License
audiocraft @ git+https://git@github.com/facebookresearch/audiocraft@d874966#egg=audiocraft # MIT License
50 changes: 32 additions & 18 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,12 @@
import src.utils.dotenv_init as dotenv_init
import gradio as gr

from src.bark.clone.tab_voice_clone_error import tab_voice_clone_error
from src.config.load_config import default_config
from src.config.config import config

from src.css.css import full_css
from src.Joutai import Joutai
from src.musicgen.musicgen_tab import generation_tab_musicgen
from src.history_tab.collections_directories_atom import collections_directories_atom
from src.config.save_config_gradio import save_config_gradio
from src.tortoise.generation_tab_tortoise import generation_tab_tortoise
from src.config.load_config import default_config
from src.settings_tab_gradio import settings_tab_gradio
from src.bark.generation_tab_bark import generation_tab_bark
from src.history_tab.main import history_tab
from src.bark.settings_tab_bark import settings_tab_bark
from src.config.config import config
from src.history_tab.voices_tab import voices_tab
from src.vocos.vocos_tabs import vocos_tabs
from src.studio.studio_tab import simple_remixer_tab

setup_or_recover.dummy()
dotenv_init.init()
Expand Down Expand Up @@ -47,18 +37,39 @@ def reload_config_and_restart_ui():
) as demo:
gr.Markdown("# TTS Generation WebUI (Bark, MusicGen, Tortoise)")
with Joutai.singleton.tabs:
from src.tortoise.generation_tab_tortoise import generation_tab_tortoise
from src.settings_tab_gradio import settings_tab_gradio
from src.bark.generation_tab_bark import generation_tab_bark
from src.history_tab.main import history_tab
from src.bark.settings_tab_bark import settings_tab_bark
from src.history_tab.voices_tab import voices_tab
from src.vocos.vocos_tabs import vocos_tabs
from src.studio.studio_tab import simple_remixer_tab

register_use_as_history_button = generation_tab_bark()

try:
from src.bark.clone.tab_voice_clone import tab_voice_clone

tab_voice_clone(register_use_as_history_button)
except Exception as e:
from src.bark.clone.tab_voice_clone_error import tab_voice_clone_error

tab_voice_clone_error(e)
print("Failed to load voice clone demo")
print(e)

generation_tab_musicgen()
try:
from src.musicgen.musicgen_tab import generation_tab_musicgen

generation_tab_musicgen()
except Exception as e:
from src.musicgen.musicgen_tab_error import musicgen_tab_error

musicgen_tab_error(e)
print("Failed to load musicgen demo")
print(e)

vocos_tabs()
generation_tab_tortoise()

Expand All @@ -71,9 +82,7 @@ def reload_config_and_restart_ui():
voices_tab(register_use_as_history_button)

settings_tab_bark()
settings_tab_gradio(
save_config_gradio, reload_config_and_restart_ui, gradio_interface_options
)
settings_tab_gradio(reload_config_and_restart_ui, gradio_interface_options)
remixer_input = simple_remixer_tab()
Joutai.singleton.tabs.render()

Expand All @@ -100,7 +109,12 @@ def print_pretty_options(options):
print("Gradio server authentication enabled")
print_pretty_options(gradio_interface_options)

if __name__ == "__main__":

def start_server():
    """Enable request queuing on the module-level ``demo`` and launch it.

    The queue's ``concurrency_count`` is read from
    ``gradio_interface_options`` (falling back to 5 when the key is absent);
    the same options dict is then forwarded to ``launch`` as keyword
    arguments.
    """
    concurrency = gradio_interface_options.get("concurrency_count", 5)
    queued_demo = demo.queue(concurrency_count=concurrency)
    queued_demo.launch(**gradio_interface_options)


# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    start_server()
4 changes: 1 addition & 3 deletions src/bark/clone/tab_voice_clone_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@ def install_bark_voice_clone():
if process.returncode == 0:
print("Successfully installed bark voice clone")
yield "Successfully installed bark voice clone, please restart the webui"
return
else:
print("Failed to install bark voice clone")
yield "Failed to install bark voice clone"
return


def tab_voice_clone_error(e: Exception):
Expand All @@ -38,7 +36,7 @@ def tab_voice_clone_error(e: Exception):
gr.Markdown(f"Error: {e}")
gr.Markdown("Please install the requirements_bark_hubert_quantizer.txt file")
gr.Markdown("Please check the console for more information")
install_btn = gr.Button("install bark voice clone")
install_btn = gr.Button("Install Bark Voice Clone")
gr.Markdown("Installation console:")
console_text = gr.HTML()
install_btn.click(
Expand Down
6 changes: 3 additions & 3 deletions src/config/load_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
"coarse_use_gpu": True,
"coarse_use_small": True,
"fine_use_gpu": True,
"fine_use_small": False,
"fine_use_small": True,
"codec_use_gpu": True,
"load_models_on_startup": True,
"load_models_on_startup": False,
},
"gradio_interface_options": {
"inline": False,
Expand Down Expand Up @@ -38,7 +38,7 @@
"file_directories": None,
"_frontend": True,
},
"load_models_on_startup": True,
"load_models_on_startup": False,
}


Expand Down
33 changes: 16 additions & 17 deletions src/musicgen/musicgen_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,26 +197,25 @@ def generate(params: MusicGenGeneration, melody_in: Optional[Tuple[int, np.ndarr
]


musicgen_atom = gr.JSON(
# visible=True,
visible=False,
value={
"text": "",
"melody": None,
"model": "melody",
"duration": 10,
"topk": 250,
"topp": 0,
"temperature": 1.0,
"cfg_coef": 3.0,
"seed": -1,
},
)


def generation_tab_musicgen():
with gr.Tab("MusicGen") as tab:
musicgen_atom.render()
musicgen_atom = gr.JSON(
# visible=True,
visible=False,
value={
"text": "",
"melody": None,
"model": "melody",
"duration": 10,
"topk": 250,
"topp": 0,
"temperature": 1.0,
"cfg_coef": 3.0,
"seed": -1,
},
)
# musicgen_atom.render()
gr.Markdown(f"""Audiocraft version: {AUDIOCRAFT_VERSION}""")
with gr.Row():
with gr.Column():
Expand Down
50 changes: 50 additions & 0 deletions src/musicgen/musicgen_tab_error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import gradio as gr


def install_musicgen():
    """Install the MusicGen (audiocraft) requirements, streaming pip's output.

    Runs ``pip install -r requirements_audiocraft.txt`` using the interpreter
    that is executing this code, so the packages are installed into the same
    environment the web UI imports from. The requirements path is relative
    to the current working directory.

    Yields:
        str: each line of pip's combined stdout/stderr as it is produced,
        followed by one final status message reporting success or failure.
    """
    import subprocess
    import sys

    # An argument list (no shell) avoids shell quoting/lookup issues, and
    # ``sys.executable -m pip`` avoids picking up an unrelated ``pip`` that
    # happens to come first on PATH.
    command = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        "requirements_audiocraft.txt",
    ]

    # The context manager closes the pipe and waits for the child on exit.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    ) as process:
        # Stream the output to the console and to the caller
        for line in process.stdout:  # type: ignore
            print(line, end="")
            yield line

    # Leaving the ``with`` block has waited for pip, so returncode is final.
    if process.returncode == 0:
        print("Successfully installed MusicGen")
        yield "Successfully installed MusicGen, please restart the webui"
    else:
        print("Failed to install MusicGen")
        yield "Failed to install MusicGen"


def musicgen_tab_error(e: Exception):
    """Render a fallback "MusicGen (!)" tab describing a MusicGen load failure.

    The tab shows the error, points the user at the MusicGen requirements
    file, and offers a button that runs ``install_musicgen`` and streams its
    output into an HTML component.

    Args:
        e: The exception raised while loading the MusicGen tab; its string
            form is displayed to the user.

    Note:
        Creates Gradio components, so it is meant to be called while a
        ``gr.Blocks`` context is active (as the ``__main__`` preview in this
        file does).
    """
    with gr.Tab("MusicGen (!)"):
        # Messages name MusicGen and its own requirements file; the earlier
        # text was copied from the voice-clone error tab and pointed users at
        # the wrong requirements file.
        gr.Markdown("Failed to load MusicGen demo")
        gr.Markdown(f"Error: {e}")
        gr.Markdown("Please install the requirements_audiocraft.txt file")
        gr.Markdown("Please check the console for more information")
        install_btn = gr.Button("Install MusicGen")
        gr.Markdown("Installation console:")
        console_text = gr.HTML()
        # install_musicgen is a generator; each yielded string is sent to
        # the HTML component.
        install_btn.click(
            install_musicgen,
            outputs=[console_text],
        )


if __name__ == "__main__":
    # Manual preview: show the error tab on its own with a dummy exception.
    demo = gr.Blocks()
    with demo:
        musicgen_tab_error(Exception("Test"))
    demo.launch()
6 changes: 3 additions & 3 deletions src/settings_tab_gradio.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Any, Callable, Dict, List
from typing import Any, Callable, Dict
import gradio as gr
from src.config.save_config_gradio import save_config_gradio


def settings_tab_gradio(
save_config: Callable[[List[str], List[Any]], str],
reload_config_and_restart_ui: Callable[[], None],
gradio_interface_options: Dict[str, Any],
):
Expand Down Expand Up @@ -155,7 +155,7 @@ def settings_tab_gradio(
# Map over the UI elements
for i in inputs:
i.change(
fn=lambda *input_values: save_config(keys, input_values),
fn=lambda *input_values: save_config_gradio(keys, input_values),
inputs=inputs,
outputs=[save_beacon],
)
Expand Down