From 386dd5b3b9e032d16e5d81b1d04d2601b7d863b4 Mon Sep 17 00:00:00 2001
From: Jules
Date: Wed, 10 Jul 2024 22:34:30 +0200
Subject: [PATCH] Some changes

---
 build.sh                    |  2 +-
 lidya/core.py               | 27 +++++++++++++++++++------
 lidya/libs/config.py        | 18 +++++++++++++++++
 lidya/libs/pluginmanager.py |  4 ----
 lidya/libs/tts.py           | 37 ++++++++++++----------------------
 lydia.sh                    |  3 ---
 scripts/setup.py            | 40 +++++++++++++++++++++++--------------
 7 files changed, 78 insertions(+), 53 deletions(-)
 delete mode 100755 lydia.sh

diff --git a/build.sh b/build.sh
index 9f04c62..996265a 100755
--- a/build.sh
+++ b/build.sh
@@ -10,7 +10,7 @@ sudo apt install portaudio19-dev python3 python3-pip
 pip install -r requirements.txt
 
 # Create plugin folder
-mkdir plugins
+mkdir plugins models cache
 
 echo "Lidya is ready to use. Note that we have not prepared the models or API keys."
 echo "Good job! You did it! You now have your personal assistant to increase productivity and your leisure time. Note that you must run ./config in order to configure your assistant."

diff --git a/lidya/core.py b/lidya/core.py
index 0e7e77d..50ea581 100644
--- a/lidya/core.py
+++ b/lidya/core.py
@@ -16,23 +16,29 @@
 from libs import config
 from libs import llm_con
 from libs import pluginmanager
+from pydub import AudioSegment
+from pydub.playback import play
 import speech_recognition as sr
-from playsound import playsound
 
 # Edit path
 sys.path.append("./")
 
 # Load configuration
+print('[*] Loading config... ')
 CONF = config.Config("./config")
 
 # Init STT & TTS
+print('[*] Loading STT & TTS... ')
 r = sr.Recognizer()
-tts = tts.TTS(CONF.get_lang(), CONF.get_tts_model())
+tts = tts.TTS(CONF.get_tts_model())
 
 # Load plugins:
+print('[*] Loading plugins... ')
 pm = pluginmanager.PluginManager()
+pm.load_plugins()
 
 # Init LLM
+print('[*] Loading LLM... ')
 llm = llm_con.Connector(
     CONF.get_main_model(),
     CONF.get_main_service(),
@@ -41,6 +47,7 @@
 )
 
 # Main func
+print('[*] Starting... ')
 def listen_and_repeat(last_communication):
     """Main listen & repeat function"""
     with sr.Microphone() as source:
@@ -49,7 +56,7 @@ def listen_and_repeat(last_communication):
 
         #user_message = "ok lydia execute la commande 'weather' pour récupérer la météo."
 
-        if (time.time() - last_communication) < 5:
+        if (time.time() - last_communication) < 10:
             present = True
             message = user_message
         else:
@@ -64,7 +71,8 @@ def listen_and_repeat(last_communication):
                     break
         if present:
-            playsound("./lidya/ressources/sounds/success_blip.mp3")
+            song = AudioSegment.from_file("./lidya/ressources/sounds/success_blip.mp3", format="mp3")
+            play(song)
             print("[*] Generation process starting... ")
             print(
                 f"[*] API query: {CONF.get_main_service()}, with model {CONF.get_main_model()}..."
             )
@@ -73,7 +81,9 @@ def listen_and_repeat(last_communication):
             try:
                 llm_result = json.loads(llm.interact(message))
             except openai.APIConnectionError:
-                playsound("./lidya/ressources/sounds/fail_blip.mp3")
+                song = AudioSegment.from_file("./lidya/ressources/sounds/fail_blip.mp3", format="mp3")
+                play(song)
+
                 tts.play_generate_audio(
                     CONF.get_messages()[CONF.get_lang()]["llm_error"]
                 )
@@ -96,7 +106,12 @@ def listen_and_repeat(last_communication):
             )
 
             print("[*] Generating audio... ")
-            tts.play_generate_audio(llm_result["message"])
+            if isinstance(llm_result, dict) and "message" in llm_result.keys():
+                tts.play_generate_audio(llm_result["message"])
+            else:
+                song = AudioSegment.from_file("./lidya/ressources/sounds/fail_blip.mp3", format="mp3")
+                play(song)
+                tts.play_generate_audio(llm_result)
             print("[*] Process finished. \n")
 
     last_communication = time.time()

diff --git a/lidya/libs/config.py b/lidya/libs/config.py
index b3dba88..1c5c74a 100644
--- a/lidya/libs/config.py
+++ b/lidya/libs/config.py
@@ -61,3 +61,21 @@ def get_prompt(self):
     def get_messages(self):
         """Lidya pre-created messages loader"""
         return self.messages
+
+    def rewrite_config(self):
+        wakewords_file = self.config_folder + "/wakewords.json"
+        config_file = self.config_folder + "/config.json"
+        keys_file = self.config_folder + "/keys.json"
+        messages_file = self.config_folder + "/messages.json"
+
+        with open(wakewords_file, "w", encoding="utf-8") as file:
+            file.write(json.dumps(self.wakewords))
+
+        with open(config_file, "w", encoding="utf-8") as file:
+            file.write(json.dumps(self.config))
+
+        with open(keys_file, "w", encoding="utf-8") as file:
+            file.write(json.dumps(self.keys))
+
+        with open(messages_file, "w", encoding="utf-8") as file:
+            file.write(json.dumps(self.messages))
\ No newline at end of file

diff --git a/lidya/libs/pluginmanager.py b/lidya/libs/pluginmanager.py
index 84dd5f5..629d2b8 100644
--- a/lidya/libs/pluginmanager.py
+++ b/lidya/libs/pluginmanager.py
@@ -30,10 +30,8 @@ def process_actions(self, actions):
 
             if action["args"] == {}:
                 result = self.execute_plugin_action(action_name)
             else:
-                print(action)
                 mapping = {action['name']: action['value']}
-                print(mapping)
                 result = self.execute_plugin_action(action_name, args=mapping)
 
             if result is not None:
@@ -59,6 +57,4 @@ def load_plugins(self):
 
             plugin_manager_json.append(plugin_json_conf)
 
-        print(plugin_manager_json)
-
         return plugin_manager_json

diff --git a/lidya/libs/tts.py b/lidya/libs/tts.py
index da7baa5..e979c3d 100644
--- a/lidya/libs/tts.py
+++ b/lidya/libs/tts.py
@@ -1,54 +1,43 @@
 """Lidya TTS - Made by SunWater_"""
 
 # Imports
-from dimits import Dimits
+import dimits
+from pydub import AudioSegment
+from pydub.playback import play
+from playsound import playsound
 
 # Main class
-
 class TTS:
     """ Main TTS Lib for Lidya."""
 
-    def __init__(self, lang, voice):
+    def __init__(self, model):
         """ Initialize the Dimits object. This is the constructor for the Vehicle class. You need to call this before you use the Vehicle class.
-
-        @param lang - language of the voice e. g. " de "
-        @param voice - voice of the voice e. g. " mus "
         """
-        self.lang = lang
-        self.voice = voice
-        self.model = lang + "-" + voice
-
-        self.dimits = Dimits(self.model)
+        self.model = dimits.TextToSpeechModel(f'./models/{model}/{model}.onnx')
 
     def generate_audio(self, text, location):
         """ Generate and save audio file. This method is used to generate a sound file from text. The file is saved to location
-
-        @param text - text to be played in sound file
-        @param location - location where to save sound file. It will be saved to
-        this location
-
-        @return location where audio file was saved to. It will be saved to this
-        location after completion of the program
         """
-        self.dimits.text_2_audio_file(
-            text, location.split("/")[-1], "/".join(location.split("/")[0:-1])
-        )
+        generated_bytes = self.model.synthesize(text)
+        with open(location, 'wb') as f:
+            f.write(generated_bytes)
+        return location
 
     def play_generate_audio(self, text):
         """ Play a generated audio.
         This is a convenience method for use in tests that want to play audio from a text that has been generated by the user
-
-        @param text - The text that should be
         """
-        self.dimits.text_2_speech(text)
+        self.generate_audio(text, './cache/tts_output.wav')
+        song = AudioSegment.from_file("./cache/tts_output.wav", format="wav")
+        play(song)
 
 
 # Tests
 if __name__ == "__main__":

diff --git a/lydia.sh b/lydia.sh
deleted file mode 100755
index fc77b32..0000000
--- a/lydia.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-# Run lidya
-
-python3 lidya/core.py
\ No newline at end of file

diff --git a/scripts/setup.py b/scripts/setup.py
index 614df4c..18f7380 100644
--- a/scripts/setup.py
+++ b/scripts/setup.py
@@ -7,6 +7,11 @@
 import re
 import json
 import requests
+from rich import print as dprint
+from rich.console import Console
+from rich.prompt import Prompt
+
+console = Console()
 
 CONFIG_URL = "https://pastebin.com/raw/Rmu8qxB9"
 MESSAGES_URL = "https://pastebin.com/raw/4BaVZZS7"
@@ -17,18 +22,21 @@
 KEYS_FILE = "./config/keys.json"
 WAKEWORDS_FILE = "./config/wakewords.json"
 
-print('[*] Collecting default configurations... ')
-CONFIG = json.loads(requests.get(CONFIG_URL, timeout=500).text)
-MESSAGES = json.loads(requests.get(MESSAGES_URL, timeout=500).text)
-KEYS = json.loads(requests.get(KEYS_URL, timeout=500).text)
-WAKEWORDS = json.loads(requests.get(WAKEWORDS_URL, timeout=500).text)
-
-print('[*] Setup wizard is ready! Please complete few questions bellow.')
+dprint('[bold blue][*][/bold blue] Collecting default configurations... ')
+with console.status("Fetching configuration... ",
+                    spinner="dots10",
+                    spinner_style="blue") as progress:
+    CONFIG = json.loads(requests.get(CONFIG_URL, timeout=100).text)
+    MESSAGES = json.loads(requests.get(MESSAGES_URL, timeout=100).text)
+    KEYS = json.loads(requests.get(KEYS_URL, timeout=100).text)
+    WAKEWORDS = json.loads(requests.get(WAKEWORDS_URL, timeout=100).text)
+dprint('[bold blue][*][/bold blue] Online default configuration is ready! ')
+dprint('[bold blue][*][/bold blue] Setup wizard is ready! Please complete a few questions below.')
 
 # Collect OpenAI key
 VALIDATED = False
 while VALIDATED is False:
-    OPENAI_KEY = input('[?] We need a working API key for OpenAI: ')
+    OPENAI_KEY = Prompt.ask('[bold blue][?][/bold blue] We need a working API key for OpenAI')
 
     # Validating key
     api_key_pattern = re.compile(r"sk-*")
@@ -36,22 +44,24 @@
         VALIDATED = True
     else:
         VALIDATED = False
-        print('[!] The key does not appear to be valid. Check if there is a line break at the end.')
+        dprint('[bold yellow][!][/bold yellow] The key does not appear to be valid. ' +
+               'Check if there is a line break at the end.')
 
-print('[*] Updating configuration... ')
+dprint('[bold blue][*][/bold blue] Updating configuration... ')
 KEYS['openai'] = OPENAI_KEY
 
 # Collect favorite model
-OPENAI_FAV_MODEL = input('[?] What is your favorite OpenAI llm model [gpt-3.5-turbo]: ')
+OPENAI_FAV_MODEL = Prompt.ask('[bold blue][?][/bold blue] What is your favorite OpenAI LLM model',
+                              default="gpt-3.5-turbo")
 if OPENAI_FAV_MODEL == "":
-    print('[*] Nothing provied. Choosing gpt-3.5-turbo')
+    dprint('[bold blue][*][/bold blue] Nothing provided. Choosing gpt-3.5-turbo')
     OPENAI_FAV_MODEL = "gpt-3.5-turbo"
 
-print('[*] Updating configuration... ')
+dprint('[bold blue][*][/bold blue] Updating configuration... ')
 CONFIG['main_model'] = OPENAI_FAV_MODEL
 
-print('[*] Writing configuration... ')
+dprint('[bold blue][*][/bold blue] Writing configuration... ')
 with open(CONFIG_FILE, 'w', encoding="utf8") as f:
     f.write(json.dumps(CONFIG))
 
@@ -65,7 +75,7 @@
 with open(MESSAGES_FILE, 'w', encoding="utf8") as f:
     f.write(json.dumps(MESSAGES))
 
-print('The assistant is ready for use! \
+dprint('The assistant is ready for use! \
 Note that you can change the STT \
 language and model in the setting\
 s outside of the wizard. However, \