diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index efd083158956..e0313abf17ae 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
 		// Render index
 		return c.Render("views/chat", summary)
 	})
+
+	// Talk page.
+	app.Get("/talk/", auth, func(c *fiber.Ctx) error {
+		configs := cl.GetAllBackendConfigs()
+
+		// With no model configured, send the user to the index, which
+		// suggests how to install models.
+		if len(configs) == 0 {
+			return c.Redirect("/")
+		}
+
+		// Hand the view every config plus the first one's name as "Model".
+		return c.Render("views/talk", fiber.Map{
+			"Title":        "LocalAI - Talk",
+			"ModelsConfig": configs,
+			"Model":        configs[0].Name,
+			"Version":      internal.PrintableVersion(),
+		})
+	})
+
 	app.Get("/chat/", auth, func(c *fiber.Ctx) error {
 		backendConfigs := cl.GetAllBackendConfigs()
 
diff --git a/core/http/static/talk.js b/core/http/static/talk.js
new file mode 100644
index 000000000000..3072da8473af
--- /dev/null
+++ b/core/http/static/talk.js
@@ -0,0 +1,191 @@
+
+// Page controls looked up once when the script loads.
+const recordButton = document.getElementById('recordButton');
+const audioPlayback = document.getElementById('audioPlayback');
+const resetButton = document.getElementById('resetButton');
+
+// Recorder state and conversation context shared across this script.
+let mediaRecorder;
+let audioChunks = [];
+let isRecording = false;
+let conversationHistory = [];
+let resetTimer;
+
+/** Returns the current value of the element with id "apiKey". */
+function getApiKey() {
+    const keyInput = document.getElementById('apiKey');
+    return keyInput.value;
+}
+
+/** Returns the current value of the element with id "modelSelect". */
+function getModel() {
+    const modelSelect = document.getElementById('modelSelect');
+    return modelSelect.value;
+}
+
+/** Returns the current value of the element with id "whisperModelSelect". */
+function getWhisperModel() {
+    const whisperSelect = document.getElementById('whisperModelSelect');
+    return whisperSelect.value;
+}
+
+/** Returns the current value of the element with id "ttsModelSelect". */
+function getTTSModel() {
+    const ttsSelect = document.getElementById('ttsModelSelect');
+    return ttsSelect.value;
+}
+
+/** Empties the conversation history and cancels any pending automatic reset. */
+function resetConversation() {
+    clearTimeout(resetTimer);
+    conversationHistory = [];
+    console.log("Conversation has been reset.");
+}
+
+/** (Re)arms the timer that resets the conversation after five minutes. */
+function setResetTimer() {
+    const fiveMinutesMs = 5 * 60 * 1000;
+    clearTimeout(resetTimer);
+    resetTimer = setTimeout(resetConversation, fiveMinutesMs);
+}
+
+// Wire the two buttons to their handlers.
+recordButton.addEventListener('click', toggleRecording);
+resetButton.addEventListener('click', resetConversation);
+
+/** Starts a recording when idle, stops the running one otherwise. */
+function toggleRecording() {
+    if (!isRecording) {
+        startRecording();
+    } else {
+        stopRecording();
+    }
+}
+
+/**
+ * Requests microphone access and starts capturing audio.
+ *
+ * The page only switches to its "recording" state once capture has really
+ * started, so an unsupported browser or a denied permission leaves the
+ * controls untouched and usable.
+ */
+async function startRecording() {
+    if (!navigator.mediaDevices) {
+        alert('MediaDevices API not supported!');
+        return;
+    }
+
+    let stream;
+    try {
+        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    } catch (err) {
+        // getUserMedia rejects when permission is denied or no device exists.
+        console.error("Could not access the microphone:", err);
+        alert('Could not access the microphone: ' + err.message);
+        return;
+    }
+
+    mediaRecorder = new MediaRecorder(stream);
+    audioChunks = [];
+    mediaRecorder.ondataavailable = (event) => {
+        audioChunks.push(event.data);
+    };
+    mediaRecorder.start();
+
+    document.getElementById("recording").style.display = "block";
+    document.getElementById("resetButton").style.display = "none";
+    recordButton.textContent = 'Stop Recording';
+    // Mark the button (bg-gray-500) while a recording is in progress.
+    recordButton.classList.add("bg-gray-500");
+
+    isRecording = true;
+}
+
+/**
+ * Stops the current recording and runs the pipeline on the captured audio:
+ * transcription -> chat completion -> text-to-speech -> playback.
+ *
+ * The controls are restored whether the pipeline succeeds or fails; on
+ * failure the status line shows the error instead of the idle prompt.
+ */
+function stopRecording() {
+    const recorder = mediaRecorder;
+
+    // Register the handler before calling stop() so the event cannot be missed.
+    recorder.onstop = async () => {
+        // Release the microphone; this stream is not reused.
+        recorder.stream.getTracks().forEach((track) => track.stop());
+
+        const status = document.getElementById("statustext");
+        document.getElementById("recording").style.display = "none";
+        document.getElementById("recordButton").style.display = "none";
+        document.getElementById("loader").style.display = "block";
+
+        let finalStatus = "Press the record button to start recording.";
+        try {
+            const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+            status.textContent = "Processing audio...";
+            const transcript = await sendAudioToWhisper(audioBlob);
+            console.log("Transcript:", transcript);
+
+            status.textContent = "Seems you said: " + transcript + ". Generating response...";
+            const responseText = await sendTextToChatGPT(transcript);
+            console.log("Response:", responseText);
+
+            status.textContent = "Response generated: '" + responseText + "'. Generating audio response...";
+            const ttsAudio = await getTextToSpeechAudio(responseText);
+            playAudioResponse(ttsAudio);
+        } catch (err) {
+            console.error("Talk pipeline failed:", err);
+            finalStatus = "Something went wrong: " + err.message + ". Press the record button to try again.";
+        } finally {
+            // Always hand the controls back, otherwise a failed request
+            // would leave the loader up and the record button hidden.
+            recordButton.textContent = 'Record';
+            recordButton.classList.remove("bg-gray-500");
+            isRecording = false;
+            document.getElementById("loader").style.display = "none";
+            document.getElementById("recordButton").style.display = "block";
+            document.getElementById("resetButton").style.display = "block";
+            status.textContent = finalStatus;
+        }
+    };
+
+    recorder.stop();
+}
+
+/** Submit handler of the key form: stores the API key in localStorage. */
+function submitKey(event) {
+    event.preventDefault();
+    localStorage.setItem("key", document.getElementById("apiKey").value);
+    document.getElementById("apiKey").blur();
+}
+
+document.getElementById("key").addEventListener("submit", submitKey);
+
+// Pre-fill the key input with the stored key, or clear it when none is stored.
+const storeKey = localStorage.getItem("key");
+document.getElementById("apiKey").value = storeKey ? storeKey : "";
+
+/**
+ * Builds request headers, adding the Authorization header only when a key
+ * is stored (avoids sending a literal "Bearer null").
+ */
+function authHeaders(extraHeaders) {
+    const headers = Object.assign({}, extraHeaders);
+    const apiKey = localStorage.getItem("key");
+    if (apiKey) {
+        headers['Authorization'] = `Bearer ${apiKey}`;
+    }
+    return headers;
+}
+
+/** Throws a descriptive Error when the response has a non-2xx status. */
+async function ensureOk(response, what) {
+    if (response.ok) {
+        return;
+    }
+    const detail = await response.text().catch(() => "");
+    throw new Error(what + " failed with HTTP " + response.status + (detail ? ": " + detail : ""));
+}
+
+/**
+ * Posts the recorded audio to /v1/audio/transcriptions.
+ * Returns the "text" field of the JSON response; throws on a non-2xx status.
+ */
+async function sendAudioToWhisper(audioBlob) {
+    const formData = new FormData();
+    formData.append('file', audioBlob);
+    formData.append('model', getWhisperModel());
+
+    const response = await fetch('/v1/audio/transcriptions', {
+        method: 'POST',
+        headers: authHeaders(),
+        body: formData
+    });
+    await ensureOk(response, "Transcription");
+
+    const result = await response.json();
+    console.log("Whisper result:", result);
+    return result.text;
+}
+
+/**
+ * Appends the user's text to the conversation, posts the whole history to
+ * /v1/chat/completions and returns the assistant's reply, which is also
+ * appended to the history. Throws on a non-2xx status or an empty reply.
+ */
+async function sendTextToChatGPT(text) {
+    const userMessage = { role: "user", content: text };
+    conversationHistory.push(userMessage);
+
+    try {
+        const response = await fetch('/v1/chat/completions', {
+            method: 'POST',
+            headers: authHeaders({ 'Content-Type': 'application/json' }),
+            body: JSON.stringify({
+                model: getModel(),
+                messages: conversationHistory
+            })
+        });
+        await ensureOk(response, "Chat completion");
+
+        const result = await response.json();
+        const choice = result.choices && result.choices[0];
+        if (!choice || !choice.message) {
+            throw new Error("Chat completion returned no choices");
+        }
+
+        const responseText = choice.message.content;
+        conversationHistory.push({ role: "assistant", content: responseText });
+
+        setResetTimer();
+
+        return responseText;
+    } catch (err) {
+        // Drop the unanswered user turn so a retry does not send it twice.
+        const index = conversationHistory.lastIndexOf(userMessage);
+        if (index !== -1) {
+            conversationHistory.splice(index, 1);
+        }
+        throw err;
+    }
+}
+
+/**
+ * Posts the text to /v1/audio/speech and returns the response body as a
+ * Blob. Throws on a non-2xx status.
+ */
+async function getTextToSpeechAudio(text) {
+    const response = await fetch('/v1/audio/speech', {
+        method: 'POST',
+        headers: authHeaders({ 'Content-Type': 'application/json' }),
+        body: JSON.stringify({
+            // "backend": "string",
+            input: text,
+            model: getTTSModel(),
+            // "voice": "string"
+        })
+    });
+    await ensureOk(response, "Text-to-speech");
+
+    return response.blob();
+}
+
+/** Shows the audio element and plays the given blob through it. */
+function playAudioResponse(audioBlob) {
+    const previousUrl = audioPlayback.src;
+    audioPlayback.src = URL.createObjectURL(audioBlob);
+    // Free the previous response's blob URL now that it is no longer the source.
+    if (previousUrl && previousUrl.startsWith("blob:")) {
+        URL.revokeObjectURL(previousUrl);
+    }
+    audioPlayback.hidden = false;
+
+    // play() returns a promise that rejects when playback is blocked.
+    const playback = audioPlayback.play();
+    if (playback && typeof playback.catch === "function") {
+        playback.catch((err) => console.warn("Audio playback could not start:", err));
+    }
+}
+
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index be238479f8b3..caa1f3b77c9f 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -20,6 +20,7 @@ Chat Generate images TTS + Talk API
@@ -32,6 +33,7 @@ Chat Generate images TTS + Talk API
diff --git a/core/http/views/talk.html b/core/http/views/talk.html
new file mode 100644
index 000000000000..862520d1fb22
--- /dev/null
+++ b/core/http/views/talk.html
@@ -0,0 +1,108 @@ + + + {{template "views/partials/head" .}} + + +
+