Skip to content

Commit

Permalink
feat(ui): add page to talk with voice, transcription, and tts (#2520)
Browse files Browse the repository at this point in the history
* feat(ui): add page to talk with voice, transcription, and tts

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Enhance graphics and status reporting

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Better UX by blocking unvalid actions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  • Loading branch information
mudler authored Jun 8, 2024
1 parent aae7ad9 commit e96d2d7
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 0 deletions.
20 changes: 20 additions & 0 deletions core/http/routes/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
// Render index
return c.Render("views/chat", summary)
})

app.Get("/talk/", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs()

if len(backendConfigs) == 0 {
// If no model is available redirect to the index which suggests how to install models
return c.Redirect("/")
}

summary := fiber.Map{
"Title": "LocalAI - Talk",
"ModelsConfig": backendConfigs,
"Model": backendConfigs[0].Name,
"Version": internal.PrintableVersion(),
}

// Render index
return c.Render("views/talk", summary)
})

app.Get("/chat/", auth, func(c *fiber.Ctx) error {

backendConfigs := cl.GetAllBackendConfigs()
Expand Down
191 changes: 191 additions & 0 deletions core/http/static/talk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@

const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

let mediaRecorder;
let audioChunks = [];
let isRecording = false;
let conversationHistory = [];
let resetTimer;

function getApiKey() {
return document.getElementById('apiKey').value;
}

function getModel() {
return document.getElementById('modelSelect').value;
}

function getWhisperModel() {
return document.getElementById('whisperModelSelect').value;
}

function getTTSModel() {
return document.getElementById('ttsModelSelect').value;
}

function resetConversation() {
conversationHistory = [];
console.log("Conversation has been reset.");
clearTimeout(resetTimer);
}

function setResetTimer() {
clearTimeout(resetTimer);
resetTimer = setTimeout(resetConversation, 300000); // Reset after 5 minutes
}

recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);

function toggleRecording() {
if (!isRecording) {
startRecording();
} else {
stopRecording();
}
}

async function startRecording() {
document.getElementById("recording").style.display = "block";
document.getElementById("resetButton").style.display = "none";
if (!navigator.mediaDevices) {
alert('MediaDevices API not supported!');
return;
}
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream);
audioChunks = [];
mediaRecorder.ondataavailable = (event) => {
audioChunks.push(event.data);
};
mediaRecorder.start();
recordButton.textContent = 'Stop Recording';
// add class bg-red-500 to recordButton
recordButton.classList.add("bg-gray-500");

isRecording = true;
}

function stopRecording() {
mediaRecorder.stop();
mediaRecorder.onstop = async () => {
document.getElementById("recording").style.display = "none";
document.getElementById("recordButton").style.display = "none";

document.getElementById("loader").style.display = "block";
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
document.getElementById("statustext").textContent = "Processing audio...";
const transcript = await sendAudioToWhisper(audioBlob);
console.log("Transcript:", transcript);
document.getElementById("statustext").textContent = "Seems you said: " + transcript+ ". Generating response...";
const responseText = await sendTextToChatGPT(transcript);

console.log("Response:", responseText);
document.getElementById("statustext").textContent = "Response generated: '" + responseText + "'. Generating audio response...";

const ttsAudio = await getTextToSpeechAudio(responseText);
playAudioResponse(ttsAudio);

recordButton.textContent = 'Record';
// remove class bg-red-500 from recordButton
recordButton.classList.remove("bg-gray-500");
isRecording = false;
document.getElementById("loader").style.display = "none";
document.getElementById("recordButton").style.display = "block";
document.getElementById("resetButton").style.display = "block";
document.getElementById("statustext").textContent = "Press the record button to start recording.";
};
}

function submitKey(event) {
event.preventDefault();
localStorage.setItem("key", document.getElementById("apiKey").value);
document.getElementById("apiKey").blur();
}

document.getElementById("key").addEventListener("submit", submitKey);


storeKey = localStorage.getItem("key");
if (storeKey) {
document.getElementById("apiKey").value = storeKey;
} else {
document.getElementById("apiKey").value = null;
}


async function sendAudioToWhisper(audioBlob) {
const formData = new FormData();
formData.append('file', audioBlob);
formData.append('model', getWhisperModel());
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/transcriptions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`
},
body: formData
});

const result = await response.json();
console.log("Whisper result:", result)
return result.text;
}

async function sendTextToChatGPT(text) {
conversationHistory.push({ role: "user", content: text });
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: getModel(),
messages: conversationHistory
})
});

const result = await response.json();
const responseText = result.choices[0].message.content;
conversationHistory.push({ role: "assistant", content: responseText });

setResetTimer();

return responseText;
}

async function getTextToSpeechAudio(text) {
API_KEY = localStorage.getItem("key");

const response = await fetch('/v1/audio/speech', {

method: 'POST',
headers: {
'Authorization': `Bearer ${API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
// "backend": "string",
input: text,
model: getTTSModel(),
// "voice": "string"
})
});

const audioBlob = await response.blob();
return audioBlob; // Return the blob directly
}

function playAudioResponse(audioBlob) {
const audioUrl = URL.createObjectURL(audioBlob);
audioPlayback.src = audioUrl;
audioPlayback.hidden = false;
audioPlayback.play();
}

2 changes: 2 additions & 0 deletions core/http/views/partials/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand All @@ -32,6 +33,7 @@
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
</div>
</div>
Expand Down
108 changes: 108 additions & 0 deletions core/http/views/talk.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<!doctype html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="/static/talk.js"></script>
<style>
body {
overflow: hidden;
}
</style>
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
<div class="flex flex-col min-h-screen">

{{template "views/partials/navbar"}}
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
<!-- Chat Header -->
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">

<div class="flex items-center justify-center">

<div x-show="component === 'menu'" id="menu">

<button @click="component = 'key'" title="Update API key"
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
>Set API Key🔑</button>

</div>

<form x-show="component === 'key'" id="key">
<input
type="password"
id="apiKey"
name="apiKey"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
placeholder="API Key"
x-model.lazy="key"
/>
<button @click="component = 'menu'" type="submit" title="Save API key">
<i class="fa-solid fa-arrow-right"></i>
</button>
</form>
</div>
</div>

<div class="flex items-center justify-center">
<div class="w-full p-4 max-w-md border-t border-gray-700 ">
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
<div id="recording" class="" style="display: none;">
<i class="fa-solid fa-microphone animate-pulse text-red-700"></i>
<span class="text-white-700 text-sm font-bold mb-2">Recording... press "Stop recording" to stop</span>
</div>
<div id="loader" class="my-2 loader" style="display: none;"></div>
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
<div class="mb-4" >
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
<select id="modelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>

<div class="mb-4" >
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
<select id="whisperModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"

>
<option value="" disabled class="text-gray-400" >Select a model</option>

{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<div class="mb-4" >
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
<select id="ttsModelSelect"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
>
<option value="" disabled class="text-gray-400" >Select a model</option>
{{ range .ModelsConfig }}
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
{{ end }}
</select>
</div>


<button id="recordButton"
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
<a id="resetButton"
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
href="#"
>Reset conversation</a>
<audio id="audioPlayback" controls hidden></audio>

</div>
</div>
</div>
</div>
</body>
</html>

0 comments on commit e96d2d7

Please sign in to comment.