-
Notifications
You must be signed in to change notification settings - Fork 59
/
ai_voicetalk_local.py
148 lines (117 loc) · 5.11 KB
/
ai_voicetalk_local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
if __name__ == '__main__':
    from RealtimeTTS import TextToAudioStream, CoquiEngine
    from RealtimeSTT import AudioToTextRecorder
    import llama_cpp
    import torch
    import json
    import os

    # Full assistant reply of the current turn; filled by generate().
    output = ""

    # Backend selection: prefer the CUDA build of llama.cpp when an NVIDIA
    # GPU is available, fall back to the ROCm/CPU build otherwise.
    llama_cpp_cuda = None
    if torch.cuda.is_available():
        try:
            print("try to import llama_cpp_cuda")
            import llama_cpp_cuda
        # Narrow catch: only a failed import should fall back; the original
        # bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        except ImportError:
            print("llama_cpp_cuda import failed")
            llama_cpp_cuda = None
    elif torch.version.hip:
        try:
            print("try to import llama_cpp")
            import llama_cpp
        except ImportError:
            print("ROCm is not available")
            llama_cpp = None
def llama_cpp_lib():
    """Return the active llama.cpp binding: the CUDA build when it was
    successfully imported, otherwise the default (CPU/ROCm) build."""
    if llama_cpp_cuda is not None:
        print ("llama_cpp_lib: return llama_cpp_cuda")
        return llama_cpp_cuda
    print ("llama_cpp_lib: return llama_cpp")
    return llama_cpp
# Resolve the Llama class from whichever llama.cpp binding is active.
Llama = llama_cpp_lib().Llama
# Rolling chat transcript: alternating <|user|>/<|assistant|> turn strings,
# oldest first; trimmed in the main loop to fit the context window.
history = []
def replace_placeholders(params, char, user, scenario=""):
    """Substitute {char}, {user} and — when a scenario is given — {scenario}
    in every string value of *params*, in place. Returns the same dict."""
    substitutions = [("{char}", char), ("{user}", user)]
    if scenario:
        substitutions.append(("{scenario}", scenario))
    for key, value in params.items():
        if isinstance(value, str):
            for placeholder, replacement in substitutions:
                value = value.replace(placeholder, replacement)
            params[key] = value
    return params
def write_file(file_path, content, mode='w', encoding='utf-8'):
    """Write (or append, per *mode*) text *content* to *file_path*.

    Defaults to UTF-8 so chat text containing non-ASCII characters does not
    raise UnicodeEncodeError on platforms whose locale default is narrower
    (e.g. cp1252 on Windows). The *encoding* parameter is new but defaulted,
    so existing call sites are unaffected.
    """
    with open(file_path, mode, encoding=encoding) as f:
        f.write(content)
def clear_console():
    """Wipe the terminal: `clear` on POSIX systems, `cls` elsewhere (Windows)."""
    command = 'clear' if os.name == 'posix' else 'cls'
    os.system(command)
def encode(string):
    """Tokenize with the loaded llama.cpp model; accepts str (UTF-8 encoded
    first) or raw bytes."""
    if isinstance(string, str):
        string = string.encode()
    return model.tokenize(string)
def count_tokens(string):
    """Number of model tokens the given text occupies."""
    tokens = encode(string)
    return len(tokens)
def create_prompt():
    """Assemble the chat prompt: system block, optional initial assistant
    message, the accumulated history, and a trailing assistant tag that
    cues the model to respond."""
    parts = [f'<|system|>\n{chat_params["system_prompt"]}</s>\n']
    if chat_params["initial_message"]:
        parts.append(f"<|assistant|>\n{chat_params['initial_message']}</s>\n")
    parts.extend(history)
    parts.append("<|assistant|>")
    return "".join(parts)
def generate():
    """Stream completion text from the LLM chunk by chunk.

    Skips any leading whitespace-only chunks, accumulates the full reply in
    the module-global `output` (read later for the history), and logs the
    exact prompt to last_prompt.txt before the request.
    """
    global output
    output = ""
    prompt = create_prompt()
    write_file('last_prompt.txt', prompt)
    completion_params['prompt'] = prompt
    skipping_leading_ws = True
    for chunk in model.create_completion(**completion_params):
        piece = chunk['choices'][0]['text']
        if skipping_leading_ws:
            if piece.isspace():
                continue
            skipping_leading_ws = False
        output += piece
        yield piece
# Load the three JSON config files: model-construction arguments,
# per-request completion arguments, and chat persona/scenario settings.
with open('creation_params.json') as f:
    creation_params = json.load(f)
with open('completion_params.json') as f:
    completion_params = json.load(f)
with open('chat_params.json') as f:
    chat_params = json.load(f)
# Two passes on purpose: the first resolves {char}/{user} everywhere —
# including inside the scenario text itself — and the second substitutes
# {scenario} using that already-resolved scenario string.
chat_params = replace_placeholders(chat_params, chat_params["char"], chat_params["user"])
chat_params = replace_placeholders(chat_params, chat_params["char"], chat_params["user"], chat_params["scenario"])
# JSON cannot express a callable; normalize a falsy or absent placeholder to
# None, which is what llama.cpp expects when no logits processor is used.
# (.get avoids the KeyError the original raised when the key was missing.)
if not completion_params.get('logits_processor'):
    completion_params['logits_processor'] = None
# Initialize the local LLM from the JSON-provided construction arguments.
print("Initializing LLM llama.cpp model ...")
model = Llama(**creation_params)
print("llama.cpp model initialized")
print("Initializing TTS CoquiEngine ...")
# Debug variants kept for reference (verbose logging setup):
# import logging
# logging.basicConfig(format='AI Voicetalk: %(message)s', level=logging.DEBUG)
# coqui_engine = CoquiEngine(cloning_reference_wav="female.wav", language="en", level=logging.DEBUG)
# Voice cloning uses female.wav as the reference sample; speed 1.0 = normal rate.
coqui_engine = CoquiEngine(cloning_reference_wav="female.wav", language="en", speed=1.0)
print("Initializing STT AudioToTextRecorder ...")
#stream = TextToAudioStream(coqui_engine, log_characters=True, level=logging.DEBUG)
stream = TextToAudioStream(coqui_engine, log_characters=True)
# "tiny.en" STT model (presumably Whisper tiny English — confirm against
# RealtimeSTT docs); spinner disabled to keep the console output clean.
recorder = AudioToTextRecorder(model="tiny.en", language="en", spinner=False)
print()
# Interactive voice selection: preview numbered reference samples from the
# ./voices directory until the user accepts one (any answer but "n" accepts).
while True:
    voice_number = input(f"Select voice (1-5): ")
    # No validation here — a number outside 1-5 yields a missing .wav path;
    # NOTE(review): set_voice behavior for a missing file is not visible here.
    voice_path = os.path.join("voices", f"voice{voice_number}.wav")
    coqui_engine.set_voice(voice_path)
    # Speak a short sample so the user can judge the cloned voice.
    stream.feed(f"This is how voice number {voice_number} sounds like").play()
    #stream.feed("This is how your selected voice sounds like").play()
    accept_voice = input(f"Accept voice (y/n): ")
    if accept_voice.lower() != "n":
        break
clear_console()
print(f'Scenario: {chat_params["scenario"]}\n\n')
# Main chat loop: record speech -> append the user turn -> trim history to
# the token budget -> stream the LLM reply through TTS -> log the turn.
while True:
    print(f'>>> {chat_params["user"]}: ', end="", flush=True)
    # recorder.text() blocks until a complete utterance has been transcribed;
    # the walrus binds it so it can be echoed inline.
    print(f'{(user_text := recorder.text())}\n<<< {chat_params["char"]}: ', end="", flush=True)
    history.append(f"<|user|>\n{user_text}</s>\n")
    # Keep the prompt under 8192 tokens (presumably the model's context
    # window — confirm) minus 500 tokens of generation headroom, dropping
    # the oldest user/assistant pair per iteration.
    tokens_history = count_tokens(create_prompt())
    while tokens_history > 8192 - 500:
        history.pop(0)
        history.pop(0)
        tokens_history = count_tokens(create_prompt())
    # generate() yields text chunks; feeding the live generator lets TTS
    # start speaking before the completion has finished.
    generator = generate()
    stream.feed(generator)
    # fast_sentence_fragment + short minimum sentence length reduce
    # time-to-first-audio; buffer_threshold_seconds=999 presumably avoids
    # mid-stream buffering pauses — verify against RealtimeTTS docs.
    stream.play(fast_sentence_fragment=True, buffer_threshold_seconds=999, minimum_sentence_length=18, log_synthesized_text=True)
    # generate() accumulated the complete reply in the global `output`.
    history.append(f"<|assistant|>\n{output}</s>\n")
    write_file('last_prompt.txt', create_prompt())