This repository has been archived by the owner on Feb 21, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tts.py
62 lines (46 loc) · 2.14 KB
/
tts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def synthesize_text_with_audio_profile(text, effects_profile_id='handset-class-device'):
    """Synthesize speech from SSML and play it back.

    The input is sent to Google Cloud Text-to-Speech as SSML, requesting a
    female ``pl-pl`` voice and LINEAR16 audio with the given audio effects
    profile. The returned audio is written to ``output.wav`` (relative to the
    current working directory, overwriting any existing file) and then played
    through a PyAudio output stream.

    Args:
        text: SSML markup to synthesize; it is passed to the service as the
            ``ssml`` field of the synthesis input.
        effects_profile_id: Identifier of the single audio effects profile to
            apply to the synthesized audio.

    Returns:
        None.

    Raises:
        Nothing is caught here: import errors and any exception raised by the
        Text-to-Speech client, the file system, ``wave`` or PyAudio propagate
        to the caller. Audio resources opened by this function are released
        before the exception leaves it.
    """
    from google.cloud import texttospeech
    import pyaudio
    import wave
    import contextlib

    # Number of frames read from the WAV file per write to the audio stream.
    chunk = 1024

    # The audio backend is created before the request, as in the original
    # flow, so an unusable audio setup fails before the service is called.
    p = pyaudio.PyAudio()
    try:
        client = texttospeech.TextToSpeechClient()
        input_text = texttospeech.types.SynthesisInput(ssml=text)

        # Note: the voice can also be specified by name.
        # Names of voices can be retrieved with client.list_voices().
        voice = texttospeech.types.VoiceSelectionParams(
            language_code='pl-pl',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

        # Note: you can pass in multiple effects_profile_id. They will be
        # applied in the same order they are provided.
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16,
            effects_profile_id=[effects_profile_id])

        response = client.synthesize_speech(input_text, voice, audio_config)

        # The response's audio_content is binary.
        with open('output.wav', 'wb') as out:
            out.write(response.audio_content)

        # closing() guarantees the reader is closed even if playback fails.
        with contextlib.closing(wave.open('output.wav', 'rb')) as wf:
            # Configure the output stream from the WAV header so the sample
            # format, channel count and rate match the synthesized audio.
            stream = p.open(
                format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)
            try:
                # Writing to the stream is what actually plays the sound;
                # readframes() returns empty bytes at end of file.
                data = wf.readframes(chunk)
                while data:
                    stream.write(data)
                    data = wf.readframes(chunk)
                # Stop first so buffered audio finishes playing; closing an
                # active stream may discard the pending tail.
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        p.terminate()
if __name__ == '__main__':
    # Manual smoke test: speak a short SSML greeting with the handset profile.
    demo_ssml = '<speak>Siema Aleks<break time="500ms"/> jestem Alicja z Medicover!</speak>'
    synthesize_text_with_audio_profile(demo_ssml, effects_profile_id="handset-class-device")
    # Alternative phrase, kept disabled:
    #synthesize_text_with_audio_profile('Przepraszam, nic nie słyszę.')