-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Jerjou Cheng
committed
Apr 7, 2016
1 parent
a25245c
commit bd7b58d
Showing
8 changed files
with
232 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
gcloud==0.12.0 | ||
grpcio==0.13.1 | ||
PyAudio==0.2.9 | ||
grpc-google-cloud-speech==1.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
google-api-python-client==1.5.0 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#!/usr/bin/python | ||
|
||
import contextlib | ||
import threading | ||
|
||
from gcloud.credentials import get_credentials | ||
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa | ||
from google.rpc import code_pb2 | ||
from grpc.beta import implementations | ||
import pyaudio | ||
|
||
# Microphone capture settings.
RATE = 16000  # sampling rate
CHANNELS = 1  # number of audio channels to record
CHUNK = RATE // 10  # samples per buffer: one tenth of a second (100ms)

# How long, in seconds, the streaming request is kept alive (8 hours).
DEADLINE_SECS = 60 * 60 * 8
# OAuth scope requested when scoping the application default credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
|
||
|
||
def make_channel(host, port):
    """Build a secure channel to ``host:port`` that carries Google credentials.

    Default SSL channel credentials are combined with call credentials that
    attach an ``Authorization: Bearer <token>`` header. The access token is
    read once, when this function runs, from the application default
    credentials scoped to ``SPEECH_SCOPE``.

    Args:
        host: Host name of the endpoint to connect to.
        port: Port of the endpoint to connect to.

    Returns:
        The result of ``implementations.secure_channel`` for the composed
        SSL + auth credentials.
    """
    # An https call needs SSL channel credentials; use the defaults.
    ssl_creds = implementations.ssl_channel_credentials(None, None, None)

    # Application default credentials from the environment, scoped for
    # the speech API.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    token = scoped_creds.get_access_token().access_token
    bearer_header = ('Authorization', 'Bearer ' + token)

    def inject_auth_header(_, callback):
        # Metadata plugin: hand the prepared header to the callback.
        return callback([bearer_header], None)

    call_creds = implementations.metadata_call_credentials(
        inject_auth_header, name='google_creds')

    # Both SSL and Google auth are needed, so compose the two.
    combined_creds = implementations.composite_channel_credentials(
        ssl_creds, call_creds)

    return implementations.secure_channel(host, port, combined_creds)
|
||
|
||
@contextlib.contextmanager
def record_audio(channels, rate, chunk):
    """Opens a recording stream in a context manager.

    The stream is stopped and closed, and the audio interface terminated,
    when the ``with`` block exits -- including when it exits because of an
    exception or because the enclosing generator is closed.

    Args:
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Number of frames per buffer.

    Yields:
        The opened input stream (16-bit samples, ``pyaudio.paInt16``).
    """
    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16, channels=channels, rate=rate,
            input=True, frames_per_buffer=chunk,
        )
        try:
            yield audio_stream
        finally:
            # Without this `finally`, an exception thrown into the generator
            # at the `yield` would skip the cleanup and leak the stream.
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Release the interface even if opening the stream itself failed.
        audio_interface.terminate()
|
||
|
||
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Generate `RecognizeRequest`s from a live recording.

    The first request carries both the stream metadata and the first chunk
    of audio; every later request carries audio only.

    Args:
        stop_audio: A threading.Event object; once it is set, no further
            audio is read and the generator finishes.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the
            api.
    """
    with record_audio(channels, rate, chunk) as mic:
        # The server needs to know how to interpret the audio, so the
        # opening request describes the stream.
        stream_info = InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate)
        first_audio = AudioRequest(content=mic.read(chunk))
        yield RecognizeRequest(
            initial_request=stream_info, audio_request=first_audio)

        # Everything after the opening request is audio content only.
        while True:
            if stop_audio.is_set():
                break
            next_audio = AudioRequest(content=mic.read(chunk))
            yield RecognizeRequest(audio_request=next_audio)
|
||
|
||
def listen_print_loop(recognize_stream):
    """Print transcription results until an exit keyword is recognized.

    Args:
        recognize_stream: Iterable of responses; each has an ``error`` (with
            ``code`` and ``message``) and ``results`` whose ``alternatives``
            carry ``transcript`` and ``confidence``.

    Raises:
        RuntimeError: If a response reports an error code other than OK.
    """
    exit_words = ('exit', 'quit')

    for response in recognize_stream:
        if response.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + response.error.message)

        # Show every transcription together with its alternatives.
        for result in response.results:
            print(result.alternatives)

        # Stop as soon as any sufficiently confident alternative is one of
        # the exit keywords.
        for result in response.results:
            for alternative in result.alternatives:
                if (alternative.confidence > .5 and
                        alternative.transcript.strip() in exit_words):
                    print('Exiting..')
                    return
|
||
|
||
def main():
    """Stream recorded audio to the speech service and print the results."""
    stop_audio = threading.Event()
    channel = make_channel('speech.googleapis.com', 443)

    with beta_create_Speech_stub(channel) as service:
        try:
            requests = request_stream(stop_audio)
            responses = service.Recognize(requests, DEADLINE_SECS)
            listen_print_loop(responses)
        finally:
            # The request stream runs in a thread the grpc lib makes for it;
            # signal it to stop once the loop is done, or it keeps going.
            stop_audio.set()


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Copyright 2016, Google, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import contextlib | ||
import io | ||
import re | ||
import sys | ||
|
||
import pytest | ||
|
||
import speech_streaming | ||
|
||
|
||
class MockAudioStream(object):
    """File-backed stand-in for a recording stream.

    Reads come from the audio file first; once the file is exhausted they
    come from a buffer of trailing silence (zero bytes).

    Use as a context manager: the audio file is opened on entry and closed
    on exit. Calling the instance returns the instance itself.
    """

    def __init__(self, audio_filename, trailing_silence_secs=10):
        """Remember the file name and prepare the trailing silence.

        Args:
            audio_filename: Path of the raw audio file to serve.
            trailing_silence_secs: Seconds of silence available after the
                file runs out, at ``speech_streaming.RATE`` samples/second.
        """
        self.audio_filename = audio_filename
        # Two zero bytes per 16-bit sample. This must be a bytes literal:
        # io.BytesIO rejects text on Python 3.
        self.silence = io.BytesIO(
            b'\0\0' * speech_streaming.RATE * trailing_silence_secs)

    def __enter__(self):
        # Binary mode: the file holds raw audio bytes, not text.
        self.audio_file = open(self.audio_filename, 'rb')
        return self

    def __exit__(self, *args):
        self.audio_file.close()

    def __call__(self, *args):
        return self

    def read(self, num_frames):
        """Return up to ``num_frames`` frames of audio as bytes.

        Falls back to the silence buffer when the file has no data left.
        """
        # audio is 16-bit samples, whereas python byte is 8-bit
        num_bytes = 2 * num_frames
        chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes)
        return chunk
|
||
|
||
def mock_audio_stream(filename):
    """Return a context-manager factory that serves audio from `filename`.

    The returned callable takes the same ``(channels, rate, chunk)``
    arguments as a recorder but ignores them; it yields the file opened in
    binary mode and closes it when the ``with`` block ends.
    """
    @contextlib.contextmanager
    def open_recording(channels, rate, chunk):
        audio_file = open(filename, 'rb')
        try:
            yield audio_file
        finally:
            audio_file.close()

    return open_recording
|
||
|
||
@pytest.mark.skipif(
    sys.version_info >= (3, 0), reason="can't get grpc lib to work in python3")
def test_main(resource, monkeypatch, capsys):
    """Run `main` against a recorded file and expect a "quit" transcript."""
    # Serve audio from the recorded file instead of recording live audio.
    fake_recorder = mock_audio_stream(resource('quit.raw'))
    monkeypatch.setattr(speech_streaming, 'record_audio', fake_recorder)
    # Shorten the request deadline so the test finishes quickly.
    monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5)

    speech_streaming.main()
    out, _unused_err = capsys.readouterr()

    assert re.search(r'transcript.*"quit"', out, re.DOTALL | re.I)