Skip to content

Commit

Permalink
Add speech api streaming sample.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jerjou Cheng committed Apr 7, 2016
1 parent a25245c commit bd7b58d
Show file tree
Hide file tree
Showing 8 changed files with 232 additions and 4 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@ env:
- GOOGLE_CLIENT_SECRETS=${TRAVIS_BUILD_DIR}/testing/resources/client-secrets.json
- GAE_ROOT=${HOME}/.cache/
- secure: Orp9Et2TIwCG/Hf59aa0NUDF1pNcwcS4TFulXX175918cFREOzf/cNZNg+Ui585ZRFjbifZdc858tVuCVd8XlxQPXQgp7bwB7nXs3lby3LYg4+HD83Gaz7KOWxRLWVor6IVn8OxeCzwl6fJkdmffsTTO9csC4yZ7izHr+u7hiO4=
addons:
apt:
packages:
- portaudio19-dev
before_install:
- pip install --upgrade pip wheel virtualenv
# for speech api sample
- openssl aes-256-cbc -k "$secrets_password" -in secrets.tar.enc -out secrets.tar -d
- tar xvf secrets.tar
install:
Expand Down
5 changes: 5 additions & 0 deletions nox.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def session_tests(session, interpreter, extra_pytest_args=None):
# allows users to run a particular test instead of all of them.
for sample in (session.posargs or
collect_sample_dirs('.', SESSION_TESTS_BLACKLIST)):
# Install additional dependencies if they exist
dirname = sample if os.path.isdir(sample) else os.path.dirname(sample)
for reqfile in list_files(dirname, 'requirements*.txt'):
session.install('-r', reqfile)

session.run(
'py.test', sample,
*pytest_args,
Expand Down
34 changes: 30 additions & 4 deletions speech/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,36 @@ See the
[Cloud Platform Auth Guide](https://cloud.google.com/docs/authentication#developer_workflow)
for more information.

### Install the dependencies

* If you're running the `speechrest.py` sample:

```sh
$ pip install -r requirements-speechrest.txt
```

* If you're running the `speech_streaming.py` sample:
```sh
$ pip install -r requirements-speech_streaming.txt
```
## Run the example
```sh
$ python speechrest.py resources/audio.raw
```
* To run the `speechrest.py` sample:
```sh
$ python speechrest.py resources/audio.raw
```
You should see a response with the transcription result.
* To run the `speech_streaming.py` sample:
```sh
$ python speech_streaming.py
```
You should see a response with the transcription result.
The sample will run in a continuous loop, printing the data and metadata
it receives from the Speech API, which includes alternative transcriptions
of what it hears, and a confidence score. Say "exit" to exit the loop.
4 changes: 4 additions & 0 deletions speech/api/requirements-speech_streaming.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
gcloud==0.12.0
grpcio==0.13.1
PyAudio==0.2.9
grpc-google-cloud-speech==1.0.0
1 change: 1 addition & 0 deletions speech/api/requirements-speechrest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
google-api-python-client==1.5.0
Binary file added speech/api/resources/quit.raw
Binary file not shown.
120 changes: 120 additions & 0 deletions speech/api/speech_streaming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/python

import contextlib
import threading

from gcloud.credentials import get_credentials
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa
from google.rpc import code_pb2
from grpc.beta import implementations
import pyaudio

# Audio recording parameters
# Sampling rate; the default both for opening the recording stream and for
# the `sample_rate` sent in the initial request.
RATE = 16000
# Default number of audio channels to record.
CHANNELS = 1
# Default number of frames per buffer / per read: one tenth of RATE.
CHUNK = RATE // 10 # 100ms

# Keep the request alive for this many seconds
# (8 hours; passed as the second argument to the `Recognize` call in main()).
DEADLINE_SECS = 8 * 60 * 60
# Scope applied to the application default credentials in make_channel().
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'


def make_channel(host, port):
    """Build a secure gRPC channel that carries Google auth credentials.

    Args:
        host: Hostname of the service to connect to.
        port: Port of the service to connect to.

    Returns:
        The channel produced by `implementations.secure_channel`, secured
        with default SSL settings combined with a bearer-token header.
    """
    # Default SSL settings are enough for an https call.
    transport_creds = implementations.ssl_channel_credentials(
        None, None, None)

    # Application default credentials from the environment, restricted to
    # the scope this sample needs.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    token = scoped_creds.get_access_token().access_token
    bearer_header = ('Authorization', 'Bearer ' + token)

    def inject_header(unused_context, callback):
        # Hand the pre-built auth header to the callback for every call.
        return callback([bearer_header], None)

    call_creds = implementations.metadata_call_credentials(
        inject_header, name='google_creds')

    # Both layers are needed: SSL for transport, the header for Google auth.
    combined_creds = implementations.composite_channel_credentials(
        transport_creds, call_creds)

    return implementations.secure_channel(host, port, combined_creds)


@contextlib.contextmanager
def record_audio(channels, rate, chunk):
    """Opens a recording stream in a context manager.

    Args:
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Number of frames per buffer for the input stream.

    Yields:
        The opened 16-bit PyAudio input stream.

    The stream is stopped and closed, and the PyAudio interface terminated,
    even when the body of the `with` block raises or the generator that is
    using this context manager is closed early.
    """
    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16, channels=channels, rate=rate,
            input=True, frames_per_buffer=chunk,
        )
        try:
            yield audio_stream
        finally:
            # Without the try/finally an exception thrown into the generator
            # at the yield would skip this cleanup and leak the stream.
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Also reached when opening the stream itself fails.
        audio_interface.terminate()


def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Generates `RecognizeRequest`s from a live recording stream.

    Args:
        stop_audio: A threading.Event object; once it is set, no further
            audio is read and the generator finishes.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: How many frames to read from the stream for each request.

    Yields:
        A first `RecognizeRequest` holding both the stream metadata and
        audio, followed by requests that hold audio only.
    """
    with record_audio(channels, rate, chunk) as microphone:
        # The server needs to know how to interpret the stream, so the very
        # first request carries the encoding and sample rate.
        stream_config = InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate)
        first_audio = AudioRequest(content=microphone.read(chunk))

        yield RecognizeRequest(
            initial_request=stream_config,
            audio_request=first_audio)

        # Every later request only needs the audio content.
        while True:
            if stop_audio.is_set():
                break
            yield RecognizeRequest(
                audio_request=AudioRequest(content=microphone.read(chunk)))


def listen_print_loop(recognize_stream):
    """Prints server responses until an exit keyword is transcribed.

    Args:
        recognize_stream: Iterable of responses; each one is read for its
            `error` and `results` fields.

    Raises:
        RuntimeError: If a response carries an error code other than OK.
    """
    exit_keywords = ('exit', 'quit')

    for response in recognize_stream:
        error = response.error
        if error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + error.message)

        # Show every transcription together with its alternatives.
        for result in response.results:
            print(result.alternatives)

        # Stop listening as soon as any sufficiently confident alternative
        # is one of the exit keywords.
        for result in response.results:
            for candidate in result.alternatives:
                if (candidate.confidence > .5 and
                        candidate.transcript.strip() in exit_keywords):
                    print('Exiting..')
                    return


def main():
    """Streams recorded audio to the Speech API and prints the responses."""
    stop_audio = threading.Event()
    channel = make_channel('speech.googleapis.com', 443)

    with beta_create_Speech_stub(channel) as service:
        try:
            requests = request_stream(stop_audio)
            responses = service.Recognize(requests, DEADLINE_SECS)
            listen_print_loop(responses)
        finally:
            # The grpc lib consumes the request stream in its own thread;
            # signal it to stop, or it keeps going after the loop is done.
            stop_audio.set()


if __name__ == '__main__':
    main()
67 changes: 67 additions & 0 deletions speech/api/speech_streaming_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2016, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import io
import re
import sys

import pytest

import speech_streaming


class MockAudioStream(object):
    """Replays a raw audio file through a `read(num_frames)` interface.

    Once the file is exhausted, reads are served from a finite buffer of
    silence (zero bytes); when that is exhausted too, reads return empty
    bytes.

    The instance is its own context manager, and calling it returns the
    instance itself, whatever arguments are passed.

    Args:
        audio_filename: Path of the raw audio file to replay.
        trailing_silence_secs: Seconds of silence available after the file,
            sized as `speech_streaming.RATE` frames of two bytes per second.
    """

    def __init__(self, audio_filename, trailing_silence_secs=10):
        self.audio_filename = audio_filename
        # Bytes literal: io.BytesIO rejects text strings on Python 3.
        self.silence = io.BytesIO(
            b'\0\0' * speech_streaming.RATE * trailing_silence_secs)

    def __enter__(self):
        # Raw audio is binary data; text mode would decode it or translate
        # newlines on some platforms and corrupt the samples.
        self.audio_file = open(self.audio_filename, 'rb')
        return self

    def __exit__(self, *args):
        self.audio_file.close()

    def __call__(self, *args):
        return self

    def read(self, num_frames):
        """Returns the next `num_frames` frames of audio as bytes."""
        # audio is 16-bit samples, whereas python byte is 8-bit
        num_bytes = 2 * num_frames
        chunk = self.audio_file.read(num_bytes)
        if not chunk:
            # File exhausted: fall back to the trailing silence.
            chunk = self.silence.read(num_bytes)
        return chunk


def mock_audio_stream(filename):
    """Builds a stand-in for `record_audio` that reads from `filename`.

    The returned callable takes the same `(channels, rate, chunk)` arguments,
    ignores them, and works as a context manager that yields the file opened
    in binary mode, closing it when the block is left.
    """
    def replay_file(channels, rate, chunk):
        audio_file = open(filename, 'rb')
        try:
            yield audio_file
        finally:
            audio_file.close()

    return contextlib.contextmanager(replay_file)


@pytest.mark.skipif(
    sys.version_info >= (3, 0), reason="can't get grpc lib to work in python3")
def test_main(resource, monkeypatch, capsys):
    """Runs `main` against the recorded 'quit.raw' audio and checks stdout."""
    # Feed the sample audio from a file instead of recording it, and shorten
    # the request deadline.
    fake_recorder = mock_audio_stream(resource('quit.raw'))
    monkeypatch.setattr(speech_streaming, 'record_audio', fake_recorder)
    monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5)

    speech_streaming.main()

    stdout, _ = capsys.readouterr()
    quit_transcript = re.compile(r'transcript.*"quit"', re.DOTALL | re.I)
    assert quit_transcript.search(stdout)

0 comments on commit bd7b58d

Please sign in to comment.