diff --git a/docs/index.rst b/docs/index.rst index fbf22306ab75..66e28a5950f9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -173,6 +173,10 @@ speech-usage Client + speech-encoding + speech-metadata + speech-operation + speech-transcript .. toctree:: :maxdepth: 0 diff --git a/docs/speech-client.rst b/docs/speech-client.rst index 1162e5454a97..1e14b24eeac5 100644 --- a/docs/speech-client.rst +++ b/docs/speech-client.rst @@ -1,5 +1,5 @@ Speech Client -================ +============= .. automodule:: google.cloud.speech.client :members: diff --git a/docs/speech-encoding.rst b/docs/speech-encoding.rst new file mode 100644 index 000000000000..affe80a4ebd2 --- /dev/null +++ b/docs/speech-encoding.rst @@ -0,0 +1,7 @@ +Speech Encoding +=============== + +.. automodule:: google.cloud.speech.encoding + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/speech-metadata.rst b/docs/speech-metadata.rst new file mode 100644 index 000000000000..575094a2d0f2 --- /dev/null +++ b/docs/speech-metadata.rst @@ -0,0 +1,7 @@ +Speech Metadata +=============== + +.. automodule:: google.cloud.speech.metadata + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/speech-operation.rst b/docs/speech-operation.rst new file mode 100644 index 000000000000..5c0ec3b92b12 --- /dev/null +++ b/docs/speech-operation.rst @@ -0,0 +1,7 @@ +Speech Operation +================ + +.. automodule:: google.cloud.speech.operation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/speech-transcript.rst b/docs/speech-transcript.rst new file mode 100644 index 000000000000..f71f72bd2645 --- /dev/null +++ b/docs/speech-transcript.rst @@ -0,0 +1,7 @@ +Speech Transcript +================= + +.. 
automodule:: google.cloud.speech.transcript + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst index 57e274608b40..509c59b1b579 100644 --- a/docs/speech-usage.rst +++ b/docs/speech-usage.rst @@ -2,7 +2,8 @@ Using the API ============= The `Google Speech`_ API enables developers to convert audio to text. -The API recognizes over 80 languages and variants, to support your global user base. +The API recognizes over 80 languages and variants, to support your global user +base. .. warning:: @@ -30,11 +31,41 @@ create an instance of :class:`~google.cloud.speech.client.Client`. >>> client = speech.Client() +Asynchronous Recognition +------------------------ + +The :meth:`~google.cloud.speech.Client.async_recognize` sends audio data to the +Speech API and initiates a Long Running Operation. Using this operation, you +can periodically poll for recognition results. Use asynchronous requests for +audio data of any duration up to 80 minutes. + +See: `Speech Asynchronous Recognize`_ + + + .. code-block:: python + + >>> import time + >>> operation = client.async_recognize( + ... None, 'gs://my-bucket/recording.flac', + ... 'FLAC', 16000, max_alternatives=2) + >>> retry_count = 100 + >>> while retry_count > 0 and not operation.complete: + ... retry_count -= 1 + ... time.sleep(10) + ... operation.poll() # API call + >>> operation.complete + True + >>> operation.results[0].transcript + 'how old is the Brooklyn Bridge' + >>> operation.results[0].confidence + 0.98267895 + + Synchronous Recognition ----------------------- -The :meth:`~google.cloud.speech.Client.sync_recognize` method converts speech data to text -and returns alternative text transcriptons. +The :meth:`~google.cloud.speech.Client.sync_recognize` method converts speech +data to text and returns alternative text transcriptions. .. code-block:: python @@ -53,3 +84,4 @@ and returns alternative text transcriptons. confidence: 0 .. 
_sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize +.. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py index 7f30b06c05ba..8226fd76c644 100644 --- a/speech/google/cloud/speech/client.py +++ b/speech/google/cloud/speech/client.py @@ -19,30 +19,8 @@ from google.cloud._helpers import _to_bytes from google.cloud import client as client_module from google.cloud.speech.connection import Connection - - -class Encoding(object): - """Audio encoding types. - - See: - https://cloud.google.com/speech/reference/rest/v1beta1/\ - RecognitionConfig#AudioEncoding - """ - - LINEAR16 = 'LINEAR16' - """LINEAR16 encoding type.""" - - FLAC = 'FLAC' - """FLAC encoding type.""" - - MULAW = 'MULAW' - """MULAW encoding type.""" - - AMR = 'AMR' - """AMR encoding type.""" - - AMR_WB = 'AMR_WB' - """AMR_WB encoding type.""" +from google.cloud.speech.encoding import Encoding +from google.cloud.speech.operation import Operation class Client(client_module.Client): @@ -68,6 +46,81 @@ class Client(client_module.Client): _connection_class = Connection + def async_recognize(self, content, source_uri, encoding, sample_rate, + language_code=None, max_alternatives=None, + profanity_filter=None, speech_context=None): + """Asynchronous Recognize request to Google Speech API. + + .. _async_recognize: https://cloud.google.com/speech/reference/\ + rest/v1beta1/speech/asyncrecognize + + See `async_recognize`_. + + :type content: bytes + :param content: Byte stream of audio. + + :type source_uri: str + :param source_uri: URI that points to a file that contains audio + data bytes as specified in RecognitionConfig. + Currently, only Google Cloud Storage URIs are + supported, which must be specified in the following + format: ``gs://bucket_name/object_name``. 
+ + :type encoding: str + :param encoding: encoding of audio data sent in all RecognitionAudio + messages, can be one of: :attr:`~.Encoding.LINEAR16`, + :attr:`~.Encoding.FLAC`, :attr:`~.Encoding.MULAW`, + :attr:`~.Encoding.AMR`, :attr:`~.Encoding.AMR_WB` + + :type sample_rate: int + :param sample_rate: Sample rate in Hertz of the audio data sent in all + requests. Valid values are: 8000-48000. For best + results, set the sampling rate of the audio source + to 16000 Hz. If that's not possible, use the + native sample rate of the audio source (instead of + re-sampling). + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: `~google.cloud.speech.operation.Operation` + :returns: ``Operation`` for asynchronous request to Google Speech API. 
+ """ + + data = _build_request_data(content, source_uri, encoding, + sample_rate, language_code, + max_alternatives, profanity_filter, + speech_context) + + api_response = self.connection.api_request( + method='POST', path='speech:asyncrecognize', data=data) + + return Operation.from_api_repr(self, api_response) + def sync_recognize(self, content, source_uri, encoding, sample_rate, language_code=None, max_alternatives=None, profanity_filter=None, speech_context=None): @@ -139,44 +192,115 @@ def sync_recognize(self, content, source_uri, encoding, sample_rate, between 0 and 1. """ - if content is None and source_uri is None: - raise ValueError('content and source_uri cannot be both ' - 'equal to None') - - if content is not None and source_uri is not None: - raise ValueError('content and source_uri cannot be both ' - 'different from None') + data = _build_request_data(content, source_uri, encoding, + sample_rate, language_code, + max_alternatives, profanity_filter, + speech_context) - if encoding is None: - raise ValueError('encoding cannot be None') - if sample_rate is None: - raise ValueError('sample_rate cannot be None') + api_response = self.connection.api_request( + method='POST', path='speech:syncrecognize', data=data) - if content is not None: - audio = {'content': b64encode(_to_bytes(content))} + if len(api_response['results']) == 1: + return api_response['results'][0]['alternatives'] else: - audio = {'uri': source_uri} + raise ValueError('result in api should have length 1') - config = {'encoding': encoding, 'sampleRate': sample_rate} - if language_code is not None: - config['languageCode'] = language_code - if max_alternatives is not None: - config['maxAlternatives'] = max_alternatives - if profanity_filter is not None: - config['profanityFilter'] = profanity_filter - if speech_context is not None: - config['speechContext'] = {'phrases': speech_context} +def _build_request_data(content, source_uri, encoding, sample_rate, + language_code=None, 
max_alternatives=None, + profanity_filter=None, speech_context=None): + """Builds the request data before making API request. + + :type content: bytes + :param content: Byte stream of audio. + + :type source_uri: str + :param source_uri: URI that points to a file that contains audio + data bytes as specified in RecognitionConfig. + Currently, only Google Cloud Storage URIs are + supported, which must be specified in the following + format: ``gs://bucket_name/object_name``. + + :type encoding: str + :param encoding: encoding of audio data sent in all RecognitionAudio + messages, can be one of: :attr:`~.Encoding.LINEAR16`, + :attr:`~.Encoding.FLAC`, :attr:`~.Encoding.MULAW`, + :attr:`~.Encoding.AMR`, :attr:`~.Encoding.AMR_WB` + + :type sample_rate: int + :param sample_rate: Sample rate in Hertz of the audio data sent in all + requests. Valid values are: 8000-48000. For best + results, set the sampling rate of the audio source + to 16000 Hz. If that's not possible, use the + native sample rate of the audio source (instead of + re-sampling). + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. 
This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: dict + :returns: Dictionary with required data for Google Speech API. + """ + if content is None and source_uri is None: + raise ValueError('content and source_uri cannot be both ' + 'equal to None') - data = { - 'audio': audio, - 'config': config, - } + if content is not None and source_uri is not None: + raise ValueError('content and source_uri cannot be both ' + 'different from None') - api_response = self.connection.api_request( - method='POST', path='syncrecognize', data=data) + if encoding is None: + raise ValueError('encoding cannot be None') - if len(api_response['results']) == 1: - return api_response['results'][0]['alternatives'] - else: - raise ValueError('result in api should have length 1') + encoding_value = getattr(Encoding, encoding) + + if sample_rate is None: + raise ValueError('sample_rate cannot be None') + + if content is not None: + audio = {'content': b64encode(_to_bytes(content))} + else: + audio = {'uri': source_uri} + + config = {'encoding': encoding_value, + 'sampleRate': sample_rate} + + if language_code is not None: + config['languageCode'] = language_code + if max_alternatives is not None: + config['maxAlternatives'] = max_alternatives + if profanity_filter is not None: + config['profanityFilter'] = profanity_filter + if speech_context is not None: + config['speechContext'] = {'phrases': speech_context} + + data = { + 'audio': audio, + 'config': config, + } + + return data diff --git a/speech/google/cloud/speech/connection.py b/speech/google/cloud/speech/connection.py index 3decf19290d8..d74d729344e8 100644 --- a/speech/google/cloud/speech/connection.py +++ b/speech/google/cloud/speech/connection.py @@ -26,7 +26,7 @@ class Connection(base_connection.JSONConnection): API_VERSION = 'v1beta1' """The version of the API, used in building the API call's URL.""" - 
API_URL_TEMPLATE = '{api_base_url}/{api_version}/speech:{path}' + API_URL_TEMPLATE = '{api_base_url}/{api_version}/{path}' """A template for the URL of a particular API call.""" SCOPE = ('https://www.googleapis.com/auth/cloud-platform',) diff --git a/speech/google/cloud/speech/encoding.py b/speech/google/cloud/speech/encoding.py new file mode 100644 index 000000000000..4fdaa3367834 --- /dev/null +++ b/speech/google/cloud/speech/encoding.py @@ -0,0 +1,39 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Encodings used by the Google Cloud Speech API.""" + + +class Encoding(object): + """Audio encoding types. + + See: + https://cloud.google.com/speech/reference/rest/v1beta1/\ + RecognitionConfig#AudioEncoding + """ + + LINEAR16 = 'LINEAR16' + """LINEAR16 encoding type.""" + + FLAC = 'FLAC' + """FLAC encoding type.""" + + MULAW = 'MULAW' + """MULAW encoding type.""" + + AMR = 'AMR' + """AMR encoding type.""" + + AMR_WB = 'AMR_WB' + """AMR_WB encoding type.""" diff --git a/speech/google/cloud/speech/metadata.py b/speech/google/cloud/speech/metadata.py new file mode 100644 index 000000000000..89a8ff3369d9 --- /dev/null +++ b/speech/google/cloud/speech/metadata.py @@ -0,0 +1,78 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Metadata representation from Google Speech API""" + +from google.cloud._helpers import _rfc3339_to_datetime + + +class Metadata(object): + """Representation of metadata from a Google Speech API Operation. + + :type last_update: datetime + :param last_update: When the Speech operation was last updated. + + :type start_time: datetime + :param start_time: When the Speech operation was started. + + :type progress_percent: int + :param progress_percent: Percentage of operation that has been completed. + """ + def __init__(self, last_update, start_time, progress_percent): + self._last_update = last_update + self._start_time = start_time + self._progress_percent = progress_percent + + @classmethod + def from_api_repr(cls, response): + """Factory: construct representation of operation metadata. + + :type response: dict + :param response: Dictionary containing operation metadata. + + :rtype: :class:`~google.cloud.speech.metadata.Metadata` + :returns: Instance of operation Metadata. + """ + last_update = _rfc3339_to_datetime(response['lastUpdateTime']) + start_time = _rfc3339_to_datetime(response['startTime']) + progress_percent = response['progressPercent'] + + return cls(last_update, start_time, progress_percent) + + @property + def last_update(self): + """Last time operation was updated. + + :rtype: datetime + :returns: Datetime when operation was last updated. + """ + return self._last_update + + @property + def start_time(self): + """Start time of operation. + + :rtype: datetime + :returns: Datetime when operation was started. 
+ """ + return self._start_time + + @property + def progress_percent(self): + """Progress percentage completed of operation. + + :rtype: int + :returns: Percentage of operation completed. + """ + return self._progress_percent diff --git a/speech/google/cloud/speech/operation.py b/speech/google/cloud/speech/operation.py new file mode 100644 index 000000000000..69614b16cb7f --- /dev/null +++ b/speech/google/cloud/speech/operation.py @@ -0,0 +1,132 @@ +# Copyright 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Long running operation representation for Google Speech API""" + +from google.cloud.speech.metadata import Metadata +from google.cloud.speech.transcript import Transcript +from google.cloud import operation + + +class Operation(operation.Operation): + """Representation of a Google API Long-Running Operation. + + :type client: :class:`~google.cloud.speech.client.Client` + :param client: Instance of speech client. + + :type name: str + :param name: ID assigned to an operation. + + :type complete: bool + :param complete: True if operation is complete, else False. + + :type metadata: :class:`~google.cloud.speech.metadata.Metadata` + :param metadata: Instance of ``Metadata`` with operation information. + + :type results: list + :param results: List of transcripts with scores of operation. 
+ """ + def __init__(self, client, name, complete=False, metadata=None, + results=None): + self.client = client + self.name = name + self._complete = complete + self._metadata = metadata + self._results = results + + @classmethod + def from_api_repr(cls, client, response): + """Factory: construct an instance from Google Speech API. + + :type client: :class:`~google.cloud.speech.client.Client` + :param client: Instance of speech client. + + :type response: dict + :param response: Dictionary response from Google Speech Operations API. + + :rtype: :class:`Operation` + :returns: Instance of `~google.cloud.speech.operation.Operation`. + """ + name = response['name'] + complete = response.get('done', False) + + operation_instance = cls(client, name, complete) + operation_instance._update(response) + return operation_instance + + @property + def complete(self): + """Completion state of the `Operation`. + + :rtype: bool + :returns: True if already completed, else false. + """ + return self._complete + + @property + def metadata(self): + """Metadata of operation. + + :rtype: :class:`~google.cloud.speech.metadata.Metadata` + :returns: Instance of ``Metadata``. + """ + return self._metadata + + @property + def results(self): + """Results list with transcript information. + + :rtype: list + :returns: List of transcripts with confidence scores. + """ + return self._results + + def poll(self): + """Check if the operation has finished. + + :rtype: bool + :returns: A boolean indicating if the current operation has completed. + :raises: :class:`ValueError` if the operation + has already completed. + """ + if self.complete: + raise ValueError('The operation has completed.') + + path = 'operations/%s' % (self.name,) + api_response = self.client.connection.api_request(method='GET', + path=path) + self._update(api_response) + return self.complete + + def _update(self, response): + """Update Operation instance with latest data from Speech API. + + .. 
_speech_operations: https://cloud.google.com/speech/reference/\ + rest/v1beta1/operations + + :type response: dict + :param response: Response from Speech API Operations endpoint. + See: `speech_operations`_. + """ + metadata = response.get('metadata', None) + raw_results = response.get('response', {}).get('results', None) + results = [] + if raw_results: + for result in raw_results[0]['alternatives']: + results.append(Transcript(result)) + if metadata: + self._metadata = Metadata.from_api_repr(metadata) + + self._results = results + self._complete = response.get('done', False) diff --git a/speech/google/cloud/speech/transcript.py b/speech/google/cloud/speech/transcript.py new file mode 100644 index 000000000000..bbe915396c5c --- /dev/null +++ b/speech/google/cloud/speech/transcript.py @@ -0,0 +1,44 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transcript representation for Google Speech API""" + + +class Transcript(object): + """Representation of Speech Transcripts + + :type result: dict + :param result: Dictionary of transcript and confidence of recognition. + """ + def __init__(self, result): + self._transcript = result.get('transcript') + self._confidence = result.get('confidence') + + @property + def transcript(self): + """Transcript text from audio. + + :rtype: str + :returns: Text detected in audio. + """ + return self._transcript + + @property + def confidence(self): + """Confidence score for recognized speech. 
+ + :rtype: float + :returns: Confidence score of recognized speech [0-1]. + """ + return self._confidence diff --git a/speech/unit_tests/_fixtures.py b/speech/unit_tests/_fixtures.py index 9665fef535d8..7980ed862038 100644 --- a/speech/unit_tests/_fixtures.py +++ b/speech/unit_tests/_fixtures.py @@ -28,3 +28,45 @@ SYNC_RECOGNIZE_EMPTY_RESPONSE = { 'results': [], } + +ASYNC_RECOGNIZE_RESPONSE = { + 'name': '123456789' +} + +OPERATION_COMPLETE_RESPONSE = { + 'name': '123456789', + 'metadata': { + '@type': ('type.googleapis.com/' + 'google.cloud.speech.v1beta1.AsyncRecognizeMetadata'), + 'progressPercent': 100, + 'startTime': '2016-09-22T17:52:25.536964Z', + 'lastUpdateTime': '2016-09-22T17:52:27.802902Z', + }, + 'done': True, + 'response': { + '@type': ('type.googleapis.com/' + 'google.cloud.speech.v1beta1.AsyncRecognizeResponse'), + 'results': [ + { + 'alternatives': [ + { + 'transcript': 'how old is the Brooklyn Bridge', + 'confidence': 0.98267895 + }, + ], + }, + ], + }, +} + +OPERATION_INCOMPLETE_RESPONSE = { + 'name': '123456789', + 'metadata': { + '@type': ('type.googleapis.com/' + 'google.cloud.speech.v1beta1.AsyncRecognizeMetadata'), + 'progressPercent': 27, + 'startTime': '2016-09-22T17:52:25.536964Z', + 'lastUpdateTime': '2016-09-22T17:52:27.802902Z', + }, + 'done': False, +} diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py index 53e1eb67b89e..200562ea2661 100644 --- a/speech/unit_tests/test_client.py +++ b/speech/unit_tests/test_client.py @@ -40,7 +40,7 @@ def test_ctor(self): def test_sync_recognize_content_with_optional_parameters(self): import base64 from google.cloud._helpers import _to_bytes - from google.cloud.speech.client import Encoding + from google.cloud.speech.encoding import Encoding from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE _AUDIO_CONTENT = _to_bytes('/9j/4QNURXhpZgAASUkq') @@ -82,7 +82,7 @@ def test_sync_recognize_content_with_optional_parameters(self): self.assertEqual(len(req), 3) 
self.assertEqual(req['data'], REQUEST) self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], 'syncrecognize') + self.assertEqual(req['path'], 'speech:syncrecognize') expected = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'] self.assertEqual(response, expected) @@ -116,13 +116,13 @@ def test_sync_recognize_source_uri_without_optional_parameters(self): self.assertEqual(len(req), 3) self.assertEqual(req['data'], REQUEST) self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], 'syncrecognize') + self.assertEqual(req['path'], 'speech:syncrecognize') expected = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'] self.assertEqual(response, expected) def test_sync_recognize_without_content_or_source_uri(self): - from google.cloud.speech.client import Encoding + from google.cloud.speech.encoding import Encoding credentials = _Credentials() client = self._makeOne(credentials=credentials) @@ -132,7 +132,7 @@ def test_sync_recognize_without_content_or_source_uri(self): def test_sync_recognize_with_content_and_source_uri(self): from google.cloud._helpers import _to_bytes - from google.cloud.speech.client import Encoding + from google.cloud.speech.encoding import Encoding _AUDIO_CONTENT = _to_bytes('/9j/4QNURXhpZgAASUkq') credentials = _Credentials() @@ -151,7 +151,7 @@ def test_sync_recognize_without_encoding(self): self.SAMPLE_RATE) def test_sync_recognize_without_samplerate(self): - from google.cloud.speech.client import Encoding + from google.cloud.speech.encoding import Encoding credentials = _Credentials() client = self._makeOne(credentials=credentials) @@ -172,6 +172,25 @@ def test_sync_recognize_with_empty_results(self): client.sync_recognize(None, self.AUDIO_SOURCE_URI, Encoding.FLAC, self.SAMPLE_RATE) + def test_async_recognize(self): + from unit_tests._fixtures import ASYNC_RECOGNIZE_RESPONSE + from google.cloud.speech.encoding import Encoding + from google.cloud.speech.operation import Operation + RETURNED = 
ASYNC_RECOGNIZE_RESPONSE + + credentials = _Credentials() + client = self._makeOne(credentials=credentials) + client.connection = _Connection(RETURNED) + + encoding = Encoding.FLAC + + operation = client.async_recognize(None, self.AUDIO_SOURCE_URI, + encoding, + self.SAMPLE_RATE) + self.assertIsInstance(operation, Operation) + self.assertFalse(operation.complete) + self.assertIsNone(operation.metadata) + class _Credentials(object): diff --git a/speech/unit_tests/test_connection.py b/speech/unit_tests/test_connection.py index ea9471dc4937..0de94cb1d7c1 100644 --- a/speech/unit_tests/test_connection.py +++ b/speech/unit_tests/test_connection.py @@ -26,11 +26,11 @@ def _makeOne(self, *args, **kw): def test_build_api_url(self): conn = self._makeOne() + method = 'speech:syncrecognize' uri = '/'.join([ conn.API_BASE_URL, conn.API_VERSION, - 'speech', + method, ]) - method = 'syncrecognize' - uri += ':' + method + self.assertEqual(conn.build_api_url(method), uri) diff --git a/speech/unit_tests/test_metadata.py b/speech/unit_tests/test_metadata.py new file mode 100644 index 000000000000..8e1dcd03e733 --- /dev/null +++ b/speech/unit_tests/test_metadata.py @@ -0,0 +1,50 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + + +class TestMetadata(unittest.TestCase): + OPERATION_ID = 123456789 + + def _getTargetClass(self): + from google.cloud.speech.metadata import Metadata + return Metadata + + def _makeOne(self, *args, **kwargs): + return self._getTargetClass()(*args, **kwargs) + + def test_ctor(self): + last_update = 'last_update' + start_time = 'start_time' + progress_percent = 23 + metadata = self._makeOne(last_update, start_time, progress_percent) + self.assertEqual('last_update', metadata.last_update) + self.assertEqual('start_time', metadata.start_time) + self.assertEqual(23, metadata.progress_percent) + + def test_from_api_repr(self): + import datetime + from google.cloud._helpers import _rfc3339_to_datetime + from unit_tests._fixtures import OPERATION_INCOMPLETE_RESPONSE as DATA + METADATA = DATA['metadata'] + + start_time = _rfc3339_to_datetime(METADATA['startTime']) + last_update = _rfc3339_to_datetime(METADATA['lastUpdateTime']) + metadata = self._getTargetClass().from_api_repr(METADATA) + self.assertIsInstance(metadata.last_update, datetime.datetime) + self.assertEqual(last_update, metadata.last_update) + self.assertIsInstance(metadata.start_time, datetime.datetime) + self.assertEqual(start_time, metadata.start_time) + self.assertEqual(27, metadata.progress_percent) diff --git a/speech/unit_tests/test_operation.py b/speech/unit_tests/test_operation.py new file mode 100644 index 000000000000..2ebce2c75ce1 --- /dev/null +++ b/speech/unit_tests/test_operation.py @@ -0,0 +1,120 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class OperationTests(unittest.TestCase): + + OPERATION_NAME = '123456789' + + def _getTargetClass(self): + from google.cloud.speech.operation import Operation + return Operation + + def _makeOne(self, *args, **kwargs): + return self._getTargetClass()(*args, **kwargs) + + def test_ctor_defaults(self): + client = _Client() + operation = self._makeOne(client, self.OPERATION_NAME) + self.assertEqual(operation.name, '123456789') + self.assertFalse(operation.complete) + self.assertIsNone(operation.metadata) + self.assertIsNone(operation.results) + + def test_from_api_repr(self): + from unit_tests._fixtures import OPERATION_COMPLETE_RESPONSE + from google.cloud.speech.operation import Transcript + from google.cloud.speech.metadata import Metadata + RESPONSE = OPERATION_COMPLETE_RESPONSE + + client = _Client() + operation = self._getTargetClass().from_api_repr(client, RESPONSE) + + self.assertEqual('123456789', operation.name) + self.assertTrue(operation.complete) + + self.assertIsInstance(operation.results[0], Transcript) + self.assertEqual(operation.results[0].transcript, + 'how old is the Brooklyn Bridge') + self.assertEqual(operation.results[0].confidence, 0.98267895) + self.assertTrue(operation.complete) + self.assertIsInstance(operation.metadata, Metadata) + self.assertEqual(operation.metadata.progress_percent, 100) + + def test_update_response(self): + from unit_tests._fixtures import ASYNC_RECOGNIZE_RESPONSE + from unit_tests._fixtures import OPERATION_COMPLETE_RESPONSE + RESPONSE = ASYNC_RECOGNIZE_RESPONSE + + client = _Client() + operation = self._getTargetClass().from_api_repr(client, RESPONSE) + self.assertEqual(operation.name, '123456789') + operation._update(OPERATION_COMPLETE_RESPONSE) + self.assertTrue(operation.complete) + + def test_poll(self): + from google.cloud.speech.operation import Metadata + from unit_tests._fixtures 
import ASYNC_RECOGNIZE_RESPONSE + from unit_tests._fixtures import OPERATION_COMPLETE_RESPONSE + RESPONSE = ASYNC_RECOGNIZE_RESPONSE + client = _Client() + connection = _Connection(OPERATION_COMPLETE_RESPONSE) + client.connection = connection + + operation = self._getTargetClass().from_api_repr(client, RESPONSE) + self.assertFalse(operation.complete) + operation.poll() + self.assertTrue(operation.complete) + self.assertIsInstance(operation.metadata, Metadata) + self.assertEqual(operation.metadata.progress_percent, 100) + requested = client.connection._requested + self.assertEqual(requested[0]['method'], 'GET') + self.assertEqual(requested[0]['path'], + 'operations/%s' % (operation.name,)) + + def test_poll_complete(self): + from unit_tests._fixtures import OPERATION_COMPLETE_RESPONSE + from unit_tests._fixtures import OPERATION_INCOMPLETE_RESPONSE + RESPONSE = OPERATION_INCOMPLETE_RESPONSE + + client = _Client() + connection = _Connection(OPERATION_COMPLETE_RESPONSE) + client.connection = connection + operation = self._getTargetClass().from_api_repr(client, RESPONSE) + + self.assertFalse(operation.complete) + operation.poll() # Update the operation with complete data. + + with self.assertRaises(ValueError): + operation.poll() + requested = client.connection._requested + self.assertEqual(requested[0]['method'], 'GET') + self.assertEqual(requested[0]['path'], + 'operations/%s' % (operation.name,)) + + +class _Connection(object): + def __init__(self, response=None): + self.response = response + self._requested = [] + + def api_request(self, method, path): + self._requested.append({'method': method, 'path': path}) + return self.response + + +class _Client(object): + connection = None diff --git a/speech/unit_tests/test_transcript.py b/speech/unit_tests/test_transcript.py new file mode 100644 index 000000000000..b585d6e7429c --- /dev/null +++ b/speech/unit_tests/test_transcript.py @@ -0,0 +1,32 @@ +# Copyright 2016 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + + +class TestTranscript(unittest.TestCase): + def _getTargetClass(self): + from google.cloud.speech.transcript import Transcript + return Transcript + + def _makeOne(self, *args, **kwargs): + return self._getTargetClass()(*args, **kwargs) + + def test_ctor(self): + from unit_tests._fixtures import OPERATION_COMPLETE_RESPONSE as DATA + TRANSCRIPT_DATA = DATA['response']['results'][0]['alternatives'][0] + transcript = self._makeOne(TRANSCRIPT_DATA) + self.assertEqual('how old is the Brooklyn Bridge', + transcript.transcript) + self.assertEqual(0.98267895, transcript.confidence)