From 1127101a14b637f532548fc7c3320c6ee5af28a5 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 21 Sep 2018 11:37:00 -0400 Subject: [PATCH] Speech: Prep docs for repo split. (#6017) - Move contents of 'docs/speech/' to 'speech/docs', leaving a symlink. - Harmonize / DRY 'speech/README.rst' with 'speech/docs/index.rst'. --- .../google-cloud-python-speech/README.rst | 17 +- .../docs/changelog.md | 1 + .../google-cloud-python-speech/docs/index.rst | 336 ++++++++++++++---- packages/google-cloud-python-speech/synth.py | 35 +- 4 files changed, 269 insertions(+), 120 deletions(-) create mode 120000 packages/google-cloud-python-speech/docs/changelog.md diff --git a/packages/google-cloud-python-speech/README.rst b/packages/google-cloud-python-speech/README.rst index b37eac0eb347..55bbafb02dbf 100644 --- a/packages/google-cloud-python-speech/README.rst +++ b/packages/google-cloud-python-speech/README.rst @@ -1,7 +1,9 @@ Python Client for Cloud Speech API (`Beta`_) ============================================= -`Cloud Speech API`_: Converts audio to text by applying powerful neural network models. +The `Cloud Speech API`_ enables developers to convert audio to text by applying +powerful neural network models. The API recognizes over 80 languages and +variants, to support your global user base. - `Client Library Documentation`_ - `Product Documentation`_ @@ -61,11 +63,8 @@ Windows \Scripts\activate \Scripts\pip.exe install google-cloud-speech -Preview -~~~~~~~ - -SpeechClient -^^^^^^^^^^^^ +Example Usage +~~~~~~~~~~~~~ .. code:: py @@ -88,10 +87,6 @@ Next Steps - Read the `Client Library Documentation`_ for Cloud Speech API API to see other available methods on the client. -- Read the `Cloud Speech API Product documentation`_ to learn +- Read the `Product documentation`_ to learn more about the product and see How-to Guides. -- View this `repository’s main README`_ to see the full list of Cloud APIs that we cover. - -.. _Cloud Speech API Product documentation: https://cloud.google.com/speech -.. _repository’s main README: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst \ No newline at end of file diff --git a/packages/google-cloud-python-speech/docs/changelog.md b/packages/google-cloud-python-speech/docs/changelog.md new file mode 120000 index 000000000000..04c99a55caae --- /dev/null +++ b/packages/google-cloud-python-speech/docs/changelog.md @@ -0,0 +1 @@ +../CHANGELOG.md \ No newline at end of file diff --git a/packages/google-cloud-python-speech/docs/index.rst b/packages/google-cloud-python-speech/docs/index.rst index 7c828d5e2998..054d4b62f029 100644 --- a/packages/google-cloud-python-speech/docs/index.rst +++ b/packages/google-cloud-python-speech/docs/index.rst @@ -1,108 +1,294 @@ -Python Client for Cloud Speech API (`Beta`_) -============================================= +.. include:: /../speech/README.rst -`Cloud Speech API`_: Converts audio to text by applying powerful neural network models. +Using the Library +----------------- -- `Client Library Documentation`_ -- `Product Documentation`_ +Asynchronous Recognition +~~~~~~~~~~~~~~~~~~~~~~~~ -.. _Beta: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst -.. _Cloud Speech API: https://cloud.google.com/speech -.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/speech/usage.html -.. 
_Product Documentation: https://cloud.google.com/speech
+The :meth:`~.speech_v1.SpeechClient.long_running_recognize` method
+sends audio data to the Speech API and initiates a Long Running Operation.

-Quick Start
------------
+Using this operation, you can periodically poll for recognition results.
+Use asynchronous requests for audio data of any duration up to 80 minutes.

-In order to use this library, you first need to go through the following steps:
+See: `Speech Asynchronous Recognize`_

-1. `Select or create a Cloud Platform project.`_
-2. `Enable billing for your project.`_
-3. `Enable the Cloud Speech API.`_
-4. `Setup Authentication.`_

-.. _Select or create a Cloud Platform project.: https://console.cloud.google.com/project
-.. _Enable billing for your project.: https://cloud.google.com/billing/docs/how-to/modify-project#enable_billing_for_a_project
-.. _Enable the Cloud Speech API.: https://cloud.google.com/speech
-.. _Setup Authentication.: https://googlecloudplatform.github.io/google-cloud-python/stable/core/auth.html
+.. code-block:: python

-Installation
-~~~~~~~~~~~~
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> operation = client.long_running_recognize(
+    ...     audio=speech.types.RecognitionAudio(
+    ...         uri='gs://my-bucket/recording.flac',
+    ...     ),
+    ...     config=speech.types.RecognitionConfig(
+    ...         encoding='LINEAR16',
+    ...         language_code='en-US',
+    ...         sample_rate_hertz=44100,
+    ...     ),
+    ... )
+    >>> op_result = operation.result()
+    >>> for result in op_result.results:
+    ...     for alternative in result.alternatives:
+    ...         print('=' * 20)
+    ...         print(alternative.transcript)
+    ...         print(alternative.confidence)
+    ====================
+    'how old is the Brooklyn Bridge'
+    0.98267895

-Install this library in a `virtualenv`_ using pip. `virtualenv`_ is a tool to
-create isolated Python environments. The basic problem it addresses is one of
-dependencies and versions, and indirectly permissions.
+Synchronous Recognition
+~~~~~~~~~~~~~~~~~~~~~~~

-With `virtualenv`_, it's possible to install this library without needing system
-install permissions, and without clashing with the installed system
-dependencies.
+The :meth:`~.speech_v1.SpeechClient.recognize` method converts speech
+data to text and returns alternative text transcriptions.
+
+This example uses ``language_code='en-GB'`` to better recognize a dialect from
+Great Britain.

-.. _`virtualenv`: https://virtualenv.pypa.io/en/latest/

-Mac/Linux
-^^^^^^^^^
+.. code-block:: python

-.. code-block:: console
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> response = client.recognize(
+    ...     audio=speech.types.RecognitionAudio(
+    ...         uri='gs://my-bucket/recording.flac',
+    ...     ),
+    ...     config=speech.types.RecognitionConfig(
+    ...         encoding='LINEAR16',
+    ...         language_code='en-GB',
+    ...         sample_rate_hertz=44100,
+    ...     ),
+    ... )
+    >>> for result in response.results:
+    ...     for alternative in result.alternatives:
+    ...         print('=' * 20)
+    ...         print('transcript: ' + alternative.transcript)
+    ...         print('confidence: ' + str(alternative.confidence))
+    ====================
+    transcript: Hello, this is a test
+    confidence: 0.81
+    ====================
+    transcript: Hello, this is one test
+    confidence: 0

-    pip install virtualenv
-    virtualenv <your-env>
-    source <your-env>/bin/activate
-    <your-env>/bin/pip install google-cloud-speech
+The following example uses the ``profanity_filter`` option, which masks
+recognized profanity in the returned transcript.

+.. code-block:: python

-Windows
-^^^^^^^
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> response = client.recognize(
+    ...
     audio=speech.types.RecognitionAudio(
+    ...         uri='gs://my-bucket/recording.flac',
+    ...     ),
+    ...     config=speech.types.RecognitionConfig(
+    ...         encoding='LINEAR16',
+    ...         language_code='en-US',
+    ...         profanity_filter=True,
+    ...         sample_rate_hertz=44100,
+    ...     ),
+    ... )
+    >>> for result in response.results:
+    ...     for alternative in result.alternatives:
+    ...         print('=' * 20)
+    ...         print('transcript: ' + alternative.transcript)
+    ...         print('confidence: ' + str(alternative.confidence))
+    ====================
+    transcript: Hello, this is a f****** test
+    confidence: 0.81

-.. code-block:: console
+Speech context hints can be used to improve recognition accuracy for specific
+words and phrases, and to add new words to the vocabulary of the recognizer.

-    pip install virtualenv
-    virtualenv <your-env>
-    <your-env>\Scripts\activate
-    <your-env>\Scripts\pip.exe install google-cloud-speech
+.. code-block:: python

-Preview
-~~~~~~~
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> response = client.recognize(
+    ...     audio=speech.types.RecognitionAudio(
+    ...         uri='gs://my-bucket/recording.flac',
+    ...     ),
+    ...     config=speech.types.RecognitionConfig(
+    ...         encoding='LINEAR16',
+    ...         language_code='en-US',
+    ...         sample_rate_hertz=44100,
+    ...         speech_contexts=[speech.types.SpeechContext(
+    ...             phrases=['hi', 'good afternoon'],
+    ...         )],
+    ...     ),
+    ... )
+    >>> for result in response.results:
+    ...     for alternative in result.alternatives:
+    ...         print('=' * 20)
+    ...         print('transcript: ' + alternative.transcript)
+    ...         print('confidence: ' + str(alternative.confidence))
+    ====================
+    transcript: Hello, this is a test
+    confidence: 0.81

-SpeechClient
-^^^^^^^^^^^^
+Streaming Recognition
+~~~~~~~~~~~~~~~~~~~~~

-.. code:: py
+The :meth:`~speech_v1.SpeechClient.streaming_recognize` method converts
+speech data to possible text alternatives on the fly.

-    from google.cloud import speech_v1
-    from google.cloud.speech_v1 import enums
+.. note::
+    Streaming recognition requests are limited to 1 minute of audio.

-    client = speech_v1.SpeechClient()
+    See: https://cloud.google.com/speech/limits#content

-    encoding = enums.RecognitionConfig.AudioEncoding.FLAC
-    sample_rate_hertz = 44100
-    language_code = 'en-US'
-    config = {'encoding': encoding, 'sample_rate_hertz': sample_rate_hertz, 'language_code': language_code}
-    uri = 'gs://bucket_name/file_name.flac'
-    audio = {'uri': uri}
+.. code-block:: python

-    response = client.recognize(config, audio)
+    >>> import io
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> config = speech.types.RecognitionConfig(
+    ...     encoding='LINEAR16',
+    ...     language_code='en-US',
+    ...     sample_rate_hertz=44100,
+    ... )
+    >>> with io.open('./hello.wav', 'rb') as stream:
+    ...     requests = [speech.types.StreamingRecognizeRequest(
+    ...         audio_content=stream.read(),
+    ...     )]
+    >>> responses = client.streaming_recognize(
+    ...     speech.types.StreamingRecognitionConfig(config=config),
+    ...     requests,
+    ... )
+    >>> for response in responses:
+    ...     for result in response.results:
+    ...         for alternative in result.alternatives:
+    ...             print('=' * 20)
+    ...             print('transcript: ' + alternative.transcript)
+    ...             print('confidence: ' + str(alternative.confidence))
+    ====================
+    transcript: hello thank you for using Google Cloud platform
+    confidence: 0.927983105183

-Next Steps
-~~~~~~~~~~

-- Read the `Client Library Documentation`_ for Cloud Speech API
-  API to see other available methods on the
 client.
-- Read the `Cloud Speech API Product documentation`_ to learn
-  more about the product and see How-to Guides.
-- View this `repository’s main README`_ to see the full list of Cloud
-  APIs that we cover.

-.. _Cloud Speech API Product documentation: https://cloud.google.com/speech
-.. _repository’s main README: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst
+By default, the API performs continuous recognition (it continues to process
+audio even if the speaker pauses) until the client closes the output stream or
+until the maximum time limit is reached.
+
+If you only want to recognize a single utterance, set ``single_utterance`` to
+:data:`True`; only one result will be returned.
+
+See: `Single Utterance`_
+
+.. code-block:: python
+
+    >>> import io
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> config = speech.types.RecognitionConfig(
+    ...     encoding='LINEAR16',
+    ...     language_code='en-US',
+    ...     sample_rate_hertz=44100,
+    ... )
+    >>> with io.open('./hello-pause-goodbye.wav', 'rb') as stream:
+    ...     requests = [speech.types.StreamingRecognizeRequest(
+    ...         audio_content=stream.read(),
+    ...     )]
+    >>> responses = client.streaming_recognize(
+    ...     speech.types.StreamingRecognitionConfig(
+    ...         config=config,
+    ...         single_utterance=True,
+    ...     ),
+    ...     requests,
+    ... )
+    >>> for response in responses:
+    ...     for result in response.results:
+    ...         for alternative in result.alternatives:
+    ...             print('=' * 20)
+    ...             print('transcript: ' + alternative.transcript)
+    ...             print('confidence: ' + str(alternative.confidence))
+    ====================
+    transcript: testing a pause
+    confidence: 0.933770477772
+
+If ``interim_results`` is set to :data:`True`, interim results
+(tentative hypotheses) may be returned as they become available.
+
+.. code-block:: python
+
+    >>> import io
+    >>> from google.cloud import speech
+    >>> client = speech.SpeechClient()
+    >>> config = speech.types.RecognitionConfig(
+    ...     encoding='LINEAR16',
+    ...     language_code='en-US',
+    ...     sample_rate_hertz=44100,
+    ... )
+    >>> with io.open('./hello.wav', 'rb') as stream:
+    ...     requests = [speech.types.StreamingRecognizeRequest(
+    ...         audio_content=stream.read(),
+    ...     )]
+    >>> config = speech.types.StreamingRecognitionConfig(
+    ...     config=config,
+    ...     interim_results=True,
+    ... )
+    >>> responses = client.streaming_recognize(config, requests)
+    >>> for response in responses:
+    ...     for result in response.results:
+    ...         for alternative in result.alternatives:
+    ...             print('=' * 20)
+    ...             print('transcript: ' + alternative.transcript)
+    ...             print('confidence: ' + str(alternative.confidence))
+    ...             print('is_final: ' + str(result.is_final))
+    ====================
+    'he'
+    None
+    False
+    ====================
+    'hell'
+    None
+    False
+    ====================
+    'hello'
+    0.973458576
+    True
+
+
+.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
+.. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize
+.. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize
+
+
-Api Reference
+API Reference
 -------------
+
+..
 toctree::
+    :maxdepth: 2
+
+    gapic/v1/api
+    gapic/v1/types
+
+A new beta release, spelled ``v1p1beta1``, is provided to preview upcoming
+features. To use it, import from ``google.cloud.speech_v1p1beta1`` instead of
+``google.cloud.speech``.
+
+An API and type reference is provided for this beta as well:
+
+.. toctree::
+    :maxdepth: 2
+
+    gapic/v1p1beta1/api
+    gapic/v1p1beta1/types
+
+Changelog
+---------
+
+For a list of all ``google-cloud-speech`` releases:
+
 .. toctree::
-  :maxdepth: 2
+    :maxdepth: 2

-  gapic/v1/api
-  gapic/v1/types
-  gapic/v1p1beta1/api
-  gapic/v1p1beta1/types
-  changelog
+    changelog
diff --git a/packages/google-cloud-python-speech/synth.py b/packages/google-cloud-python-speech/synth.py
index 989d41aece0a..cd1358597831 100644
--- a/packages/google-cloud-python-speech/synth.py
+++ b/packages/google-cloud-python-speech/synth.py
@@ -36,41 +36,8 @@
     s.move(library / f'docs/gapic/{version}')

-# Use the highest version library to generate documentation index, README, and
-# import alias.
+# Use the highest version library to generate documentation import alias.
 s.move(library / 'google/cloud/speech.py')
-s.move(library / 'docs/index.rst')
-s.move(library / 'README.rst')
-
-
-# Make the docs multiversion
-s.replace(
-    'docs/index.rst',
-    r'    gapic/v1/api(.+?)\Z',
-    """\
-    gapic/v1/api
-    gapic/v1/types
-    gapic/v1p1beta1/api
-    gapic/v1p1beta1/types
-    changelog
-""", re.DOTALL | re.MULTILINE)
-
-
-# The release stage is Beta, not Alpha.
-s.replace(
-    ['README.rst', 'docs/index.rst'],
-    r'Google Cloud Speech API \(`Alpha`_\)',
-    'Google Cloud Speech API (`Beta`_)')
-
-
-s.replace(
-    ['README.rst', 'docs/index.rst'],
-    '`Alpha`', '`Beta`')
-
-
-s.replace(
-    ['README.rst', 'docs/index.rst'],
-    '.. _Alpha', '.. _Beta')
-

 # Fix bad reference to operations_v1
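
The new ``docs/index.rst`` above tells readers to import from
``google.cloud.speech_v1p1beta1`` to try beta features. The following is a
minimal sketch of that import swap, not part of the patch itself; it assumes
the beta client mirrors the ``v1`` surface used in the README example, and the
bucket and file names are placeholders.

.. code-block:: python

    # Hypothetical usage of the v1p1beta1 surface, mirroring the README's
    # v1 example. The GCS URI below is a placeholder.
    from google.cloud import speech_v1p1beta1 as speech
    from google.cloud.speech_v1p1beta1 import enums

    client = speech.SpeechClient()

    # The GAPIC client accepts plain dicts in place of protobuf messages.
    config = {
        'encoding': enums.RecognitionConfig.AudioEncoding.FLAC,
        'sample_rate_hertz': 44100,
        'language_code': 'en-US',
    }
    audio = {'uri': 'gs://bucket_name/file_name.flac'}

    response = client.recognize(config, audio)
    for result in response.results:
        for alternative in result.alternatives:
            print(alternative.transcript, alternative.confidence)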