Update enhanced models and auto punctuation to GA (#1702)

* Update enhanced models and auto punctuation to GA * Update model-selection to GA
GoogleCloudPlatform · Sep 21, 2018 · ae66001 · ae66001
1 parent b4098b6
commit ae66001
Show file tree

Hide file tree

Showing 8 changed files with 258 additions and 7 deletions.
diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst
@@ -206,6 +206,68 @@ To run this sample:
 
 
 
+Transcribe Enhanced Models
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+.. image:: https://gstatic.com/cloudssh/images/open-btn.png
+   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_enhanced_model.py,speech/cloud-client/README.rst
+
+
+
+
+To run this sample:
+
+.. code-block:: bash
+
+    $ python transcribe_enhanced_model.py
+
+    usage: transcribe_enhanced_model.py [-h] path
+
+    Google Cloud Speech API sample that demonstrates enhanced models
+    and recognition metadata.
+
+    Example usage:
+        python transcribe_enhanced_model.py resources/commercial_mono.wav
+
+    positional arguments:
+      path        File to stream to the API
+
+    optional arguments:
+      -h, --help  show this help message and exit
+
+
+
+Transcribe Automatic Punctuation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+.. image:: https://gstatic.com/cloudssh/images/open-btn.png
+   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_auto_punctuation.py,speech/cloud-client/README.rst
+
+
+
+
+To run this sample:
+
+.. code-block:: bash
+
+    $ python transcribe_auto_punctuation.py
+
+    usage: transcribe_auto_punctuation.py [-h] path
+
+    Google Cloud Speech API sample that demonstrates auto punctuation
+    and recognition metadata.
+
+    Example usage:
+        python transcribe_auto_punctuation.py resources/commercial_mono.wav
+
+    positional arguments:
+      path        File to stream to the API
+
+    optional arguments:
+      -h, --help  show this help message and exit
+
+
+
 Beta Samples
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 

diff --git a/speech/cloud-client/README.rst.in b/speech/cloud-client/README.rst.in
@@ -34,6 +34,12 @@ samples:
 - name: Transcribe Streaming
   file: transcribe_streaming.py
   show_help: true
+- name: Transcribe Enhanced Models
+  file: transcribe_enhanced_model.py
+  show_help: true
+- name: Transcribe Automatic Punctuation
+  file: transcribe_auto_punctuation.py
+  show_help: true
 - name: Beta Samples
   file: beta_snippets.py
   show_help: true

diff --git a/speech/cloud-client/requirements.txt b/speech/cloud-client/requirements.txt
@@ -1 +1 @@
-google-cloud-speech==0.35.0
+google-cloud-speech==0.36.0
diff --git a/speech/cloud-client/transcribe_auto_punctuation.py b/speech/cloud-client/transcribe_auto_punctuation.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample that demonstrates auto punctuation
+and recognition metadata.
+
+Example usage:
+    python transcribe_auto_punctuation.py resources/commercial_mono.wav
+"""
+
+import argparse
+import io
+
+
+def transcribe_file_with_auto_punctuation(path):
+    """Print the transcript of an audio file with auto punctuation enabled."""
+    # [START speech_transcribe_auto_punctuation]
+    from google.cloud import speech
+    client = speech.SpeechClient()
+
+    # path = 'resources/commercial_mono.wav'
+    with io.open(path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech.types.RecognitionAudio(content=content)
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
+        language_code='en-US',
+        # Enable automatic punctuation in the returned transcripts.
+        enable_automatic_punctuation=True)
+
+    response = client.recognize(config, audio)
+
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}'.format(i))
+        print('Transcript: {}'.format(alternative.transcript))
+    # [END speech_transcribe_auto_punctuation]
+
+
+if __name__ == '__main__':
+    # Raw formatter keeps the module docstring's layout in the --help text.
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('path', help='File to stream to the API')
+    args = parser.parse_args()
+
+    transcribe_file_with_auto_punctuation(args.path)
diff --git a/speech/cloud-client/transcribe_auto_punctuation_test.py b/speech/cloud-client/transcribe_auto_punctuation_test.py
@@ -0,0 +1,26 @@
+# Copyright 2018, Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import transcribe_auto_punctuation
+
+RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+
+
+def test_transcribe_file_with_auto_punctuation(capsys):
+    # Build the path from RESOURCES so the test works from any directory.
+    path = os.path.join(RESOURCES, 'commercial_mono.wav')
+    transcribe_auto_punctuation.transcribe_file_with_auto_punctuation(path)
+    out, _ = capsys.readouterr()
+    assert 'Okay. Sure.' in out
diff --git a/speech/cloud-client/transcribe_enhanced_model.py b/speech/cloud-client/transcribe_enhanced_model.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample that demonstrates enhanced models
+and recognition metadata.
+
+Example usage:
+    python transcribe_enhanced_model.py resources/commercial_mono.wav
+"""
+
+import argparse
+import io
+
+
+def transcribe_file_with_enhanced_model(path):
+    """Print the transcript of an audio file using an enhanced model."""
+    # [START speech_transcribe_enhanced_model]
+    from google.cloud import speech
+    client = speech.SpeechClient()
+
+    # path = 'resources/commercial_mono.wav'
+    with io.open(path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech.types.RecognitionAudio(content=content)
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
+        language_code='en-US',
+        # Enhanced models are only available to projects that
+        # opt in for audio data collection.
+        use_enhanced=True,
+        # A model must be specified to use an enhanced model.
+        model='phone_call')
+
+    response = client.recognize(config, audio)
+
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}'.format(i))
+        print('Transcript: {}'.format(alternative.transcript))
+    # [END speech_transcribe_enhanced_model]
+
+
+if __name__ == '__main__':
+    # Raw formatter keeps the module docstring's layout in the --help text.
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('path', help='File to stream to the API')
+    args = parser.parse_args()
+
+    transcribe_file_with_enhanced_model(args.path)
diff --git a/speech/cloud-client/transcribe_enhanced_model_test.py b/speech/cloud-client/transcribe_enhanced_model_test.py
@@ -0,0 +1,26 @@
+# Copyright 2018, Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import transcribe_enhanced_model
+
+RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+
+
+def test_transcribe_file_with_enhanced_model(capsys):
+    # Build the path from RESOURCES so the test works from any directory.
+    path = os.path.join(RESOURCES, 'commercial_mono.wav')
+    transcribe_enhanced_model.transcribe_file_with_enhanced_model(path)
+    out, _ = capsys.readouterr()
+    assert 'Chrome' in out
diff --git a/speech/cloud-client/transcribe_model_selection.py b/speech/cloud-client/transcribe_model_selection.py
@@ -27,11 +27,11 @@
 import argparse
 
 
-# [START speech_transcribe_model_selection_beta]
+# [START speech_transcribe_model_selection]
 def transcribe_model_selection(speech_file, model):
     """Transcribe the given audio file synchronously with
     the selected model."""
-    from google.cloud import speech_v1p1beta1 as speech
+    from google.cloud import speech
     client = speech.SpeechClient()
 
     with open(speech_file, 'rb') as audio_file:
@@ -52,14 +52,14 @@ def transcribe_model_selection(speech_file, model):
         print('-' * 20)
         print('First alternative of result {}'.format(i))
         print(u'Transcript: {}'.format(alternative.transcript))
-# [END speech_transcribe_model_selection_beta]
+# [END speech_transcribe_model_selection]
 
 
-# [START speech_transcribe_model_selection_gcs_beta]
+# [START speech_transcribe_model_selection_gcs]
 def transcribe_model_selection_gcs(gcs_uri, model):
     """Transcribe the given audio file asynchronously with
     the selected model."""
-    from google.cloud import speech_v1p1beta1 as speech
+    from google.cloud import speech
     client = speech.SpeechClient()
 
     audio = speech.types.RecognitionAudio(uri=gcs_uri)
@@ -80,7 +80,7 @@ def transcribe_model_selection_gcs(gcs_uri, model):
         print('-' * 20)
         print('First alternative of result {}'.format(i))
         print(u'Transcript: {}'.format(alternative.transcript))
-# [END speech_transcribe_model_selection_gcs_beta]
+# [END speech_transcribe_model_selection_gcs]
 
 
 if __name__ == '__main__':