Add word time offset samples [(#1050)](GoogleCloudPlatform/python-docs-samples#1050)
googleapis · Sep 3, 2020 · 7528207 · 7528207
1 parent 919446a
commit 7528207
Show file tree

Hide file tree

Showing 6 changed files with 189 additions and 40 deletions.
diff --git a/packages/google-cloud-python-speech/samples/snippets/README.rst b/packages/google-cloud-python-speech/samples/snippets/README.rst
@@ -140,6 +140,32 @@ To run this sample:
       -h, --help  show this help message and exit
 
 
+Transcribe with word time offsets
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+To run this sample:
+
+.. code-block:: bash
+
+    $ python transcribe_word_time_offsets.py
+
+    usage: transcribe_word_time_offsets.py [-h] path
+    
+    Google Cloud Speech API sample that demonstrates word time offsets.
+    
+    Example usage:
+        python transcribe_word_time_offsets.py resources/audio.raw
+        python transcribe_word_time_offsets.py         gs://cloud-samples-tests/speech/vr.flac
+    
+    positional arguments:
+      path        File or GCS path for audio file to be recognized
+    
+    optional arguments:
+      -h, --help  show this help message and exit
+
+
 Transcribe Streaming
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 

diff --git a/packages/google-cloud-python-speech/samples/snippets/README.rst.in b/packages/google-cloud-python-speech/samples/snippets/README.rst.in
@@ -28,6 +28,9 @@ samples:
 - name: Transcribe async
   file: transcribe_async.py
   show_help: true
+- name: Transcribe with word time offsets
+  file: transcribe_word_time_offsets.py
+  show_help: true
 - name: Transcribe Streaming
   file: transcribe_streaming.py
   show_help: true

diff --git a/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py b/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py
@@ -24,7 +24,6 @@
 
 import argparse
 import io
-import time
 
 
 # [START def_transcribe_file]
@@ -49,17 +48,10 @@ def transcribe_file(speech_file):
     operation = client.long_running_recognize(config, audio)
     # [END migration_async_request]
 
-    # Sleep and poll operation.done()
-    retry_count = 100
-    while retry_count > 0 and not operation.done():
-        retry_count -= 1
-        time.sleep(2)
+    print('Waiting for operation to complete...')
+    result = operation.result(timeout=90)
 
-    if not operation.done():
-        print('Operation not complete and retry limit reached.')
-        return
-
-    alternatives = operation.result().results[0].alternatives
+    alternatives = result.results[0].alternatives
     for alternative in alternatives:
         print('Transcript: {}'.format(alternative.transcript))
         print('Confidence: {}'.format(alternative.confidence))
@@ -84,28 +76,13 @@ def transcribe_gcs(gcs_uri):
 
     operation = client.long_running_recognize(config, audio)
 
-    retry_count = 100
-    while retry_count > 0 and not operation.done():
-        retry_count -= 1
-        time.sleep(2)
-
-    if not operation.done():
-        print('Operation not complete and retry limit reached.')
-        return
+    print('Waiting for operation to complete...')
+    result = operation.result(timeout=90)
 
-    alternatives = operation.result().results[0].alternatives
+    alternatives = result.results[0].alternatives
     for alternative in alternatives:
         print('Transcript: {}'.format(alternative.transcript))
         print('Confidence: {}'.format(alternative.confidence))
-
-        for word_info in alternative.words:
-            word = word_info.word
-            start_time = word_info.start_time
-            end_time = word_info.end_time
-            print('Word: {}, start_time: {}, end_time: {}'.format(
-                word,
-                start_time.seconds + start_time.nanos * 1e-9,
-                end_time.seconds + end_time.nanos * 1e-9))
 # [END def_transcribe_gcs]
 
 

diff --git a/packages/google-cloud-python-speech/samples/snippets/transcribe_async_test.py b/packages/google-cloud-python-speech/samples/snippets/transcribe_async_test.py
@@ -33,14 +33,3 @@ def test_transcribe_gcs(capsys):
     out, err = capsys.readouterr()
 
     assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
-
-
-def test_transcribe_gcs_word_time_offsets(capsys):
-    transcribe_async.transcribe_gcs(
-        'gs://python-docs-samples-tests/speech/audio.flac')
-    out, err = capsys.readouterr()
-
-    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
-    time = float(match.group(1))
-
-    assert time > 0
diff --git a/packages/google-cloud-python-speech/samples/snippets/transcribe_word_time_offsets.py b/packages/google-cloud-python-speech/samples/snippets/transcribe_word_time_offsets.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample that demonstrates word time offsets.
+
+Example usage:
+    python transcribe_word_time_offsets.py resources/audio.raw
+    python transcribe_word_time_offsets.py \
+        gs://cloud-samples-tests/speech/vr.flac
+"""
+
+import argparse
+import io
+
+
+def transcribe_file_with_word_time_offsets(speech_file):
+    """Transcribe the given audio file synchronously and output the word time
+    offsets."""
+    from google.cloud import speech
+    from google.cloud.speech import enums
+    from google.cloud.speech import types
+    client = speech.SpeechClient()
+
+    with io.open(speech_file, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = types.RecognitionAudio(content=content)
+    config = types.RecognitionConfig(
+        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code='en-US',
+        enable_word_time_offsets=True)
+
+    response = client.recognize(config, audio)
+
+    alternatives = response.results[0].alternatives
+
+    for alternative in alternatives:
+        print('Transcript: {}'.format(alternative.transcript))
+
+        for word_info in alternative.words:
+            word = word_info.word
+            start_time = word_info.start_time
+            end_time = word_info.end_time
+            print('Word: {}, start_time: {}, end_time: {}'.format(
+                word,
+                start_time.seconds + start_time.nanos * 1e-9,
+                end_time.seconds + end_time.nanos * 1e-9))
+
+
+# [START def_transcribe_gcs]
+def transcribe_gcs_with_word_time_offsets(gcs_uri):
+    """Transcribe the given audio file asynchronously and output the word time
+    offsets."""
+    from google.cloud import speech
+    from google.cloud.speech import enums
+    from google.cloud.speech import types
+    client = speech.SpeechClient()
+
+    audio = types.RecognitionAudio(uri=gcs_uri)
+    config = types.RecognitionConfig(
+        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
+        sample_rate_hertz=16000,
+        language_code='en-US',
+        enable_word_time_offsets=True)
+
+    operation = client.long_running_recognize(config, audio)
+
+    print('Waiting for operation to complete...')
+    result = operation.result(timeout=90)
+
+    alternatives = result.results[0].alternatives
+    for alternative in alternatives:
+        print('Transcript: {}'.format(alternative.transcript))
+        print('Confidence: {}'.format(alternative.confidence))
+
+        for word_info in alternative.words:
+            word = word_info.word
+            start_time = word_info.start_time
+            end_time = word_info.end_time
+            print('Word: {}, start_time: {}, end_time: {}'.format(
+                word,
+                start_time.seconds + start_time.nanos * 1e-9,
+                end_time.seconds + end_time.nanos * 1e-9))
+# [END def_transcribe_gcs]
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        'path', help='File or GCS path for audio file to be recognized')
+    args = parser.parse_args()
+    if args.path.startswith('gs://'):
+        transcribe_gcs_with_word_time_offsets(args.path)
+    else:
+        transcribe_file_with_word_time_offsets(args.path)
diff --git a/packages/google-cloud-python-speech/samples/snippets/transcribe_word_time_offsets_test.py b/packages/google-cloud-python-speech/samples/snippets/transcribe_word_time_offsets_test.py
@@ -0,0 +1,43 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+
+import transcribe_word_time_offsets
+
+RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
+
+
+def test_transcribe_file_with_word_time_offsets(capsys):
+    transcribe_word_time_offsets.transcribe_file_with_word_time_offsets(
+        os.path.join(RESOURCES, 'audio.raw'))
+    out, _ = capsys.readouterr()
+
+    print(out)
+    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
+    time = float(match.group(1))
+
+    assert time > 0
+
+
+def test_transcribe_gcs_with_word_time_offsets(capsys):
+    transcribe_word_time_offsets.transcribe_gcs_with_word_time_offsets(
+        'gs://python-docs-samples-tests/speech/audio.flac')
+    out, _ = capsys.readouterr()
+
+    print(out)
+    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
+    time = float(match.group(1))
+
+    assert time > 0