feat(v2): add processing strategy to batch recognition requests (#514)

* feat: add processing strategy to batch recognition requests

  PiperOrigin-RevId: 530882015
  Source-Link: googleapis/googleapis@189bdfa
  Source-Link: googleapis/googleapis-gen@64c7a84
  Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjRjN2E4NDNhZmRkYThjNjUxZmZjYTZlMWEzNzRhYzc4MDFkMzcyOCJ9

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
googleapis · May 10, 2023 · 49c356c · 49c356c
1 parent 39e28e6
commit 49c356c
Show file tree

Hide file tree

Showing 5 changed files with 45 additions and 61 deletions.
diff --git a/packages/google-cloud-speech/google/cloud/speech_v2/types/cloud_speech.py b/packages/google-cloud-speech/google/cloud/speech_v2/types/cloud_speech.py
@@ -377,9 +377,9 @@ class ListRecognizersRequest(proto.Message):
         page_size (int):
             The maximum number of Recognizers to return.
             The service may return fewer than this value. If
-            unspecified, at most 20 Recognizers will be
-            returned. The maximum value is 20; values above
-            20 will be coerced to 20.
+            unspecified, at most 5 Recognizers will be
+            returned. The maximum value is 100; values above
+            100 will be coerced to 100.
         page_token (str):
             A page token, received from a previous
             [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers]
@@ -593,59 +593,20 @@ class Recognizer(proto.Message):
             Select the model best suited to your domain to get best
             results.
 
-            Supported models:
-
-            -  ``latest_long``
-
-               Best for long form content like media or conversation.
-
-            -  ``latest_short``
-
-               Best for short form content like commands or single shot
-               directed speech. When using this model, the service will
-               stop transcribing audio after the first utterance is
-               detected and completed.
-
-               When using this model,
-               [SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
-               is not supported; multi-channel audio is accepted, but
-               only the first channel will be processed and transcribed.
-
-            -  ``telephony``
-
-               Best for audio that originated from a phone call
-               (typically recorded at an 8khz sampling rate).
-
-            -  ``medical_conversation``
-
-               For conversations between a medical provider—for example,
-               a doctor or nurse—and a patient. Use this model when both
-               a provider and a patient are speaking. Words uttered by
-               each speaker are automatically detected and labeled in
-               the returned transcript.
-
-               For supported features please see `medical models
-               documentation <https://cloud.google.com/speech-to-text/docs/medical-models>`__.
-
-            -  ``medical_dictation``
-
-               For dictated notes spoken by a single medical
-               provider—for example, a doctor dictating notes about a
-               patient's blood test results.
-
-               For supported features please see `medical models
-               documentation <https://cloud.google.com/speech-to-text/docs/medical-models>`__.
-
-            -  ``usm``
-
-               The next generation of Speech-to-Text models from Google.
+            Guidance for choosing which model to use can be found in the
+            `Transcription Models
+            Documentation <https://cloud.google.com/speech-to-text/v2/docs/transcription-model>`__
+            and the models supported in each region can be found in the
+            `Table of Supported
+            Models <https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages>`__.
         language_codes (MutableSequence[str]):
             Required. The language of the supplied audio as a
             `BCP-47 <https://www.rfc-editor.org/rfc/bcp/bcp47.txt>`__
             language tag.
 
-            Supported languages for each model are listed at:
-            https://cloud.google.com/speech-to-text/docs/languages
+            Supported languages for each model are listed in the `Table
+            of Supported
+            Models <https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages>`__.
 
             If additional languages are provided, recognition result
             will contain recognition in the most likely language
@@ -1652,8 +1613,26 @@ class BatchRecognizeRequest(proto.Message):
         recognition_output_config (google.cloud.speech_v2.types.RecognitionOutputConfig):
             Configuration options for where to output the
             transcripts of each file.
+        processing_strategy (google.cloud.speech_v2.types.BatchRecognizeRequest.ProcessingStrategy):
+            Processing strategy to use for this request.
     """
 
+    class ProcessingStrategy(proto.Enum):
+        r"""Possible processing strategies for batch requests.
+
+        Values:
+            PROCESSING_STRATEGY_UNSPECIFIED (0):
+                Default value for the processing strategy.
+                The request is processed as soon as it is
+                received.
+            DYNAMIC_BATCHING (1):
+                If selected, processes the request during
+                lower utilization periods for a price discount.
+                The request is fulfilled within 24 hours.
+        """
+        PROCESSING_STRATEGY_UNSPECIFIED = 0
+        DYNAMIC_BATCHING = 1
+
     recognizer: str = proto.Field(
         proto.STRING,
         number=1,
@@ -1678,6 +1657,11 @@ class BatchRecognizeRequest(proto.Message):
         number=6,
         message="RecognitionOutputConfig",
     )
+    processing_strategy: ProcessingStrategy = proto.Field(
+        proto.ENUM,
+        number=7,
+        enum=ProcessingStrategy,
+    )
 
 
 class GcsOutputConfig(proto.Message):
@@ -2624,10 +2608,10 @@ class ListCustomClassesRequest(proto.Message):
             ``projects/{project}/locations/{location}``.
         page_size (int):
             Number of results per request. A valid page_size ranges
-            from 0 to 20 inclusive. If the page_size is zero or
+            from 0 to 100 inclusive. If the page_size is zero or
             unspecified, a page size of 5 will be chosen. If the page
-            size exceeds 20, it will be coerced down to 20. Note that a
-            call might return fewer results than the requested page
+            size exceeds 100, it will be coerced down to 100. Note that
+            a call might return fewer results than the requested page
             size.
         page_token (str):
             A page token, received from a previous
@@ -2878,9 +2862,9 @@ class ListPhraseSetsRequest(proto.Message):
         page_size (int):
             The maximum number of PhraseSets to return.
             The service may return fewer than this value. If
-            unspecified, at most 20 PhraseSets will be
-            returned. The maximum value is 20; values above
-            20 will be coerced to 20.
+            unspecified, at most 5 PhraseSets will be
+            returned. The maximum value is 100; values above
+            100 will be coerced to 100.
         page_token (str):
             A page token, received from a previous
             [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets]

diff --git a/...oogle-cloud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v1.json b/...oogle-cloud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-speech",
-    "version": "2.19.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

diff --git a/...loud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v1p1beta1.json b/...loud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v1p1beta1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-speech",
-    "version": "2.19.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

diff --git a/...oogle-cloud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v2.json b/...oogle-cloud-speech/samples/generated_samples/snippet_metadata_google.cloud.speech.v2.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-speech",
-    "version": "2.19.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

diff --git a/packages/google-cloud-speech/scripts/fixup_speech_v2_keywords.py b/packages/google-cloud-speech/scripts/fixup_speech_v2_keywords.py
@@ -39,7 +39,7 @@ def partition(
 class speechCallTransformer(cst.CSTTransformer):
     CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
     METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
-        'batch_recognize': ('recognizer', 'config', 'config_mask', 'files', 'recognition_output_config', ),
+        'batch_recognize': ('recognizer', 'config', 'config_mask', 'files', 'recognition_output_config', 'processing_strategy', ),
         'create_custom_class': ('custom_class', 'parent', 'validate_only', 'custom_class_id', ),
         'create_phrase_set': ('phrase_set', 'parent', 'validate_only', 'phrase_set_id', ),
         'create_recognizer': ('recognizer', 'parent', 'validate_only', 'recognizer_id', ),