Skip to content

Commit

Permalink
feat(v2): add processing strategy to batch recognition requests (#514)
Browse files Browse the repository at this point in the history
* feat: add processing strategy to batch recognition requests

PiperOrigin-RevId: 530882015

Source-Link: googleapis/googleapis@189bdfa

Source-Link: googleapis/googleapis-gen@64c7a84
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjRjN2E4NDNhZmRkYThjNjUxZmZjYTZlMWEzNzRhYzc4MDFkMzcyOCJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] authored May 10, 2023
1 parent 39e28e6 commit 49c356c
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -377,9 +377,9 @@ class ListRecognizersRequest(proto.Message):
page_size (int):
The maximum number of Recognizers to return.
The service may return fewer than this value. If
unspecified, at most 20 Recognizers will be
returned. The maximum value is 20; values above
20 will be coerced to 20.
unspecified, at most 5 Recognizers will be
returned. The maximum value is 100; values above
100 will be coerced to 100.
page_token (str):
A page token, received from a previous
[ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers]
Expand Down Expand Up @@ -593,59 +593,20 @@ class Recognizer(proto.Message):
Select the model best suited to your domain to get best
results.
Supported models:
- ``latest_long``
Best for long form content like media or conversation.
- ``latest_short``
Best for short form content like commands or single shot
directed speech. When using this model, the service will
stop transcribing audio after the first utterance is
detected and completed.
When using this model,
[SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
is not supported; multi-channel audio is accepted, but
only the first channel will be processed and transcribed.
- ``telephony``
Best for audio that originated from a phone call
(typically recorded at an 8 kHz sampling rate).
- ``medical_conversation``
For conversations between a medical provider—for example,
a doctor or nurse—and a patient. Use this model when both
a provider and a patient are speaking. Words uttered by
each speaker are automatically detected and labeled in
the returned transcript.
For supported features please see `medical models
documentation <https://cloud.google.com/speech-to-text/docs/medical-models>`__.
- ``medical_dictation``
For dictated notes spoken by a single medical
provider—for example, a doctor dictating notes about a
patient's blood test results.
For supported features please see `medical models
documentation <https://cloud.google.com/speech-to-text/docs/medical-models>`__.
- ``usm``
The next generation of Speech-to-Text models from Google.
Guidance for choosing which model to use can be found in the
`Transcription Models
Documentation <https://cloud.google.com/speech-to-text/v2/docs/transcription-model>`__
and the models supported in each region can be found in the
`Table Of Supported
Models <https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages>`__.
language_codes (MutableSequence[str]):
Required. The language of the supplied audio as a
`BCP-47 <https://www.rfc-editor.org/rfc/bcp/bcp47.txt>`__
language tag.
Supported languages for each model are listed at:
https://cloud.google.com/speech-to-text/docs/languages
Supported languages for each model are listed in the `Table
of Supported
Models <https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages>`__.
If additional languages are provided, recognition result
will contain recognition in the most likely language
Expand Down Expand Up @@ -1652,8 +1613,26 @@ class BatchRecognizeRequest(proto.Message):
recognition_output_config (google.cloud.speech_v2.types.RecognitionOutputConfig):
Configuration options for where to output the
transcripts of each file.
processing_strategy (google.cloud.speech_v2.types.BatchRecognizeRequest.ProcessingStrategy):
Processing strategy to use for this request.
"""

class ProcessingStrategy(proto.Enum):
r"""Possible processing strategies for batch requests.

Values:
PROCESSING_STRATEGY_UNSPECIFIED (0):
Default value for the processing strategy.
The request is processed as soon as it is
received.
DYNAMIC_BATCHING (1):
If selected, processes the request during
lower utilization periods for a price discount.
The request is fulfilled within 24 hours.
"""
PROCESSING_STRATEGY_UNSPECIFIED = 0
DYNAMIC_BATCHING = 1

recognizer: str = proto.Field(
proto.STRING,
number=1,
Expand All @@ -1678,6 +1657,11 @@ class BatchRecognizeRequest(proto.Message):
number=6,
message="RecognitionOutputConfig",
)
# Enum field number 7: the processing strategy to use for this request.
processing_strategy: ProcessingStrategy = proto.Field(
proto.ENUM,
number=7,
enum=ProcessingStrategy,
)


class GcsOutputConfig(proto.Message):
Expand Down Expand Up @@ -2624,10 +2608,10 @@ class ListCustomClassesRequest(proto.Message):
``projects/{project}/locations/{location}``.
page_size (int):
Number of results per request. A valid page_size ranges
from 0 to 20 inclusive. If the page_size is zero or
from 0 to 100 inclusive. If the page_size is zero or
unspecified, a page size of 5 will be chosen. If the page
size exceeds 20, it will be coerced down to 20. Note that a
call might return fewer results than the requested page
size exceeds 100, it will be coerced down to 100. Note that
a call might return fewer results than the requested page
size.
page_token (str):
A page token, received from a previous
Expand Down Expand Up @@ -2878,9 +2862,9 @@ class ListPhraseSetsRequest(proto.Message):
page_size (int):
The maximum number of PhraseSets to return.
The service may return fewer than this value. If
unspecified, at most 20 PhraseSets will be
returned. The maximum value is 20; values above
20 will be coerced to 20.
unspecified, at most 5 PhraseSets will be
returned. The maximum value is 100; values above
100 will be coerced to 100.
page_token (str):
A page token, received from a previous
[ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-speech",
"version": "2.19.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-speech",
"version": "2.19.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-speech",
"version": "2.19.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def partition(
class speechCallTransformer(cst.CSTTransformer):
CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
'batch_recognize': ('recognizer', 'config', 'config_mask', 'files', 'recognition_output_config', ),
'batch_recognize': ('recognizer', 'config', 'config_mask', 'files', 'recognition_output_config', 'processing_strategy', ),
'create_custom_class': ('custom_class', 'parent', 'validate_only', 'custom_class_id', ),
'create_phrase_set': ('phrase_set', 'parent', 'validate_only', 'phrase_set_id', ),
'create_recognizer': ('recognizer', 'parent', 'validate_only', 'recognizer_id', ),
Expand Down

0 comments on commit 49c356c

Please sign in to comment.