feat(speech): update the api

#### speech:v1

The following keys were added:

- schemas.Entry (Total Keys: 5)
- schemas.RecognitionConfig.properties.transcriptNormalization.$ref (Total Keys: 1)
- schemas.TranscriptNormalization (Total Keys: 4)
googleapis · Nov 29, 2023 · c29c5ea
1 parent 569fe15
commit c29c5ea
Show file tree

Hide file tree

Showing 4 changed files with 64 additions and 7 deletions.
diff --git a/docs/dyn/speech_v1.speech.html b/docs/dyn/speech_v1.speech.html
@@ -200,6 +200,15 @@ <h3>Method Details</h3>
         ],
       },
     ],
+    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
+      &quot;entries&quot;: [ # A list of replacement entries. We will perform replacement with one entry at a time. For example, the second entry in [&quot;cat&quot; =&gt; &quot;dog&quot;, &quot;mountain cat&quot; =&gt; &quot;mountain dog&quot;] will never be applied because we will always process the first entry before it. At most 100 entries.
+        { # A single replacement configuration.
+          &quot;caseSensitive&quot;: True or False, # Whether the search is case sensitive.
+          &quot;replace&quot;: &quot;A String&quot;, # What to replace with. Max length is 100 characters.
+          &quot;search&quot;: &quot;A String&quot;, # What to replace. Max length is 100 characters.
+        },
+      ],
+    },
     &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition. If `use_enhanced` is set to true and the `model` field is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If `use_enhanced` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model.
   },
   &quot;outputConfig&quot;: { # Specifies an optional destination for the recognition results. # Optional. Specifies an optional destination for the recognition results.
@@ -347,6 +356,15 @@ <h3>Method Details</h3>
         ],
       },
     ],
+    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
+      &quot;entries&quot;: [ # A list of replacement entries. We will perform replacement with one entry at a time. For example, the second entry in [&quot;cat&quot; =&gt; &quot;dog&quot;, &quot;mountain cat&quot; =&gt; &quot;mountain dog&quot;] will never be applied because we will always process the first entry before it. At most 100 entries.
+        { # A single replacement configuration.
+          &quot;caseSensitive&quot;: True or False, # Whether the search is case sensitive.
+          &quot;replace&quot;: &quot;A String&quot;, # What to replace with. Max length is 100 characters.
+          &quot;search&quot;: &quot;A String&quot;, # What to replace. Max length is 100 characters.
+        },
+      ],
+    },
     &quot;useEnhanced&quot;: True or False, # Set to true to use an enhanced model for speech recognition. If `use_enhanced` is set to true and the `model` field is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If `use_enhanced` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model.
   },
 }

diff --git a/docs/dyn/speech_v1p1beta1.speech.html b/docs/dyn/speech_v1p1beta1.speech.html
@@ -203,7 +203,7 @@ <h3>Method Details</h3>
         ],
       },
     ],
-    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
+    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
       &quot;entries&quot;: [ # A list of replacement entries. We will perform replacement with one entry at a time. For example, the second entry in [&quot;cat&quot; =&gt; &quot;dog&quot;, &quot;mountain cat&quot; =&gt; &quot;mountain dog&quot;] will never be applied because we will always process the first entry before it. At most 100 entries.
         { # A single replacement configuration.
           &quot;caseSensitive&quot;: True or False, # Whether the search is case sensitive.
@@ -362,7 +362,7 @@ <h3>Method Details</h3>
         ],
       },
     ],
-    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
+    &quot;transcriptNormalization&quot;: { # Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts. # Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability &gt; 0.8) and final transcripts.
       &quot;entries&quot;: [ # A list of replacement entries. We will perform replacement with one entry at a time. For example, the second entry in [&quot;cat&quot; =&gt; &quot;dog&quot;, &quot;mountain cat&quot; =&gt; &quot;mountain dog&quot;] will never be applied because we will always process the first entry before it. At most 100 entries.
         { # A single replacement configuration.
           &quot;caseSensitive&quot;: True or False, # Whether the search is case sensitive.

diff --git a/googleapiclient/discovery_cache/documents/speech.v1.json b/googleapiclient/discovery_cache/documents/speech.v1.json
@@ -524,7 +524,7 @@
       }
     }
   },
-  "revision": "20231024",
+  "revision": "20231115",
   "rootUrl": "https://speech.googleapis.com/",
   "schemas": {
     "ABNFGrammar": {
@@ -674,6 +674,25 @@
       "properties": {},
       "type": "object"
     },
+    "Entry": {
+      "description": "A single replacement configuration.",
+      "id": "Entry",
+      "properties": {
+        "caseSensitive": {
+          "description": "Whether the search is case sensitive.",
+          "type": "boolean"
+        },
+        "replace": {
+          "description": "What to replace with. Max length is 100 characters.",
+          "type": "string"
+        },
+        "search": {
+          "description": "What to replace. Max length is 100 characters.",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
     "ListCustomClassesResponse": {
       "description": "Message returned to the client by the `ListCustomClasses` method.",
       "id": "ListCustomClassesResponse",
@@ -1024,6 +1043,7 @@
             "AMR_WB",
             "OGG_OPUS",
             "SPEEX_WITH_HEADER_BYTE",
+            "MP3",
             "WEBM_OPUS"
           ],
           "enumDescriptions": [
@@ -1035,7 +1055,8 @@
             "Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.",
             "Opus encoded audio frames in Ogg container ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.",
             "Although the use of lossy encodings is not recommended, if a very low bitrate encoding is required, `OGG_OPUS` is highly preferred over Speex encoding. The [Speex](https://speex.org/) encoding supported by Cloud Speech API has a header byte in each block, as in MIME type `audio/x-speex-with-header-byte`. It is a variant of the RTP Speex encoding defined in [RFC 5574](https://tools.ietf.org/html/rfc5574). The stream is a sequence of blocks, one block per RTP packet. Each block starts with a byte containing the length of the block, in bytes, followed by one or more frames of Speex data, padded to an integral number of bytes (octets) as specified in RFC 5574. In other words, each RTP header is replaced with a single byte containing the block length. Only Speex wideband is supported. `sample_rate_hertz` must be 16000.",
-            "Opus encoded audio frames in WebM container ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000."
+            "MP3 audio. MP3 encoding is a Beta feature and only available in v1p1beta1. Support all standard MP3 bitrates (which range from 32-320 kbps). When using this encoding, `sample_rate_hertz` has to match the sample rate of the file being used.",
+            "Opus encoded audio frames in WebM container ([WebM](https://www.webmproject.org/docs/container/)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000."
           ],
           "type": "string"
         },
@@ -1072,6 +1093,10 @@
           },
           "type": "array"
         },
+        "transcriptNormalization": {
+          "$ref": "TranscriptNormalization",
+          "description": "Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability > 0.8) and final transcripts."
+        },
         "useEnhanced": {
           "description": "Set to true to use an enhanced model for speech recognition. If `use_enhanced` is set to true and the `model` field is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If `use_enhanced` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model.",
           "type": "boolean"
@@ -1398,6 +1423,20 @@
       },
       "type": "object"
     },
+    "TranscriptNormalization": {
+      "description": "Transcription normalization configuration. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability > 0.8) and final transcripts.",
+      "id": "TranscriptNormalization",
+      "properties": {
+        "entries": {
+          "description": "A list of replacement entries. We will perform replacement with one entry at a time. For example, the second entry in [\"cat\" => \"dog\", \"mountain cat\" => \"mountain dog\"] will never be applied because we will always process the first entry before it. At most 100 entries.",
+          "items": {
+            "$ref": "Entry"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
     "TranscriptOutputConfig": {
       "description": "Specifies an optional destination for the recognition results.",
       "id": "TranscriptOutputConfig",

diff --git a/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json b/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json
@@ -524,7 +524,7 @@
       }
     }
   },
-  "revision": "20231024",
+  "revision": "20231115",
   "rootUrl": "https://speech.googleapis.com/",
   "schemas": {
     "ABNFGrammar": {
@@ -1072,7 +1072,7 @@
             "Opus encoded audio frames in Ogg container ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.",
             "Although the use of lossy encodings is not recommended, if a very low bitrate encoding is required, `OGG_OPUS` is highly preferred over Speex encoding. The [Speex](https://speex.org/) encoding supported by Cloud Speech API has a header byte in each block, as in MIME type `audio/x-speex-with-header-byte`. It is a variant of the RTP Speex encoding defined in [RFC 5574](https://tools.ietf.org/html/rfc5574). The stream is a sequence of blocks, one block per RTP packet. Each block starts with a byte containing the length of the block, in bytes, followed by one or more frames of Speex data, padded to an integral number of bytes (octets) as specified in RFC 5574. In other words, each RTP header is replaced with a single byte containing the block length. Only Speex wideband is supported. `sample_rate_hertz` must be 16000.",
             "MP3 audio. MP3 encoding is a Beta feature and only available in v1p1beta1. Support all standard MP3 bitrates (which range from 32-320 kbps). When using this encoding, `sample_rate_hertz` has to match the sample rate of the file being used.",
-            "Opus encoded audio frames in WebM container ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000."
+            "Opus encoded audio frames in WebM container ([WebM](https://www.webmproject.org/docs/container/)). `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000."
           ],
           "type": "string"
         },
@@ -1111,7 +1111,7 @@
         },
         "transcriptNormalization": {
           "$ref": "TranscriptNormalization",
-          "description": "Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability > 0.8) and final transcripts."
+          "description": "Optional. Use transcription normalization to automatically replace parts of the transcript with phrases of your choosing. For StreamingRecognize, this normalization only applies to stable partial transcripts (stability > 0.8) and final transcripts."
         },
         "useEnhanced": {
           "description": "Set to true to use an enhanced model for speech recognition. If `use_enhanced` is set to true and the `model` field is not set, then an appropriate enhanced model is chosen if an enhanced model exists for the audio. If `use_enhanced` is true and an enhanced version of the specified model does not exist, then the speech is recognized using the standard version of the specified model.",