CodeGen from PR 25734 in Azure/azure-rest-api-specs

Azure OpenAI: minimal, partial specification for Whisper transcription/translation (Azure#25734) * minimalistic whisper .tsp definitions * merge, format, remove client.tsp changes for mvp simplicity * speculative example JSON update for string response types * restore header traits for swagger hints * review pass, prioritize object response for OpenAPI v2 * PR feedback: fully distinguish transcription/translation models
azure-sdk · Sep 20, 2023 · ac39fe0 · ac39fe0
1 parent 88aa872
commit ac39fe0
Show file tree

Hide file tree

Showing 7 changed files with 477 additions and 30 deletions.
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java
@@ -14,6 +14,7 @@
 import com.azure.ai.openai.models.AudioTranscription;
 import com.azure.ai.openai.models.AudioTranscriptionFormat;
 import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslation;
 import com.azure.ai.openai.models.AudioTranslationOptions;
 import com.azure.ai.openai.models.ChatCompletions;
 import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -1261,14 +1262,14 @@ public Mono<String> getAudioTranscriptionAsPlainText(
      */
     @Generated
     @ServiceMethod(returns = ReturnType.SINGLE)
-    public Mono<AudioTranscription> getAudioTranslationAsResponseObject(
+    public Mono<AudioTranslation> getAudioTranslationAsResponseObject(
             String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
         // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return getAudioTranslationAsResponseObjectWithResponse(
                         deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
                 .flatMap(FluxUtil::toMono)
-                .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class));
+                .map(protocolMethodData -> protocolMethodData.toObject(AudioTranslation.class));
     }
 
     /**

diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java
@@ -12,6 +12,7 @@
 import com.azure.ai.openai.models.AudioTranscription;
 import com.azure.ai.openai.models.AudioTranscriptionFormat;
 import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslation;
 import com.azure.ai.openai.models.AudioTranslationOptions;
 import com.azure.ai.openai.models.ChatCompletions;
 import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -1242,14 +1243,14 @@ public String getAudioTranscriptionAsPlainText(
      */
     @Generated
     @ServiceMethod(returns = ReturnType.SINGLE)
-    public AudioTranscription getAudioTranslationAsResponseObject(
+    public AudioTranslation getAudioTranslationAsResponseObject(
             String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
         // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return getAudioTranslationAsResponseObjectWithResponse(
                         deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
                 .getValue()
-                .toObject(AudioTranscription.class);
+                .toObject(AudioTranslation.class);
     }
 
     /**

diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.Duration;
+import java.util.List;
+
+/** Result information for an operation that translated spoken audio into written text. */
+@Immutable
+public final class AudioTranslation {
+    /*
+     * The translated text for the provided audio data.
+     */
+    @Generated
+    @JsonProperty(value = "text")
+    private String text;
+
+    /*
+     * The label that describes which operation type generated the accompanying response data.
+     */
+    @Generated
+    @JsonProperty(value = "task")
+    private AudioTaskLabel task;
+
+    /*
+     * The spoken language that was detected in the translated audio data.
+     * This is expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+     */
+    @Generated
+    @JsonProperty(value = "language")
+    private String language;
+
+    /*
+     * The total duration of the audio processed to produce accompanying translation information.
+     */
+    @Generated
+    @JsonProperty(value = "duration")
+    private Double duration;
+
+    /*
+     * A collection of information about the timing, probabilities, and other detail of each processed audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "segments")
+    private List<AudioTranslationSegment> segments;
+
+    /**
+     * Creates an instance of AudioTranslation class.
+     *
+     * @param text the text value to set.
+     */
+    @Generated
+    @JsonCreator
+    private AudioTranslation(@JsonProperty(value = "text") String text) {
+        this.text = text;
+    }
+
+    /**
+     * Get the text property: The translated text for the provided audio data.
+     *
+     * @return the text value.
+     */
+    @Generated
+    public String getText() {
+        return this.text;
+    }
+
+    /**
+     * Get the task property: The label that describes which operation type generated the accompanying response data.
+     *
+     * @return the task value.
+     */
+    @Generated
+    public AudioTaskLabel getTask() {
+        return this.task;
+    }
+
+    /**
+     * Get the language property: The spoken language that was detected in the translated audio data. This is expressed
+     * as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+     *
+     * @return the language value.
+     */
+    @Generated
+    public String getLanguage() {
+        return this.language;
+    }
+
+    /**
+     * Get the duration property: The total duration of the audio processed to produce accompanying translation
+     * information.
+     *
+     * @return the duration value as a {@link Duration} converted from fractional seconds, or null if unset.
+     */
+    @Generated
+    public Duration getDuration() {
+        if (this.duration == null) {
+            return null;
+        }
+        return Duration.ofNanos((long) (this.duration * 1000_000_000L));
+    }
+
+    /**
+     * Get the segments property: A collection of information about the timing, probabilities, and other detail of each
+     * processed audio segment.
+     *
+     * @return the segments value.
+     */
+    @Generated
+    public List<AudioTranslationSegment> getSegments() {
+        return this.segments;
+    }
+}
diff --git a/...enai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java b/...enai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import java.util.Collection;
+
+/** Defines available options for the underlying response format of output translation information. */
+public final class AudioTranslationFormat extends ExpandableStringEnum<AudioTranslationFormat> {
+    /** Use a response body that is a JSON object containing a single 'text' field for the translation. */
+    @Generated public static final AudioTranslationFormat JSON = fromString("json");
+
+    /**
+     * Use a response body that is a JSON object containing translation text along with timing, segments, and other
+     * metadata.
+     */
+    @Generated public static final AudioTranslationFormat VERBOSE_JSON = fromString("verbose_json");
+
+    /** Use a response body that is plain text containing the raw, unannotated translation. */
+    @Generated public static final AudioTranslationFormat TEXT = fromString("text");
+
+    /** Use a response body that is plain text in SubRip (SRT) format that also includes timing information. */
+    @Generated public static final AudioTranslationFormat SRT = fromString("srt");
+
+    /**
+     * Use a response body that is plain text in Web Video Text Tracks (VTT) format that also includes timing
+     * information.
+     */
+    @Generated public static final AudioTranslationFormat VTT = fromString("vtt");
+
+    /**
+     * Creates a new instance of AudioTranslationFormat value.
+     *
+     * @deprecated Use the {@link #fromString(String)} factory method.
+     */
+    @Generated
+    @Deprecated
+    public AudioTranslationFormat() {}
+
+    /**
+     * Creates or finds an AudioTranslationFormat from its string representation.
+     *
+     * @param name a name to look for.
+     * @return the corresponding AudioTranslationFormat.
+     */
+    @Generated
+    @JsonCreator
+    public static AudioTranslationFormat fromString(String name) {
+        return fromString(name, AudioTranslationFormat.class);
+    }
+
+    /**
+     * Gets known AudioTranslationFormat values.
+     *
+     * @return known AudioTranslationFormat values.
+     */
+    @Generated
+    public static Collection<AudioTranslationFormat> values() {
+        return values(AudioTranslationFormat.class);
+    }
+}
diff --git a/...nai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java b/...nai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
@@ -14,20 +14,20 @@
 public final class AudioTranslationOptions {
 
     /*
-     * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats:
+     * The audio data to translate. This must be the binary content of a file in one of the supported media formats:
      * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
      */
     @Generated
     @JsonProperty(value = "file")
     private byte[] file;
 
     /*
-     * The requested format of the transcription response data, which will influence the content and detail of the
+     * The requested format of the translation response data, which will influence the content and detail of the
      * result.
      */
     @Generated
     @JsonProperty(value = "response_format")
-    private AudioTranscriptionFormat responseFormat;
+    private AudioTranslationFormat responseFormat;
 
     /*
      * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the
@@ -49,7 +49,7 @@ public final class AudioTranslationOptions {
     private Double temperature;
 
     /*
-     * The model to use for this transcription request.
+     * The model to use for this translation request.
      */
     @Generated
     @JsonProperty(value = "model")
@@ -67,7 +67,7 @@ public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) {
     }
 
     /**
-     * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the
+     * Get the file property: The audio data to translate. This must be the binary content of a file in one of the
      * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
      *
      * @return the file value.
@@ -78,29 +78,16 @@ public byte[] getFile() {
     }
 
     /**
-     * Get the responseFormat property: The requested format of the transcription response data, which will influence
-     * the content and detail of the result.
+     * Get the responseFormat property: The requested format of the translation response data, which will influence the
+     * content and detail of the result.
      *
      * @return the responseFormat value.
      */
     @Generated
-    public AudioTranscriptionFormat getResponseFormat() {
+    public AudioTranslationFormat getResponseFormat() {
         return this.responseFormat;
     }
 
-    /**
-     * Set the responseFormat property: The requested format of the transcription response data, which will influence
-     * the content and detail of the result.
-     *
-     * @param responseFormat the responseFormat value to set.
-     * @return the AudioTranslationOptions object itself.
-     */
-    @Generated
-    public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) {
-        this.responseFormat = responseFormat;
-        return this;
-    }
-
     /**
      * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
      * written language of the prompt should match the primary spoken language of the audio data.
@@ -152,7 +139,7 @@ public AudioTranslationOptions setTemperature(Double temperature) {
     }
 
     /**
-     * Get the model property: The model to use for this transcription request.
+     * Get the model property: The model to use for this translation request.
      *
      * @return the model value.
      */
@@ -162,7 +149,7 @@ public String getModel() {
     }
 
     /**
-     * Set the model property: The model to use for this transcription request.
+     * Set the model property: The model to use for this translation request.
      *
      * @param model the model value to set.
      * @return the AudioTranslationOptions object itself.
@@ -172,4 +159,17 @@ public AudioTranslationOptions setModel(String model) {
         this.model = model;
         return this;
     }
+
+    /**
+     * Set the responseFormat property: The requested format of the translation response data, which will influence the
+     * content and detail of the result.
+     *
+     * @param responseFormat the responseFormat value to set.
+     * @return the AudioTranslationOptions object itself.
+     */
+    @Generated
+    public AudioTranslationOptions setResponseFormat(AudioTranslationFormat responseFormat) {
+        this.responseFormat = responseFormat;
+        return this;
+    }
 }