From ac39fe0ea1dd1967b909e708348ffb8076aea7a5 Mon Sep 17 00:00:00 2001
From: SDKAuto <sdkautomation@microsoft.com>
Date: Wed, 20 Sep 2023 17:53:08 +0000
Subject: [PATCH] CodeGen from PR 25734 in Azure/azure-rest-api-specs Azure
 OpenAI: minimal, partial specification for Whisper transcription/translation
 (#25734)

* minimalistic whisper .tsp definitions

* merge, format, remove client.tsp changes for mvp simplicity

* speculative example JSON update for string response types

* restore header traits for swagger hints

* review pass, prioritize object response for OpenAPI v2

* PR feedback: fully distinguish transcription/translation models
---
 .../azure/ai/openai/OpenAIAsyncClient.java    |   5 +-
 .../com/azure/ai/openai/OpenAIClient.java     |   5 +-
 .../ai/openai/models/AudioTranslation.java    | 119 ++++++++
 .../openai/models/AudioTranslationFormat.java |  65 +++++
 .../models/AudioTranslationOptions.java       |  46 +--
 .../models/AudioTranslationSegment.java       | 261 ++++++++++++++++++
 sdk/openai/azure-ai-openai/tsp-location.yaml  |   6 +-
 7 files changed, 477 insertions(+), 30 deletions(-)
 create mode 100644 sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java
 create mode 100644 sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java
 create mode 100644 sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationSegment.java
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java
index b95e243439bdb..7866ef6a0513e 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIAsyncClient.java
@@ -14,6 +14,7 @@
 import com.azure.ai.openai.models.AudioTranscription;
 import com.azure.ai.openai.models.AudioTranscriptionFormat;
 import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslation;
 import com.azure.ai.openai.models.AudioTranslationOptions;
 import com.azure.ai.openai.models.ChatCompletions;
 import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -1261,14 +1262,14 @@ public Mono<String> getAudioTranscriptionAsPlainText(
      */
     @Generated
     @ServiceMethod(returns = ReturnType.SINGLE)
-    public Mono<AudioTranscription> getAudioTranslationAsResponseObject(
+    public Mono<AudioTranslation> getAudioTranslationAsResponseObject(
             String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
         // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return getAudioTranslationAsResponseObjectWithResponse(
                         deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
                 .flatMap(FluxUtil::toMono)
-                .map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class));
+                .map(protocolMethodData -> protocolMethodData.toObject(AudioTranslation.class));
     }
 
     /**
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java
index d38b041de3301..2bb1951d4d809 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/OpenAIClient.java
@@ -12,6 +12,7 @@
 import com.azure.ai.openai.models.AudioTranscription;
 import com.azure.ai.openai.models.AudioTranscriptionFormat;
 import com.azure.ai.openai.models.AudioTranscriptionOptions;
+import com.azure.ai.openai.models.AudioTranslation;
 import com.azure.ai.openai.models.AudioTranslationOptions;
 import com.azure.ai.openai.models.ChatCompletions;
 import com.azure.ai.openai.models.ChatCompletionsOptions;
@@ -1242,14 +1243,14 @@ public String getAudioTranscriptionAsPlainText(
      */
     @Generated
     @ServiceMethod(returns = ReturnType.SINGLE)
-    public AudioTranscription getAudioTranslationAsResponseObject(
+    public AudioTranslation getAudioTranslationAsResponseObject(
             String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
         // Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
         RequestOptions requestOptions = new RequestOptions();
         return getAudioTranslationAsResponseObjectWithResponse(
                         deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
                 .getValue()
-                .toObject(AudioTranscription.class);
+                .toObject(AudioTranslation.class);
     }
 
     /**
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java
new file mode 100644
index 0000000000000..e428de21efdcc
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslation.java
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.Duration;
+import java.util.List;
+
+/** Result information for an operation that translated spoken audio into written text. */
+@Immutable
+public final class AudioTranslation {
+    /*
+     * The translated text for the provided audio data.
+     */
+    @Generated
+    @JsonProperty(value = "text")
+    private String text;
+
+    /*
+     * The label that describes which operation type generated the accompanying response data.
+     */
+    @Generated
+    @JsonProperty(value = "task")
+    private AudioTaskLabel task;
+
+    /*
+     * The spoken language that was detected in the translated audio data.
+     * This is expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+     */
+    @Generated
+    @JsonProperty(value = "language")
+    private String language;
+
+    /*
+     * The total duration of the audio processed to produce accompanying translation information, in seconds.
+     */
+    @Generated
+    @JsonProperty(value = "duration")
+    private Double duration;
+
+    /*
+     * A collection of information about the timing, probabilities, and other detail of each processed audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "segments")
+    private List<AudioTranslationSegment> segments;
+
+    /**
+     * Creates an instance of AudioTranslation class.
+     *
+     * @param text the text value to set.
+     */
+    @Generated
+    @JsonCreator
+    private AudioTranslation(@JsonProperty(value = "text") String text) {
+        this.text = text;
+    }
+
+    /**
+     * Get the text property: The translated text for the provided audio data.
+     *
+     * @return the text value.
+     */
+    @Generated
+    public String getText() {
+        return this.text;
+    }
+
+    /**
+     * Get the task property: The label that describes which operation type generated the accompanying response data.
+     *
+     * @return the task value.
+     */
+    @Generated
+    public AudioTaskLabel getTask() {
+        return this.task;
+    }
+
+    /**
+     * Get the language property: The spoken language that was detected in the translated audio data. This is expressed
+     * as a two-letter ISO-639-1 language code like 'en' or 'fr'.
+     *
+     * @return the language value.
+     */
+    @Generated
+    public String getLanguage() {
+        return this.language;
+    }
+
+    /**
+     * Get the duration property: The total duration of the audio processed to produce accompanying translation
+     * information.
+     *
+     * @return the duration value, rounded to the nearest nanosecond, or {@code null} when the field is unset.
+     */
+    @Generated
+    public Duration getDuration() {
+        // Round, do not truncate: a product such as 4.1 * 1e9 evaluates to just under the whole nanosecond count.
+        return this.duration == null
+                ? null
+                : Duration.ofNanos(Math.round(this.duration * 1_000_000_000L));
+    }
+
+    /**
+     * Get the segments property: A collection of information about the timing, probabilities, and other detail of each
+     * processed audio segment.
+     *
+     * @return the segments value.
+     */
+    @Generated
+    public List<AudioTranslationSegment> getSegments() {
+        return this.segments;
+    }
+}
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java
new file mode 100644
index 0000000000000..786fc9399f34e
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationFormat.java
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import java.util.Collection;
+
+/** Defines available options for the underlying response format of output translation information. */
+public final class AudioTranslationFormat extends ExpandableStringEnum<AudioTranslationFormat> {
+    /** Use a response body that is a JSON object containing a single 'text' field for the translation. */
+    @Generated public static final AudioTranslationFormat JSON = fromString("json");
+
+    /**
+     * Use a response body that is a JSON object containing translation text along with timing, segments, and other
+     * metadata.
+     */
+    @Generated public static final AudioTranslationFormat VERBOSE_JSON = fromString("verbose_json");
+
+    /** Use a response body that is plain text containing the raw, unannotated translation. */
+    @Generated public static final AudioTranslationFormat TEXT = fromString("text");
+
+    /** Use a response body that is plain text in SubRip (SRT) format that also includes timing information. */
+    @Generated public static final AudioTranslationFormat SRT = fromString("srt");
+
+    /**
+     * Use a response body that is plain text in Web Video Text Tracks (VTT) format that also includes timing
+     * information.
+     */
+    @Generated public static final AudioTranslationFormat VTT = fromString("vtt");
+
+    /**
+     * Creates a new instance of an AudioTranslationFormat value.
+     *
+     * @deprecated Use the {@link #fromString(String)} factory method.
+     */
+    @Generated
+    @Deprecated
+    public AudioTranslationFormat() {}
+
+    /**
+     * Creates or finds an AudioTranslationFormat from its string representation.
+     *
+     * @param name the string representation to look for, such as "json" or "srt".
+     * @return the corresponding AudioTranslationFormat.
+     */
+    @Generated
+    @JsonCreator
+    public static AudioTranslationFormat fromString(String name) {
+        return fromString(name, AudioTranslationFormat.class);
+    }
+
+    /**
+     * Gets known AudioTranslationFormat values.
+     *
+     * @return known AudioTranslationFormat values.
+     */
+    @Generated
+    public static Collection<AudioTranslationFormat> values() {
+        return values(AudioTranslationFormat.class);
+    }
+}
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
index 65f7b1f873ad0..8f883813827bf 100644
--- a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationOptions.java
@@ -14,7 +14,7 @@
 public final class AudioTranslationOptions {
 
     /*
-     * The audio data to transcribe. This must be the binary content of a file in one of the supported media formats:
+     * The audio data to translate. This must be the binary content of a file in one of the supported media formats:
      * flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
      */
     @Generated
@@ -22,12 +22,12 @@ public final class AudioTranslationOptions {
     private byte[] file;
 
     /*
-     * The requested format of the transcription response data, which will influence the content and detail of the
+     * The requested format of the translation response data, which will influence the content and detail of the
      * result.
      */
     @Generated
     @JsonProperty(value = "response_format")
-    private AudioTranscriptionFormat responseFormat;
+    private AudioTranslationFormat responseFormat;
 
     /*
      * An optional hint to guide the model's style or continue from a prior audio segment. The written language of the
@@ -49,7 +49,7 @@ public final class AudioTranslationOptions {
     private Double temperature;
 
     /*
-     * The model to use for this transcription request.
+     * The model to use for this translation request.
      */
     @Generated
     @JsonProperty(value = "model")
@@ -67,7 +67,7 @@ public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) {
     }
 
     /**
-     * Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the
+     * Get the file property: The audio data to translate. This must be the binary content of a file in one of the
      * supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
      *
      * @return the file value.
@@ -78,29 +78,16 @@ public byte[] getFile() {
     }
 
     /**
-     * Get the responseFormat property: The requested format of the transcription response data, which will influence
-     * the content and detail of the result.
+     * Get the responseFormat property: The requested format of the translation response data, which will influence the
+     * content and detail of the result.
      *
      * @return the responseFormat value.
      */
     @Generated
-    public AudioTranscriptionFormat getResponseFormat() {
+    public AudioTranslationFormat getResponseFormat() {
         return this.responseFormat;
     }
 
-    /**
-     * Set the responseFormat property: The requested format of the transcription response data, which will influence
-     * the content and detail of the result.
-     *
-     * @param responseFormat the responseFormat value to set.
-     * @return the AudioTranslationOptions object itself.
-     */
-    @Generated
-    public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) {
-        this.responseFormat = responseFormat;
-        return this;
-    }
-
     /**
      * Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
      * written language of the prompt should match the primary spoken language of the audio data.
@@ -152,7 +139,7 @@ public AudioTranslationOptions setTemperature(Double temperature) {
     }
 
     /**
-     * Get the model property: The model to use for this transcription request.
+     * Get the model property: The model to use for this translation request.
      *
      * @return the model value.
      */
@@ -162,7 +149,7 @@ public String getModel() {
     }
 
     /**
-     * Set the model property: The model to use for this transcription request.
+     * Set the model property: The model to use for this translation request.
      *
      * @param model the model value to set.
      * @return the AudioTranslationOptions object itself.
@@ -172,4 +159,17 @@ public AudioTranslationOptions setModel(String model) {
         this.model = model;
         return this;
     }
+
+    /**
+     * Set the responseFormat property: The requested format of the translation response data, which will influence the
+     * content and detail of the result.
+     *
+     * @param responseFormat the {@link AudioTranslationFormat} value to store as the requested response format.
+     * @return this AudioTranslationOptions object itself, so that setter calls can be chained.
+     */
+    @Generated
+    public AudioTranslationOptions setResponseFormat(AudioTranslationFormat responseFormat) {
+        this.responseFormat = responseFormat;
+        return this;
+    }
 }
diff --git a/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationSegment.java b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationSegment.java
new file mode 100644
index 0000000000000..46cdb7ac89f64
--- /dev/null
+++ b/sdk/openai/azure-ai-openai/src/main/java/com/azure/ai/openai/models/AudioTranslationSegment.java
@@ -0,0 +1,261 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) AutoRest Code Generator.
+
+package com.azure.ai.openai.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Extended information about a single segment of translated audio data. Segments generally represent roughly 5-10
+ * seconds of speech. Segment boundaries typically occur between words but not necessarily sentences.
+ */
+@Immutable
+public final class AudioTranslationSegment {
+    /*
+     * The 0-based index of this segment within a translation.
+     */
+    @Generated
+    @JsonProperty(value = "id")
+    private int id;
+
+    /*
+     * The time at which this segment started relative to the beginning of the translated audio, in seconds.
+     */
+    @Generated
+    @JsonProperty(value = "start")
+    private double start;
+
+    /*
+     * The time at which this segment ended relative to the beginning of the translated audio, in seconds.
+     */
+    @Generated
+    @JsonProperty(value = "end")
+    private double end;
+
+    /*
+     * The translated text that was part of this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "text")
+    private String text;
+
+    /*
+     * The temperature score associated with this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "temperature")
+    private double temperature;
+
+    /*
+     * The average log probability associated with this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "avg_logprob")
+    private double avgLogprob;
+
+    /*
+     * The compression ratio of this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "compression_ratio")
+    private double compressionRatio;
+
+    /*
+     * The probability of no speech detection within this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "no_speech_prob")
+    private double noSpeechProb;
+
+    /*
+     * The token IDs matching the translated text in this audio segment.
+     */
+    @Generated
+    @JsonProperty(value = "tokens")
+    private List<Integer> tokens;
+
+    /*
+     * The seek position associated with the processing of this audio segment.
+     * Seek positions are expressed as hundredths of seconds.
+     * The model may process several segments from a single seek position, so while the seek position will never
+     * represent a later time than the segment's start, the segment's start may represent a significantly later
+     * time than the segment's associated seek position.
+     */
+    @Generated
+    @JsonProperty(value = "seek")
+    private int seek;
+
+    /**
+     * Creates an instance of AudioTranslationSegment class.
+     *
+     * @param id the id value to set.
+     * @param start the start value to set.
+     * @param end the end value to set.
+     * @param text the text value to set.
+     * @param temperature the temperature value to set.
+     * @param avgLogprob the avgLogprob value to set.
+     * @param compressionRatio the compressionRatio value to set.
+     * @param noSpeechProb the noSpeechProb value to set.
+     * @param tokens the tokens value to set.
+     * @param seek the seek value to set.
+     */
+    @Generated
+    private AudioTranslationSegment(
+            int id,
+            Duration start,
+            Duration end,
+            String text,
+            double temperature,
+            double avgLogprob,
+            double compressionRatio,
+            double noSpeechProb,
+            List<Integer> tokens,
+            int seek) {
+        this.id = id;
+        this.start = (double) start.toNanos() / 1_000_000_000L;
+        this.end = (double) end.toNanos() / 1_000_000_000L;
+        this.text = text;
+        this.temperature = temperature;
+        this.avgLogprob = avgLogprob;
+        this.compressionRatio = compressionRatio;
+        this.noSpeechProb = noSpeechProb;
+        this.tokens = tokens;
+        this.seek = seek;
+    }
+
+    // Jackson entry point: start/end arrive as fractional seconds and are rounded to whole nanoseconds.
+    @Generated
+    @JsonCreator
+    private AudioTranslationSegment(
+            @JsonProperty(value = "id") int id,
+            @JsonProperty(value = "start") double start,
+            @JsonProperty(value = "end") double end,
+            @JsonProperty(value = "text") String text,
+            @JsonProperty(value = "temperature") double temperature,
+            @JsonProperty(value = "avg_logprob") double avgLogprob,
+            @JsonProperty(value = "compression_ratio") double compressionRatio,
+            @JsonProperty(value = "no_speech_prob") double noSpeechProb,
+            @JsonProperty(value = "tokens") List<Integer> tokens,
+            @JsonProperty(value = "seek") int seek) {
+        this(
+                id,
+                Duration.ofNanos(Math.round(start * 1_000_000_000L)),
+                Duration.ofNanos(Math.round(end * 1_000_000_000L)),
+                text,
+                temperature,
+                avgLogprob,
+                compressionRatio,
+                noSpeechProb,
+                tokens,
+                seek);
+    }
+
+    /**
+     * Get the id property: The 0-based index of this segment within a translation.
+     *
+     * @return the id value.
+     */
+    @Generated
+    public int getId() {
+        return this.id;
+    }
+
+    /**
+     * Get the start property: The time at which this segment started relative to the beginning of the translated audio.
+     *
+     * @return the start value, rounded to the nearest nanosecond.
+     */
+    @Generated
+    public Duration getStart() {
+        return Duration.ofNanos(Math.round(this.start * 1_000_000_000L));
+    }
+
+    /**
+     * Get the end property: The time at which this segment ended relative to the beginning of the translated audio.
+     *
+     * @return the end value, rounded to the nearest nanosecond.
+     */
+    @Generated
+    public Duration getEnd() {
+        return Duration.ofNanos(Math.round(this.end * 1_000_000_000L));
+    }
+
+    /**
+     * Get the text property: The translated text that was part of this audio segment.
+     *
+     * @return the text value.
+     */
+    @Generated
+    public String getText() {
+        return this.text;
+    }
+
+    /**
+     * Get the temperature property: The temperature score associated with this audio segment.
+     *
+     * @return the temperature value.
+     */
+    @Generated
+    public double getTemperature() {
+        return this.temperature;
+    }
+
+    /**
+     * Get the avgLogprob property: The average log probability associated with this audio segment.
+     *
+     * @return the avgLogprob value.
+     */
+    @Generated
+    public double getAvgLogprob() {
+        return this.avgLogprob;
+    }
+
+    /**
+     * Get the compressionRatio property: The compression ratio of this audio segment.
+     *
+     * @return the compressionRatio value.
+     */
+    @Generated
+    public double getCompressionRatio() {
+        return this.compressionRatio;
+    }
+
+    /**
+     * Get the noSpeechProb property: The probability of no speech detection within this audio segment.
+     *
+     * @return the noSpeechProb value.
+     */
+    @Generated
+    public double getNoSpeechProb() {
+        return this.noSpeechProb;
+    }
+
+    /**
+     * Get the tokens property: The token IDs matching the translated text in this audio segment.
+     *
+     * @return the tokens value.
+     */
+    @Generated
+    public List<Integer> getTokens() {
+        return this.tokens;
+    }
+
+    /**
+     * Get the seek property: The seek position associated with the processing of this audio segment. Seek positions are
+     * expressed as hundredths of seconds. The model may process several segments from a single seek position, so while
+     * the seek position will never represent a later time than the segment's start, the segment's start may represent a
+     * significantly later time than the segment's associated seek position.
+     *
+     * @return the seek value.
+     */
+    @Generated
+    public int getSeek() {
+        return this.seek;
+    }
+}
diff --git a/sdk/openai/azure-ai-openai/tsp-location.yaml b/sdk/openai/azure-ai-openai/tsp-location.yaml
index bc4052dd97e4d..376b199e9d8b8 100644
--- a/sdk/openai/azure-ai-openai/tsp-location.yaml
+++ b/sdk/openai/azure-ai-openai/tsp-location.yaml
@@ -1,5 +1,5 @@
 directory: specification/cognitiveservices/OpenAI.Inference
-additionalDirectories:
-    - specification/cognitiveservices/OpenAI.Authoring
-commit: dd2d1e8957ac6654272137e8d5874eacafd80a5f
 repo: Azure/azure-rest-api-specs
+commit: a66833cbdebb0574ba012f814ab271a382a7c500
+additionalDirectories: []
+