Skip to content

Commit

Permalink
CodeGen from PR 25734 in Azure/azure-rest-api-specs
Browse files Browse the repository at this point in the history
Azure OpenAI: minimal, partial specification for Whisper transcription/translation (Azure#25734)

* minimalistic whisper .tsp definitions

* merge, format, remove client.tsp changes for mvp simplicity

* speculative example JSON update for string response types

* restore header traits for swagger hints

* review pass, prioritize object response for OpenAPI v2

* PR feedback: fully distinguish transcription/translation models
  • Loading branch information
SDKAuto committed Sep 20, 2023
1 parent 88aa872 commit ac39fe0
Show file tree
Hide file tree
Showing 7 changed files with 477 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.ai.openai.models.AudioTranslation;
import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
Expand Down Expand Up @@ -1261,14 +1262,14 @@ public Mono<String> getAudioTranscriptionAsPlainText(
*/
@Generated
@ServiceMethod(returns = ReturnType.SINGLE)
public Mono<AudioTranscription> getAudioTranslationAsResponseObject(
public Mono<AudioTranslation> getAudioTranslationAsResponseObject(
String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
// Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
RequestOptions requestOptions = new RequestOptions();
return getAudioTranslationAsResponseObjectWithResponse(
deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
.flatMap(FluxUtil::toMono)
.map(protocolMethodData -> protocolMethodData.toObject(AudioTranscription.class));
.map(protocolMethodData -> protocolMethodData.toObject(AudioTranslation.class));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.azure.ai.openai.models.AudioTranscription;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.ai.openai.models.AudioTranslation;
import com.azure.ai.openai.models.AudioTranslationOptions;
import com.azure.ai.openai.models.ChatCompletions;
import com.azure.ai.openai.models.ChatCompletionsOptions;
Expand Down Expand Up @@ -1242,14 +1243,14 @@ public String getAudioTranscriptionAsPlainText(
*/
@Generated
@ServiceMethod(returns = ReturnType.SINGLE)
public AudioTranscription getAudioTranslationAsResponseObject(
public AudioTranslation getAudioTranslationAsResponseObject(
String deploymentOrModelName, AudioTranslationOptions audioTranslationOptions) {
// Generated convenience method for getAudioTranslationAsResponseObjectWithResponse
RequestOptions requestOptions = new RequestOptions();
return getAudioTranslationAsResponseObjectWithResponse(
deploymentOrModelName, BinaryData.fromObject(audioTranslationOptions), requestOptions)
.getValue()
.toObject(AudioTranscription.class);
.toObject(AudioTranslation.class);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Code generated by Microsoft (R) AutoRest Code Generator.

package com.azure.ai.openai.models;

import com.azure.core.annotation.Generated;
import com.azure.core.annotation.Immutable;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.time.Duration;
import java.util.List;

/** Response payload describing the outcome of an operation that translated spoken audio into written text. */
@Immutable
public final class AudioTranslation {
    /*
     * Translated text produced for the provided audio data.
     */
    @Generated
    @JsonProperty(value = "text")
    private String text;

    /*
     * Label describing which operation type generated the accompanying response data.
     */
    @Generated
    @JsonProperty(value = "task")
    private AudioTaskLabel task;

    /*
     * Spoken language detected in the translated audio data,
     * expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'.
     */
    @Generated
    @JsonProperty(value = "language")
    private String language;

    /*
     * Total duration of the audio processed to produce the accompanying translation information.
     * Kept as a raw number; getDuration() converts it to a java.time.Duration.
     */
    @Generated
    @JsonProperty(value = "duration")
    private Double duration;

    /*
     * Per-segment information about timing, probabilities, and other detail of each processed audio segment.
     */
    @Generated
    @JsonProperty(value = "segments")
    private List<AudioTranslationSegment> segments;

    /**
     * Builds an AudioTranslation from its single creator-bound property.
     *
     * @param text the text value to set.
     */
    @Generated
    @JsonCreator
    private AudioTranslation(@JsonProperty(value = "text") String text) {
        this.text = text;
    }

    /**
     * Returns the text property, i.e. the translated text for the provided audio data.
     *
     * @return the text value.
     */
    @Generated
    public String getText() {
        return text;
    }

    /**
     * Returns the task property, i.e. the label that describes which operation type generated the accompanying
     * response data.
     *
     * @return the task value.
     */
    @Generated
    public AudioTaskLabel getTask() {
        return task;
    }

    /**
     * Returns the language property, i.e. the spoken language that was detected in the translated audio data,
     * expressed as a two-letter ISO-639-1 language code like 'en' or 'fr'.
     *
     * @return the language value.
     */
    @Generated
    public String getLanguage() {
        return language;
    }

    /**
     * Returns the duration property, i.e. the total duration of the audio processed to produce the accompanying
     * translation information.
     *
     * <p>The stored number is scaled by 1,000,000,000 and truncated to a whole count of nanoseconds, so it is
     * treated as a value in seconds.
     *
     * @return the duration value, or {@code null} when no duration is set.
     */
    @Generated
    public Duration getDuration() {
        return duration == null ? null : Duration.ofNanos((long) (duration * 1_000_000_000L));
    }

    /**
     * Returns the segments property, i.e. the collection of information about the timing, probabilities, and other
     * detail of each processed audio segment.
     *
     * @return the segments value.
     */
    @Generated
    public List<AudioTranslationSegment> getSegments() {
        return segments;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Code generated by Microsoft (R) AutoRest Code Generator.

package com.azure.ai.openai.models;

import com.azure.core.annotation.Generated;
import com.azure.core.util.ExpandableStringEnum;
import com.fasterxml.jackson.annotation.JsonCreator;
import java.util.Collection;

/** Expandable set of options for the underlying response format of output translation information. */
public final class AudioTranslationFormat extends ExpandableStringEnum<AudioTranslationFormat> {
    /** Response body is a JSON object containing a single 'text' field for the translation. */
    @Generated
    public static final AudioTranslationFormat JSON = AudioTranslationFormat.fromString("json");

    /**
     * Response body is a JSON object containing translation text along with timing, segments, and other metadata.
     */
    @Generated
    public static final AudioTranslationFormat VERBOSE_JSON = AudioTranslationFormat.fromString("verbose_json");

    /** Response body is plain text containing the raw, unannotated translation. */
    @Generated
    public static final AudioTranslationFormat TEXT = AudioTranslationFormat.fromString("text");

    /** Response body is plain text in SubRip (SRT) format that also includes timing information. */
    @Generated
    public static final AudioTranslationFormat SRT = AudioTranslationFormat.fromString("srt");

    /**
     * Response body is plain text in Web Video Text Tracks (VTT) format that also includes timing information.
     */
    @Generated
    public static final AudioTranslationFormat VTT = AudioTranslationFormat.fromString("vtt");

    /**
     * Creates a new instance of AudioTranslationFormat value.
     *
     * @deprecated Use the {@link #fromString(String)} factory method.
     */
    @Generated
    @Deprecated
    public AudioTranslationFormat() {
    }

    /**
     * Creates or finds an AudioTranslationFormat from its string representation.
     *
     * @param name a name to look for.
     * @return the corresponding AudioTranslationFormat.
     */
    @Generated
    @JsonCreator
    public static AudioTranslationFormat fromString(String name) {
        return fromString(name, AudioTranslationFormat.class);
    }

    /**
     * Gets the known AudioTranslationFormat values.
     *
     * @return known AudioTranslationFormat values.
     */
    @Generated
    public static Collection<AudioTranslationFormat> values() {
        return values(AudioTranslationFormat.class);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@
public final class AudioTranslationOptions {

/*
* The audio data to transcribe. This must be the binary content of a file in one of the supported media formats:
* The audio data to translate. This must be the binary content of a file in one of the supported media formats:
* flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
*/
@Generated
@JsonProperty(value = "file")
private byte[] file;

/*
* The requested format of the transcription response data, which will influence the content and detail of the
* The requested format of the translation response data, which will influence the content and detail of the
* result.
*/
@Generated
@JsonProperty(value = "response_format")
private AudioTranscriptionFormat responseFormat;
private AudioTranslationFormat responseFormat;

/*
* An optional hint to guide the model's style or continue from a prior audio segment. The written language of the
Expand All @@ -49,7 +49,7 @@ public final class AudioTranslationOptions {
private Double temperature;

/*
* The model to use for this transcription request.
* The model to use for this translation request.
*/
@Generated
@JsonProperty(value = "model")
Expand All @@ -67,7 +67,7 @@ public AudioTranslationOptions(@JsonProperty(value = "file") byte[] file) {
}

/**
* Get the file property: The audio data to transcribe. This must be the binary content of a file in one of the
* Get the file property: The audio data to translate. This must be the binary content of a file in one of the
* supported media formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm.
*
* @return the file value.
Expand All @@ -78,29 +78,16 @@ public byte[] getFile() {
}

/**
* Get the responseFormat property: The requested format of the transcription response data, which will influence
* the content and detail of the result.
* Get the responseFormat property: The requested format of the translation response data, which will influence the
* content and detail of the result.
*
* @return the responseFormat value.
*/
@Generated
public AudioTranscriptionFormat getResponseFormat() {
public AudioTranslationFormat getResponseFormat() {
return this.responseFormat;
}

/**
* Set the responseFormat property: The requested format of the transcription response data, which will influence
* the content and detail of the result.
*
* @param responseFormat the responseFormat value to set.
* @return the AudioTranslationOptions object itself.
*/
@Generated
public AudioTranslationOptions setResponseFormat(AudioTranscriptionFormat responseFormat) {
this.responseFormat = responseFormat;
return this;
}

/**
* Get the prompt property: An optional hint to guide the model's style or continue from a prior audio segment. The
* written language of the prompt should match the primary spoken language of the audio data.
Expand Down Expand Up @@ -152,7 +139,7 @@ public AudioTranslationOptions setTemperature(Double temperature) {
}

/**
* Get the model property: The model to use for this transcription request.
* Get the model property: The model to use for this translation request.
*
* @return the model value.
*/
Expand All @@ -162,7 +149,7 @@ public String getModel() {
}

/**
* Set the model property: The model to use for this transcription request.
* Set the model property: The model to use for this translation request.
*
* @param model the model value to set.
* @return the AudioTranslationOptions object itself.
Expand All @@ -172,4 +159,17 @@ public AudioTranslationOptions setModel(String model) {
this.model = model;
return this;
}

/**
* Set the responseFormat property: The requested format of the translation response data, which will influence the
* content and detail of the result. The supplied value is assigned to this instance as-is; this method performs no
* validation of its own.
*
* @param responseFormat the responseFormat value to set.
* @return the AudioTranslationOptions object itself, so that calls can be chained.
*/
@Generated
public AudioTranslationOptions setResponseFormat(AudioTranslationFormat responseFormat) {
this.responseFormat = responseFormat;
return this;
}
}
Loading

0 comments on commit ac39fe0

Please sign in to comment.