Skip to content

Commit

Permalink
[azopenai] Add in whisper (audio transcription and translation) support (Azure#21599)
Browse files Browse the repository at this point in the history

- Support whisper/audio APIs + example
- Fix issue with prompt_annotations being renamed to prompt_filter_results (responses may contain either field name, so both are accepted).
  • Loading branch information
richardpark-msft authored Sep 22, 2023
1 parent 8db55a4 commit 0a1ed18
Show file tree
Hide file tree
Showing 35 changed files with 1,993 additions and 592 deletions.
4 changes: 2 additions & 2 deletions eng/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
},
{
"Name": "azopenai",
"CoverageGoal": 0.39
"CoverageGoal": 0.34
},
{
"Name": "aztemplate",
Expand Down Expand Up @@ -110,4 +110,4 @@
"CoverageGoal": 0.80
}
]
}
}
5 changes: 4 additions & 1 deletion sdk/ai/azopenai/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# Release History

## 0.2.1 (Unreleased)
## 0.3.0 (Unreleased)

### Features Added
- Support for Whisper audio APIs for transcription and translation using `GetAudioTranscription` and `GetAudioTranslation`.

### Breaking Changes
- `ChatChoiceContentFilterResults` content filtering fields are now all typed as `ContentFilterResult`, instead of unique types for each field.
- `PromptAnnotations` renamed to `PromptFilterResults` in `ChatCompletions` and `Completions`.

### Bugs Fixed

Expand Down
2 changes: 1 addition & 1 deletion sdk/ai/azopenai/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "go",
"TagPrefix": "go/ai/azopenai",
"Tag": "go/ai/azopenai_7be6ae3c15"
"Tag": "go/ai/azopenai_5ce13f37c4"
}
155 changes: 125 additions & 30 deletions sdk/ai/azopenai/autorest.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ directive:
transform: $["$ref"] = "#/components/schemas/State"; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ContentFilterResult"].properties.severity
transform: $["$ref"] = "#/components/schemas/ContentFilterSeverity"; delete $.allOf;
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ChatChoice"].properties.finish_reason
transform: $["$ref"] = "#/components/schemas/CompletionsFinishReason"; delete $.oneOf;
Expand All @@ -109,6 +109,102 @@ directive:
- from: openapi-document
where: $.components.schemas["AzureCognitiveSearchChatExtensionConfiguration"].properties.queryType
transform: $["$ref"] = "#/components/schemas/AzureCognitiveSearchQueryType"; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ContentFilterResults"].properties.sexual
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ContentFilterResults"].properties.hate
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ContentFilterResults"].properties.self_harm
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;
- from: openapi-document
where: $.components.schemas["ContentFilterResults"].properties.violence
transform: $.$ref = $.allOf[0].$ref; delete $.allOf;

#
# [BEGIN] Whisper
#

# The generated whisper operation names are really long since they are a conglomeration
# of _all_ the possible return types, so rename them to something manageable.
- rename-operation:
from: getAudioTranscriptionAsPlainText_getAudioTranscriptionAsResponseObject
to: GetAudioTranscriptionInternal
- rename-operation:
from: getAudioTranslationAsPlainText_getAudioTranslationAsResponseObject
to: GetAudioTranslationInternal

# fixup the responses
- from: openapi-document
where: $.paths["/deployments/{deploymentId}/audio/transcriptions"]
transform: |
delete $.post.responses["200"].statusCode;
$.post.responses["200"].content["application/json"].schema["$ref"] = "#/components/schemas/AudioTranscription"; delete $.post.responses["200"].content["application/json"].schema.anyOf;
- from: openapi-document
where: $.paths["/deployments/{deploymentId}/audio/translations"]
transform: |
delete $.post.responses["200"].statusCode;
$.post.responses["200"].content["application/json"].schema["$ref"] = "#/components/schemas/AudioTranscription"; delete $.post.responses["200"].content["application/json"].schema.anyOf;
# hide the generated functions, in favor of our public wrappers.
- from:
- client.go
- models.go
- models_serde.go
- response_types.go
- options.go
where: $
transform: |
return $
.replace(/GetAudioTranscriptionInternal([^){ ]*)/g, "getAudioTranscriptionInternal$1")
.replace(/GetAudioTranslationInternal([^){ ]*)/g, "getAudioTranslationInternal$1");
# some multipart fixing
- from: client.go
where: $
transform: |
return $
.replace(/(func.*getAudio(?:Translation|Transcription)InternalCreateRequest\(.+?)options/g, "$1body")
.replace(/runtime\.SetMultipartFormData\(.+?\)/sg, "setMultipartFormData(req, file, *body)")
# response type parsing (can be text/plain _or_ JSON)
- from: client.go
where: $
transform: |
return $
.replace(/client\.getAudioTranscriptionInternalHandleResponse/g, "getAudioTranscriptionInternalHandleResponse")
.replace(/client\.getAudioTranslationInternalHandleResponse/g, "getAudioTranslationInternalHandleResponse")
# Whisper openapi3 generation: we have two oneOf that point to the same type,
# and we want to activate our multipart support in the generator.
- from: openapi-document
where: $.paths
transform: |
let makeMultipart = (item) => {
if (item["application/json"] == null) { return item; }
item["multipart/form-data"] = {
...item["application/json"]
};
delete item["application/json"];
}
makeMultipart($["/deployments/{deploymentId}/audio/transcriptions"].post.requestBody.content);
makeMultipart($["/deployments/{deploymentId}/audio/translations"].post.requestBody.content);
- from: openapi-document
where: $.components.schemas
transform: |
let fix = (v) => { if (v.allOf != null) { v.$ref = v.allOf[0].$ref; delete v.allOf; } };
fix($.AudioTranscriptionOptions.properties.response_format);
fix($.AudioTranscription.properties.task);
fix($.AudioTranslationOptions.properties.response_format);
fix($.AudioTranslation.properties.task);
#
# [END] Whisper
#

# Fix "AutoGenerated" models
- from: openapi-document
where: $.components.schemas["ChatCompletions"].properties.usage
Expand Down Expand Up @@ -155,13 +251,26 @@ directive:
- models_serde.go
- models.go
where: $
transform: return $.replace(/AzureCoreFoundations/g, "azureCoreFoundations");
- from:
- models_serde.go
- models.go
where: $
transform: return $.replace(/(?:\/\/.*\s)?func \(\w \*?(?:ErrorResponse|ErrorResponseError|InnerError|InnerErrorInnererror)\).*\{\s(?:.+\s)+\}\s/g, "");

transform: |
return $
// InnerError is actually a recursive type, no need for this innererrorinnererror type
.replace(/\/\/ AzureCoreFoundationsInnerErrorInnererror.+?\n}/s, "")
// also, remove its marshalling functions
.replace(/\/\/ (Unmarshal|Marshal)JSON implements[^\n]+?AzureCoreFoundationsInnerErrorInnererror.+?\n}/sg, "")
// Remove any references to the type and replace them with InnerError.
.replace(/Innererror \*(AzureCoreFoundationsInnerErrorInnererror|AzureCoreFoundationsErrorInnererror)/g, "InnerError *InnerError")
// Fix the marshallers/unmarshallers to use the right case.
.replace(/(a|c).Innererror/g, '$1.InnerError')
// We have two "inner error" types that are identical (ErrorInnerError and InnerError). Let's eliminate the one that's not actually directly referenced.
.replace(/\/\/azureCoreFoundationsInnerError.+?\n}/s, "")
//
// Fix the AzureCoreFoundation naming to match our style.
//
.replace(/AzureCoreFoundations/g, "")
- from: constants.go
where: $
transform: >-
Expand All @@ -185,15 +294,6 @@ directive:
return $
.replace(/runtime\.JoinPaths\(client.endpoint, urlPath\)/g, "client.formatURL(urlPath, getDeployment(body))");
# Some ImageGenerations hackery to represent the ImageLocation/ImagePayload polymorphism.
# - Remove the auto-generated ImageGenerationsDataItem.
# - Replace the ImageGenerations.Data type with []ImageGenerationDataItem
# - from: models.go
# where: $
# transform: |
# return $.replace(/type ImageGenerationsDataItem struct {[^}]+}/, "// ImageGenerationsDataItem represents an image URL or payload\ntype ImageGenerationsDataItem struct{\nImageLocation\nImagePayload\n}")
# $.replace(/(type ImageGenerations struct.+?)Data any/g, "$1Data []ImageGenerationsDataItem")

- from: models.go
where: $
transform: |
Expand Down Expand Up @@ -261,16 +361,6 @@ directive:
where: $
transform: return $.replace(/Logprobs/g, "LogProbs")

# delete ContentFilterResult in favor of our custom representation.
- from:
- models.go
- models_serde.go
where: $
transform: |
return $.replace(/\/\/ ContentFilterResult.+?\n}/s, "")
.replace(/\/\/ MarshalJSON implements the json.Marshaller interface for type ContentFilterResult.+?\n}/s, "")
.replace(/\/\/ UnmarshalJSON implements the json.Unmarshaller interface for type ContentFilterResult.+?\n}/s, "");
- from: constants.go
where: $
transform: return $.replace(/\/\/ PossibleazureOpenAIOperationStateValues returns.+?\n}/s, "");
Expand All @@ -295,14 +385,14 @@ directive:
where: $
transform: |
return $
.replace(/\/\/ The model name.*?Model \*string/sg, "// REQUIRED: Deployment specifies the name of the deployment (for Azure OpenAI) or model (for OpenAI) to use for this request.\nDeployment string");
.replace(/\/\/ The model.*?Model \*string/sg, "// REQUIRED: Deployment specifies the name of the deployment (for Azure OpenAI) or model (for OpenAI) to use for this request.\nDeployment string");
- from: models_serde.go
where: $
transform: |
return $
.replace(/populate\(objectMap, "model", (c|e).Model\)/g, 'populate(objectMap, "model", &$1.Deployment)')
.replace(/err = unpopulate\(val, "Model", &(c|e).Model\)/g, 'err = unpopulate(val, "Model", &$1.Deployment)');
.replace(/populate\(objectMap, "model", (c|e|a).Model\)/g, 'populate(objectMap, "model", &$1.Deployment)')
.replace(/err = unpopulate\(val, "Model", &(c|e|a).Model\)/g, 'err = unpopulate(val, "Model", &$1.Deployment)');
# Make the Azure extensions internal - we expose these through the GetChatCompletions*() functions
# and just treat which endpoint we use as an implementation detail.
Expand Down Expand Up @@ -344,4 +434,9 @@ directive:
return $.replace(
/(AzureChatExtensionTypeAzureCognitiveSearch AzureChatExtensionType)/,
"// AzureChatExtensionTypeAzureCognitiveSearch enables the use of an Azure Cognitive Search index with chat completions.\n// [AzureChatExtensionConfiguration.Parameter] should be of type [AzureCognitiveSearchChatExtensionConfiguration].\n$1");
# HACK: prompt_filter_results <-> prompt_annotations change
- from: models_serde.go
where: $
transform: return $.replace(/case "prompt_filter_results":/g, 'case "prompt_annotations":\nfallthrough\ncase "prompt_filter_results":')
```
15 changes: 9 additions & 6 deletions sdk/ai/azopenai/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ stages:
# Azure OpenAI
AOAI_ENDPOINT: $(AOAI-ENDPOINT)
AOAI_API_KEY: $(AOAI-API-KEY)
AOAI_CHAT_COMPLETIONS_MODEL_DEPLOYMENT: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT)
AOAI_COMPLETIONS_MODEL_DEPLOYMENT: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT)
AOAI_EMBEDDINGS_MODEL_DEPLOYMENT: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT)
AOAI_CHAT_COMPLETIONS_MODEL: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT)
AOAI_COMPLETIONS_MODEL: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT)
AOAI_EMBEDDINGS_MODEL: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT)

# Azure OpenAI "Canary"
AOAI_COMPLETIONS_MODEL_DEPLOYMENT_CANARY: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
AOAI_COMPLETIONS_MODEL_CANARY: $(AOAI-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
AOAI_API_KEY_CANARY: $(AOAI-API-KEY-CANARY)
AOAI_EMBEDDINGS_MODEL_DEPLOYMENT_CANARY: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT-CANARY)
AOAI_CHAT_COMPLETIONS_MODEL_DEPLOYMENT_CANARY: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
AOAI_EMBEDDINGS_MODEL_CANARY: $(AOAI-EMBEDDINGS-MODEL-DEPLOYMENT-CANARY)
AOAI_CHAT_COMPLETIONS_MODEL_CANARY: $(AOAI-CHAT-COMPLETIONS-MODEL-DEPLOYMENT-CANARY)
AOAI_ENDPOINT_CANARY: $(AOAI-ENDPOINT-CANARY)

# OpenAI
Expand All @@ -61,3 +61,6 @@ stages:
COGNITIVE_SEARCH_API_INDEX: $(COGNITIVE-SEARCH-API-INDEX)
COGNITIVE_SEARCH_API_KEY: $(COGNITIVE-SEARCH-API-KEY)

AOAI_ENDPOINT_WHISPER: $(AOAI-ENDPOINT-WHISPER)
AOAI_API_KEY_WHISPER: $(AOAI-API-KEY-WHISPER)
AOAI_MODEL_WHISPER: $(AOAI-MODEL-WHISPER)
Loading

0 comments on commit 0a1ed18

Please sign in to comment.