Skip to content

Commit

Permalink
Add vision (#30)
Browse files — browse the repository at this point in the history
  • Loading branch information
SwiftBeta authored Dec 23, 2023
1 parent 8480033 commit f92ea81
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import Foundation

/// A single chat message that can carry both text and an image reference,
/// used as input for vision-capable chat-completion requests.
///
/// Conforms to `Identifiable` (fresh `UUID` per message) and `Hashable` so
/// messages can be stored in sets or diffed in SwiftUI lists.
public struct MessageChatImageInput: Identifiable, Hashable {
    /// Unique identity, generated at initialization time.
    public var id: UUID
    /// The textual part of the message.
    public var text: String
    /// URL string pointing at the image to attach; may be empty for text-only messages.
    public var imageURL: String
    /// The author role of the message (e.g. user / assistant).
    public var role: MessageRoleType

    /// Creates a message, assigning it a new random identifier.
    /// - Parameters:
    ///   - text: The message body.
    ///   - imageURL: URL string of the attached image ("" when none).
    ///   - role: The role the message is sent as.
    public init(text: String, imageURL: String, role: MessageRoleType) {
        id = UUID()
        text = text // shadow-free: parameter and property share names, so use explicit self below
        self.text = text
        self.imageURL = imageURL
        self.role = role
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import Foundation

/// Endpoint describing a `POST chat/completions` request whose messages may
/// include image parts (OpenAI vision input format).
struct ChatCompletionsImageInputEndpoint: Endpoint {
    /// Target OpenAI model for the completion.
    private let model: OpenAIModelType
    /// Messages pre-serialized into the wire format expected by the API
    /// (role + multi-part "content" array). Immutable after init — the
    /// original declared this `var` with a dummy `[]` default, but it is
    /// only ever assigned once, in `init`.
    private let messages: [[String: Any]]
    /// Optional tuning parameters; `nil` fields defer to API defaults.
    private let optionalParameters: ChatCompletionsOptionalParameters?

    var method: HTTPMethod {
        .POST
    }

    var path: String = "chat/completions"

    /// - Parameters:
    ///   - model: Model to run the completion with.
    ///   - messages: High-level message models; mapped to dictionaries here.
    ///   - optionalParameters: Extra request knobs (temperature, stream, …).
    init(model: OpenAIModelType,
         messages: [MessageChatImageInput],
         optionalParameters: ChatCompletionsOptionalParameters?) {
        self.model = model
        self.messages = Self.mapMessageModelToDictionary(messages: messages)
        self.optionalParameters = optionalParameters
    }

    /// Request body. NOTE(review): the `as Any` casts wrap `nil` optionals as
    /// `Optional<…>.none` values inside the dictionary — presumably the shared
    /// request builder strips or tolerates them (the sibling chat endpoint uses
    /// the same pattern); confirm before relying on raw JSONSerialization here.
    var parameters: [String: Any]? {
        ["model": self.model.name as Any,
         "messages": self.messages as Any,
         "temperature": self.optionalParameters?.temperature as Any,
         "top_p": self.optionalParameters?.topP as Any,
         "n": self.optionalParameters?.n as Any,
         "stop": self.optionalParameters?.stop as Any,
         "stream": self.optionalParameters?.stream as Any,
         "max_tokens": self.optionalParameters?.maxTokens as Any]
    }

    /// Converts each message into the API's multi-part content shape:
    /// always a `text` part, plus an `image_url` part when a URL is present.
    private static func mapMessageModelToDictionary(messages: [MessageChatImageInput]) -> [[String: Any]] {
        return messages.map { message in
            var contentArray: [[String: Any]] = [["type": "text", "text": message.text]]

            // Empty string means "no image" — emit a text-only message.
            if !message.imageURL.isEmpty {
                contentArray.append(["type": "image_url", "image_url": ["url": message.imageURL]])
            }

            return ["role": message.role.rawValue, "content": contentArray]
        }
    }
}
23 changes: 16 additions & 7 deletions Sources/SwiftOpenAI/OpenAI/OpenAIEndpoints/OpenAIEndpoints.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,43 @@ import Foundation

enum OpenAIEndpoints {
case listModels

case completions(model: OpenAIModelType,
optionalParameters: CompletionsOptionalParameters?)

case chatCompletions(model: OpenAIModelType,
messages: [MessageChatGPT],
optionalParameters: ChatCompletionsOptionalParameters?)


case chatCompletionsWithImageInput(model: OpenAIModelType,
messages: [MessageChatImageInput],
optionalParameters: ChatCompletionsOptionalParameters?)

case createImage(model: OpenAIImageModelType,
prompt: String,
numberOfImages: Int,
size: ImageSize)

case embeddings(model: OpenAIModelType, input: String)

case moderations(input: String)

case createSpeech(model: OpenAITTSModelType, input: String, voice: OpenAIVoiceType, responseFormat: OpenAIAudioResponseType, speed: Double)

case createTranscription(file: Data, model: OpenAITranscriptionModelType, language: String, prompt: String, responseFormat: OpenAIAudioResponseType, temperature: Double)

public var endpoint: Endpoint {
switch self {
case .listModels:
return ListModelsEndpoint()
case .completions(model: let model, optionalParameters: let optionalParameters):
return CompletionsEndpoint(model: model,
optionalParameters: optionalParameters)

case .chatCompletionsWithImageInput(model: let model, messages: let messages, optionalParameters: let optionalParameters):
return ChatCompletionsImageInputEndpoint(model: model,
messages: messages,
optionalParameters: optionalParameters)
case .chatCompletions(let model, let messages, let optionalParameters):
return ChatCompletionsEndpoint(model: model,
messages: messages,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import Foundation

/// Abstraction over the network call that creates a chat completion whose
/// messages may contain image inputs; enables injecting a test double.
protocol CreateChatCompletionsImageInputRequestProtocol {
    /// Executes the request end-to-end and decodes the response.
    /// - Parameters:
    ///   - api: Transport used to build, route and execute the URL request.
    ///   - apiKey: OpenAI API key, sent as a Bearer token.
    ///   - model: Model to run the completion with.
    ///   - messages: Conversation messages (text and/or image parts).
    ///   - optionalParameters: Extra request knobs; `nil` uses API defaults.
    /// - Returns: The decoded completion, or `nil` if parsing yields nothing.
    func execute(api: API,
                 apiKey: String,
                 model: OpenAIModelType,
                 messages: [MessageChatImageInput],
                 optionalParameters: ChatCompletionsOptionalParameters?) async throws -> ChatCompletionsDataModel?
}

/// Concrete request runner for vision-enabled chat completions.
/// Builds the endpoint, attaches auth headers, executes, and decodes.
final public class CreateChatCompletionsImageInputRequest: CreateChatCompletionsImageInputRequestProtocol {
    /// Function-type alias matching `execute`, used by `SwiftOpenAI` for
    /// closure-based dependency injection of this request.
    public typealias Init = (_ api: API,
                             _ apiKey: String,
                             _ model: OpenAIModelType,
                             _ messages: [MessageChatImageInput],
                             _ optionalParameters: ChatCompletionsOptionalParameters?) async throws -> ChatCompletionsDataModel?

    public init() { }

    /// Performs the full request lifecycle. See protocol for parameter docs.
    public func execute(api: API,
                        apiKey: String,
                        model: OpenAIModelType,
                        messages: [MessageChatImageInput],
                        optionalParameters: ChatCompletionsOptionalParameters?) async throws -> ChatCompletionsDataModel? {
        // `var` because routeEndpoint mutates the endpoint in place (inout)
        // to attach the environment's base URL.
        var endpoint = OpenAIEndpoints.chatCompletionsWithImageInput(model: model, messages: messages, optionalParameters: optionalParameters).endpoint
        api.routeEndpoint(&endpoint, environment: OpenAIEnvironmentV1())

        var urlRequest = api.buildURLRequest(endpoint: endpoint)
        api.addHeaders(urlRequest: &urlRequest,
                       headers: ["Content-Type": "application/json",
                                 "Authorization": "Bearer \(apiKey)"])

        // NOTE(review): `await` without `try` — presumably `api.execute`
        // returns a result/data value rather than throwing; transport errors
        // appear to surface through `api.parse` below. Confirm against API.
        let result = await api.execute(with: urlRequest)

        // OpenAI responds in snake_case; decode into camelCase model fields.
        let jsonDecoder = JSONDecoder()
        jsonDecoder.keyDecodingStrategy = .convertFromSnakeCase

        return try api.parse(result,
                             type: ChatCompletionsDataModel.self,
                             jsonDecoder: jsonDecoder,
                             errorType: OpenAIAPIError.self)
    }
}
45 changes: 45 additions & 0 deletions Sources/SwiftOpenAI/OpenAI/SwiftOpenAI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ protocol OpenAIProtocol {
func createChatCompletions(model: OpenAIModelType,
messages: [MessageChatGPT],
optionalParameters: ChatCompletionsOptionalParameters?) async throws -> ChatCompletionsDataModel?

func createChatCompletionsWithImageInput(model: OpenAIModelType,
messages: [MessageChatImageInput],
optionalParameters: ChatCompletionsOptionalParameters?) async throws -> ChatCompletionsDataModel?

func createChatCompletionsStream(model: OpenAIModelType,
messages: [MessageChatGPT],
Expand All @@ -34,6 +38,7 @@ public class SwiftOpenAI: OpenAIProtocol {
private let listModelsRequest: ListModelsRequest.Init
private let completionsRequest: CompletionsRequest.Init
private let createChatCompletionsRequest: CreateChatCompletionsRequest.Init
private let createChatCompletionsImageInputRequest: CreateChatCompletionsImageInputRequest.Init
private let createChatCompletionsStreamRequest: CreateChatCompletionsStreamRequest.Init
private let createImagesRequest: CreateImagesRequest.Init
private let embeddingsRequest: EmbeddingsRequest.Init
Expand All @@ -46,6 +51,7 @@ public class SwiftOpenAI: OpenAIProtocol {
listModelsRequest: @escaping ListModelsRequest.Init = ListModelsRequest().execute,
completionsRequest: @escaping CompletionsRequest.Init = CompletionsRequest().execute,
createChatCompletionsRequest: @escaping CreateChatCompletionsRequest.Init = CreateChatCompletionsRequest().execute,
createChatCompletionsImageInputRequest: @escaping CreateChatCompletionsImageInputRequest.Init = CreateChatCompletionsImageInputRequest().execute,
createChatCompletionsStreamRequest: @escaping CreateChatCompletionsStreamRequest.Init = CreateChatCompletionsStreamRequest().execute,
createImagesRequest: @escaping CreateImagesRequest.Init = CreateImagesRequest().execute,
embeddingsRequest: @escaping EmbeddingsRequest.Init = EmbeddingsRequest().execute,
Expand All @@ -57,6 +63,7 @@ public class SwiftOpenAI: OpenAIProtocol {
self.listModelsRequest = listModelsRequest
self.completionsRequest = completionsRequest
self.createChatCompletionsRequest = createChatCompletionsRequest
self.createChatCompletionsImageInputRequest = createChatCompletionsImageInputRequest
self.createChatCompletionsStreamRequest = createChatCompletionsStreamRequest
self.createImagesRequest = createImagesRequest
self.embeddingsRequest = embeddingsRequest
Expand Down Expand Up @@ -159,6 +166,44 @@ public class SwiftOpenAI: OpenAIProtocol {
optionalParameters: ChatCompletionsOptionalParameters? = nil) async throws -> ChatCompletionsDataModel? {
try await createChatCompletionsRequest(api, apiKey, model, messages, optionalParameters)
}

/**
 Generates completions for a chat conversation that can mix text and image
 inputs, returning the whole response as a single decoded object.

 Each `MessageChatImageInput` carries text plus an optional image URL, letting
 the model reason over both. Behavior is tuned through the optional
 `ChatCompletionsOptionalParameters`. Uses Swift concurrency (async/await).

 - Parameters:
    - model: The `OpenAIModelType` to use (e.g. a GPT-4 vision model).
    - messages: The conversation as `MessageChatImageInput` values.
    - optionalParameters: Optional knobs such as `maxTokens`, `temperature`
      and stop sequences. Pass `nil` to use the API's defaults.
 - Throws: An error when the network call or JSON decoding fails.
 - Returns: The decoded `ChatCompletionsDataModel`, or `nil` when no data
   could be fetched or parsed.

 Example usage:

     do {
         let myMessage = MessageChatImageInput(text: message,
                                               imageURL: imageVisionURL,
                                               role: .user)
         let optionalParameters: ChatCompletionsOptionalParameters = .init(temperature: 0.5, stop: ["stopstring"], stream: false, maxTokens: 1200)
         let result = try await openAI.createChatCompletionsWithImageInput(model: .gpt4(.gpt_4_vision_preview), messages: [myMessage], optionalParameters: optionalParameters)
         print("Result \(result)")
     } catch {
         print("Error: \(error)")
     }
 */
public func createChatCompletionsWithImageInput(model: OpenAIModelType,
                                                messages: [MessageChatImageInput],
                                                optionalParameters: ChatCompletionsOptionalParameters? = nil) async throws -> ChatCompletionsDataModel? {
    // Delegate to the injected request closure (defaults to
    // CreateChatCompletionsImageInputRequest().execute).
    return try await self.createChatCompletionsImageInputRequest(api, apiKey, model, messages, optionalParameters)
}


/**
Generates completions for a chat-based conversation using the OpenAI API with a specified model and optional parameters, returning an asynchronous throwing stream of responses.
Expand Down

0 comments on commit f92ea81

Please sign in to comment.