-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Text To Speech] Add Text To Speech implementation (#28)
* Add Text To Speech implementation
- Loading branch information
Showing 9 changed files with 235 additions and 3 deletions.
There are no files selected for viewing
8 changes: 8 additions & 0 deletions
8
Sources/SwiftOpenAI/OpenAI/DataModels/Audio/OpenAIAudioResponseType.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import Foundation | ||
|
||
/// Audio formats that can be requested for a speech response.
///
/// Each case's raw value is the case name itself (for example `"mp3"`).
public enum OpenAIAudioResponseType: String {
    case mp3, opus, aac, flac
}
17 changes: 17 additions & 0 deletions
17
Sources/SwiftOpenAI/OpenAI/DataModels/Audio/OpenAITTSModelType.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import Foundation | ||
|
||
/// Text-to-speech model selector that wraps a concrete `TTS` variant.
public enum OpenAITTSModelType {
    case tts(TTS)

    /// The raw string value of the wrapped model variant.
    var name: String {
        switch self {
        case let .tts(variant):
            return variant.rawValue
        }
    }
}
|
||
/// Concrete text-to-speech model identifiers.
public enum TTS: String {
    /// Raw value `"tts-1"`.
    case tts1 = "tts-1"

    /// Raw value `"tts-1-hd"`.
    case tts1HD = "tts-1-hd"
}
10 changes: 10 additions & 0 deletions
10
Sources/SwiftOpenAI/OpenAI/DataModels/Audio/OpenAIVoiceType.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import Foundation | ||
|
||
/// Voices that can be selected for generated speech.
///
/// Each case's raw value is the case name itself (for example `"alloy"`).
public enum OpenAIVoiceType: String {
    case alloy, echo, fable, onyx, nova, shimmer
}
35 changes: 35 additions & 0 deletions
35
Sources/SwiftOpenAI/OpenAI/OpenAIEndpoints/List/CreateSpeechEndpoint.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import Foundation | ||
|
||
/// Endpoint description for the `audio/speech` route.
///
/// Holds the speech options supplied at initialisation and exposes them
/// through `parameters`.
struct CreateSpeechEndpoint: Endpoint {
    private let model: OpenAITTSModelType
    private let input: String
    private let voice: OpenAIVoiceType
    private let responseFormat: OpenAIAudioResponseType
    private let speed: Double

    /// Route path; kept as a mutable stored property.
    var path: String = "audio/speech"

    /// HTTP method of this endpoint: always `.POST`.
    var method: HTTPMethod { .POST }

    /// Request parameters keyed by `model`, `input`, `voice`,
    /// `response_format` and `speed`.
    var parameters: [String: Any]? {
        var body: [String: Any] = [:]
        body["model"] = model.name
        body["input"] = input
        body["voice"] = voice.rawValue
        body["response_format"] = responseFormat.rawValue
        body["speed"] = speed
        return body
    }

    /// Stores every option unchanged for later use by `parameters`.
    init(model: OpenAITTSModelType,
         input: String,
         voice: OpenAIVoiceType,
         responseFormat: OpenAIAudioResponseType,
         speed: Double) {
        self.model = model
        self.input = input
        self.voice = voice
        self.responseFormat = responseFormat
        self.speed = speed
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
Sources/SwiftOpenAI/OpenAI/Requests/Audio/CreateSpeechRequest.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import Foundation | ||
|
||
/// Abstraction over the create-speech request so implementations can be swapped.
protocol CreateSpeechRequestProtocol {
    /// Runs a create-speech request with the given options.
    ///
    /// - Parameters:
    ///   - api: Client used to build and execute the request.
    ///   - apiKey: Key used for authorising the request.
    ///   - model: Text-to-speech model to use.
    ///   - input: Text to be spoken.
    ///   - voice: Voice to use.
    ///   - responseFormat: Requested audio format.
    ///   - speed: Requested speech speed.
    /// - Returns: The response data, or `nil`.
    /// - Throws: Whatever error the implementation reports.
    func execute(
        api: API,
        apiKey: String,
        model: OpenAITTSModelType,
        input: String,
        voice: OpenAIVoiceType,
        responseFormat: OpenAIAudioResponseType,
        speed: Double
    ) async throws -> Data?
}
|
||
/// Executes the create-speech request and hands back the raw response data.
final public class CreateSpeechRequest: CreateSpeechRequestProtocol {
    /// Closure signature mirroring `execute(api:apiKey:model:input:voice:responseFormat:speed:)`.
    public typealias Init = (_ api: API,
                             _ apiKey: String,
                             _ model: OpenAITTSModelType,
                             _ input: String,
                             _ voice: OpenAIVoiceType,
                             _ responseFormat: OpenAIAudioResponseType,
                             _ speed: Double) async throws -> Data?

    public init() { }

    /// Builds the `createSpeech` endpoint, attaches the headers and executes the request.
    ///
    /// - Parameters:
    ///   - api: Client used to route the endpoint, build the URL request and execute it.
    ///   - apiKey: Key sent as `Authorization: Bearer <apiKey>`.
    ///   - model: Text-to-speech model to use.
    ///   - input: Text to be spoken.
    ///   - voice: Voice to use.
    ///   - responseFormat: Requested audio format.
    ///   - speed: Requested speech speed.
    /// - Returns: The data carried by a successful result.
    /// - Throws: The error carried by a failed result.
    public func execute(api: API,
                        apiKey: String,
                        model: OpenAITTSModelType,
                        input: String,
                        voice: OpenAIVoiceType,
                        responseFormat: OpenAIAudioResponseType,
                        speed: Double) async throws -> Data? {
        var endpoint = OpenAIEndpoints.createSpeech(model: model,
                                                    input: input,
                                                    voice: voice,
                                                    responseFormat: responseFormat,
                                                    speed: speed).endpoint
        api.routeEndpoint(&endpoint, environment: OpenAIEnvironmentV1())

        var urlRequest = api.buildURLRequest(endpoint: endpoint)
        api.addHeaders(urlRequest: &urlRequest,
                       headers: ["Content-Type": "application/json",
                                 "Authorization": "Bearer \(apiKey)"])

        // The success payload is returned untouched, so no JSON decoding is
        // needed here (the previously created decoder was never used).
        let result = await api.execute(with: urlRequest)

        switch result {
        case .success(let data):
            return data
        case .failure(let error):
            throw error
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
Tests/SwiftOpenAITests/OpenAITests/Unit Tests/CreateSpeech/CreateSpeechEndpointSpec.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import XCTest | ||
@testable import SwiftOpenAI | ||
|
||
/// Verifies the path, method and parameters produced by the `createSpeech` endpoint.
final class CreateSpeechEndpointSpec: XCTestCase {
    /// Checks that all five options end up in `parameters` under the expected keys.
    func testEndpointCreateSpeech_WithModelTTS() throws {
        let model: OpenAITTSModelType = .tts(.tts1)
        let input = "Please create an audio with this input"
        let voice: OpenAIVoiceType = .alloy
        let responseFormat: OpenAIAudioResponseType = .mp3
        let speed = 1.0

        let sut = OpenAIEndpoints.createSpeech(
            model: model,
            input: input,
            voice: voice,
            responseFormat: responseFormat,
            speed: speed
        ).endpoint

        // XCTUnwrap records a test failure when `parameters` is nil,
        // instead of crashing the whole test process like `!` would.
        let parameters = try XCTUnwrap(sut.parameters)

        XCTAssertEqual(sut.path, "audio/speech")
        XCTAssertEqual(sut.method, .POST)
        XCTAssertEqual(parameters.count, 5)
        // `as?` yields nil for a missing or mistyped value, which fails the
        // equality assertion rather than trapping like `as!`.
        XCTAssertEqual(parameters["model"] as? String, model.name)
        XCTAssertEqual(parameters["input"] as? String, input)
        XCTAssertEqual(parameters["voice"] as? String, voice.rawValue)
        XCTAssertEqual(parameters["response_format"] as? String, responseFormat.rawValue)
        XCTAssertEqual(parameters["speed"] as? Double, speed)
    }
}
|
34 changes: 34 additions & 0 deletions
34
Tests/SwiftOpenAITests/OpenAITests/Unit Tests/CreateSpeech/CreateSpeechRequestSpec.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import XCTest | ||
@testable import SwiftOpenAI | ||
|
||
/// Verifies the headers attached to the URL request built for the create-speech endpoint.
final class CreateSpeechRequestSpec: XCTestCase {
    private let api = API()

    /// Expects exactly the `Content-Type` and `Authorization` headers to be set.
    func testRequest_CreatedWithCorrectHeaders() throws {
        let apiKey = "1234567890"

        var endpoint = OpenAIEndpoints.createSpeech(
            model: .tts(.tts1),
            input: "Please create an audio with this input",
            voice: .alloy,
            responseFormat: .mp3,
            speed: 1.0
        ).endpoint
        api.routeEndpoint(&endpoint, environment: OpenAIEnvironmentV1())

        var sut = api.buildURLRequest(endpoint: endpoint)
        api.addHeaders(
            urlRequest: &sut,
            headers: [
                "Content-Type": "application/json",
                "Authorization": "Bearer \(apiKey)"
            ]
        )

        let headerFields = sut.allHTTPHeaderFields
        XCTAssertEqual(headerFields?.count, 2)
        XCTAssertEqual(headerFields?["Content-Type"], "application/json")
        XCTAssertEqual(headerFields?["Authorization"], "Bearer 1234567890")
    }
}