-
Notifications
You must be signed in to change notification settings - Fork 176
/
Example01_AudioFromFileWithToolsAsync.cs
215 lines (191 loc) · 9.18 KB
/
Example01_AudioFromFileWithToolsAsync.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
using NUnit.Framework;
using OpenAI.Images;
using OpenAI.RealtimeConversation;
using System;
using System.ClientModel;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
namespace OpenAI.Examples;
#pragma warning disable OPENAI002
public partial class RealtimeExamples
{
    /// <summary>
    /// Streams a PCM16 WAV file into a realtime conversation session, lets the model call a
    /// sample weather tool, responds to the tool call, and saves model audio output to
    /// per-item <c>.raw</c> files in the working directory.
    /// </summary>
    /// <remarks>
    /// Requires the <c>OPENAI_API_KEY</c> environment variable and network access.
    /// </remarks>
    [Test]
    public async Task Example01_AudioFromFileWithToolsAsync()
    {
        RealtimeConversationClient client = new(
            model: "gpt-4o-realtime-preview",
            credential: new ApiKeyCredential(Environment.GetEnvironmentVariable("OPENAI_API_KEY")));

        using RealtimeConversationSession session = await client.StartConversationSessionAsync();

        // Session options control connection-wide behavior shared across all conversations,
        // including audio input format and voice activity detection settings.
        ConversationSessionOptions sessionOptions = new()
        {
            Instructions = "You are a cheerful assistant that talks like a pirate. "
                + "Always inform the user when you are about to call a tool. "
                + "Prefer to call tools whenever applicable.",
            Voice = ConversationVoice.Alloy,
            Tools = { CreateSampleWeatherTool() },
            InputAudioFormat = ConversationAudioFormat.Pcm16,
            OutputAudioFormat = ConversationAudioFormat.Pcm16,
            // Input transcription options must be provided to enable transcribed feedback for input audio.
            InputTranscriptionOptions = new()
            {
                Model = "whisper-1",
            },
        };
        await session.ConfigureSessionAsync(sessionOptions);

        // Conversation history or text input are provided by adding messages to the conversation.
        // Adding a message will not automatically begin a response turn.
        await session.AddItemAsync(
            ConversationItem.CreateUserMessage(["I'm trying to decide what to wear on my trip."]));

        // Path.Combine (instead of a hard-coded '\\') keeps the asset lookup working on
        // non-Windows platforms; FindFile additionally normalizes any remaining separators.
        string inputAudioPath = FindFile(
            Path.Combine("Assets", "realtime_whats_the_weather_pcm16_24khz_mono.wav"));
        using Stream inputAudioStream = File.OpenRead(inputAudioPath);

        // Deliberately not awaited: input audio is streamed to the service concurrently while
        // the loop below receives updates. Any send failure surfaces as an error update.
        _ = session.SendInputAudioAsync(inputAudioStream);

        Dictionary<string, Stream> outputAudioStreamsById = [];
        try
        {
            await foreach (ConversationUpdate update in session.ReceiveUpdatesAsync())
            {
                if (update is ConversationSessionStartedUpdate sessionStartedUpdate)
                {
                    Console.WriteLine($"<<< Session started. ID: {sessionStartedUpdate.SessionId}");
                    Console.WriteLine();
                }

                if (update is ConversationInputSpeechStartedUpdate speechStartedUpdate)
                {
                    Console.WriteLine(
                        $" -- Voice activity detection started at {speechStartedUpdate.AudioStartTime}");
                }

                if (update is ConversationInputSpeechFinishedUpdate speechFinishedUpdate)
                {
                    Console.WriteLine(
                        $" -- Voice activity detection ended at {speechFinishedUpdate.AudioEndTime}");
                }

                // Item started updates notify that the model generation process will insert a new item into
                // the conversation and begin streaming its content via content updates.
                if (update is ConversationItemStreamingStartedUpdate itemStreamingStartedUpdate)
                {
                    Console.WriteLine($" -- Begin streaming of new item");
                    if (!string.IsNullOrEmpty(itemStreamingStartedUpdate.FunctionName))
                    {
                        Console.Write($" {itemStreamingStartedUpdate.FunctionName}: ");
                    }
                }

                if (update is ConversationItemStreamingPartDeltaUpdate deltaUpdate)
                {
                    // With audio output enabled, the audio transcript of the delta update contains an approximation of
                    // the words spoken by the model. Without audio output, the text of the delta update will contain
                    // the segments making up the text content of a message.
                    Console.Write(deltaUpdate.AudioTranscript);
                    Console.Write(deltaUpdate.Text);
                    Console.Write(deltaUpdate.FunctionArguments);

                    if (deltaUpdate.AudioBytes is not null)
                    {
                        if (!outputAudioStreamsById.TryGetValue(deltaUpdate.ItemId, out Stream value))
                        {
                            string filename = $"output_{sessionOptions.OutputAudioFormat}_{deltaUpdate.ItemId}.raw";
                            // File.Create truncates any file left over from a previous run;
                            // File.OpenWrite would leave stale trailing bytes when the new
                            // audio output is shorter than the old file.
                            value = File.Create(filename);
                            outputAudioStreamsById[deltaUpdate.ItemId] = value;
                        }
                        value.Write(deltaUpdate.AudioBytes);
                    }
                }

                // Item finished updates arrive when all streamed data for an item has arrived and the
                // accumulated results are available. In the case of function calls, this is the point
                // where all arguments are expected to be present.
                if (update is ConversationItemStreamingFinishedUpdate itemStreamingFinishedUpdate)
                {
                    Console.WriteLine();
                    Console.WriteLine($" -- Item streaming finished, item_id={itemStreamingFinishedUpdate.ItemId}");

                    if (itemStreamingFinishedUpdate.FunctionCallId is not null)
                    {
                        Console.WriteLine($"    + Responding to tool invoked by item: {itemStreamingFinishedUpdate.FunctionName}");
                        // Supply a canned tool result; a real app would dispatch on FunctionName
                        // and parse the accumulated function arguments.
                        ConversationItem functionOutputItem = ConversationItem.CreateFunctionCallOutput(
                            callId: itemStreamingFinishedUpdate.FunctionCallId,
                            output: "70 degrees Fahrenheit and sunny");
                        await session.AddItemAsync(functionOutputItem);
                    }
                    else if (itemStreamingFinishedUpdate.MessageContentParts?.Count > 0)
                    {
                        Console.Write($"    + [{itemStreamingFinishedUpdate.MessageRole}]: ");
                        foreach (ConversationContentPart contentPart in itemStreamingFinishedUpdate.MessageContentParts)
                        {
                            Console.Write(contentPart.AudioTranscript);
                        }
                        Console.WriteLine();
                    }
                }

                if (update is ConversationInputTranscriptionFinishedUpdate transcriptionCompletedUpdate)
                {
                    Console.WriteLine();
                    Console.WriteLine($" -- User audio transcript: {transcriptionCompletedUpdate.Transcript}");
                    Console.WriteLine();
                }

                if (update is ConversationResponseFinishedUpdate turnFinishedUpdate)
                {
                    Console.WriteLine($" -- Model turn generation finished. Status: {turnFinishedUpdate.Status}");

                    // Here, if we processed tool calls in the course of the model turn, we finish the
                    // client turn to resume model generation. The next model turn will reflect the tool
                    // responses that were already provided.
                    if (turnFinishedUpdate.CreatedItems.Any(item => item.FunctionName?.Length > 0))
                    {
                        Console.WriteLine($" -- Ending client turn for pending tool responses");
                        await session.StartResponseAsync();
                    }
                    else
                    {
                        break;
                    }
                }

                if (update is ConversationErrorUpdate errorUpdate)
                {
                    Console.WriteLine();
                    Console.WriteLine($"ERROR: {errorUpdate.Message}");
                    break;
                }
            }
        }
        finally
        {
            // Dispose output file streams even if the update loop faults, so no file
            // handles leak and all buffered audio bytes are flushed to disk.
            foreach ((string itemId, Stream outputAudioStream) in outputAudioStreamsById)
            {
                Console.WriteLine($"Raw audio output for {itemId}: {outputAudioStream.Length} bytes");
                outputAudioStream.Dispose();
            }
        }
    }

    /// <summary>
    /// Builds the example weather tool definition the model may invoke, with a JSON schema
    /// requiring a location string and a "c"/"f" unit.
    /// </summary>
    private static ConversationFunctionTool CreateSampleWeatherTool()
    {
        return new ConversationFunctionTool()
        {
            Name = "get_weather_for_location",
            Description = "gets the weather for a location",
            Parameters = BinaryData.FromString("""
                {
                  "type": "object",
                  "properties": {
                    "location": {
                      "type": "string",
                      "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "unit": {
                      "type": "string",
                      "enum": ["c","f"]
                    }
                  },
                  "required": ["location","unit"]
                }
                """)
        };
    }

    /// <summary>
    /// Searches for <paramref name="fileName"/> (optionally a relative path) in the current
    /// directory and each ancestor directory up to the filesystem root, returning the first match.
    /// </summary>
    /// <param name="fileName">File name or relative path; Windows-style separators are accepted on any OS.</param>
    /// <returns>The full path of the first matching file.</returns>
    /// <exception cref="FileNotFoundException">No ancestor directory contains the file.</exception>
    private static string FindFile(string fileName)
    {
        // Normalize separators so callers using "Assets\\file.wav" still resolve on Linux/macOS.
        fileName = fileName.Replace('\\', Path.DirectorySeparatorChar)
            .Replace('/', Path.DirectorySeparatorChar);

        for (string currentDirectory = Directory.GetCurrentDirectory();
             currentDirectory != null && currentDirectory != Path.GetPathRoot(currentDirectory);
             currentDirectory = Directory.GetParent(currentDirectory)?.FullName!)
        {
            string filePath = Path.Combine(currentDirectory, fileName);
            if (File.Exists(filePath))
            {
                return filePath;
            }
        }

        throw new FileNotFoundException($"File '{fileName}' not found.");
    }
}