Skip to content

Commit

Permalink
Merge pull request #560 from rjeike/bugfix/transcript-timestamps
Browse files Browse the repository at this point in the history
Implemented support for Timestamp Granularity on Audio Transcripts
  • Loading branch information
kayhantolga authored May 18, 2024
2 parents 05d8db0 + 8188125 commit 934fc3b
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 1 deletion.
7 changes: 7 additions & 0 deletions OpenAI.Playground/TestHelpers/AudioTestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,19 @@ public static async Task RunSimpleAudioCreateTranscriptionTest(IOpenAIService sd
FileName = fileName,
File = sampleFile,
Model = Models.WhisperV1,
TimestampGranularities =
[
StaticValues.AudioStatics.TimestampGranularity.Word,
StaticValues.AudioStatics.TimestampGranularity.Segment
],
ResponseFormat = StaticValues.AudioStatics.ResponseFormat.VerboseJson
});


if (audioResult.Successful)
{
Console.WriteLine($"Segments: {audioResult.Segments.Count}");
Console.WriteLine($"Words: {audioResult.Words.Count}");
Console.WriteLine(string.Join("\n", audioResult.Text));
}
else
Expand Down
12 changes: 11 additions & 1 deletion OpenAI.SDK/Managers/OpenAIAudioService.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using OpenAI.Extensions;
using System.Text;
using System.Text.Json;
using OpenAI.Extensions;
using OpenAI.Interfaces;
using OpenAI.ObjectModels;
using OpenAI.ObjectModels.RequestModels;
Expand Down Expand Up @@ -54,6 +56,14 @@ private async Task<AudioCreateTranscriptionResponse> Create(AudioCreateTranscrip
}

multipartContent.Add(new StringContent(audioCreateTranscriptionRequest.Model), "model");

if (audioCreateTranscriptionRequest.TimestampGranularities != null)
{
foreach (var granularity in audioCreateTranscriptionRequest.TimestampGranularities)
{
multipartContent.Add(new StringContent(granularity), "timestamp_granularities[]");
}
}

if (audioCreateTranscriptionRequest.Language != null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ public record AudioCreateTranscriptionRequest : IOpenAiModels.IModel, IOpenAiMod
/// </summary>
public string? Language { get; set; }

/// <summary>
/// The timestamp granularities to populate for this transcription. <c>response_format</c> must be set to
/// <c>verbose_json</c> to use timestamp granularities. Either or both of these options are supported:
/// <see cref="StaticValues.AudioStatics.TimestampGranularity.Word" /> ("word") and
/// <see cref="StaticValues.AudioStatics.TimestampGranularity.Segment" /> ("segment"). Note: There is no
/// additional latency for segment timestamps, but generating word timestamps incurs additional latency.
/// When left <c>null</c>, no <c>timestamp_granularities[]</c> form field is added to the request.
/// </summary>
public List<string>? TimestampGranularities { get; set; }

/// <summary>
/// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@ public record AudioCreateTranscriptionResponse : BaseResponse

[JsonPropertyName("duration")] public float Duration { get; set; }

/// <summary>
/// Word-level entries deserialized from the <c>words</c> field of the response JSON.
/// NOTE(review): presumably only returned when the "word" timestamp granularity was requested, in which
/// case this would be <c>null</c> otherwise — confirm against the API and null-check before use.
/// </summary>
[JsonPropertyName("words")] public List<WordSegment> Words { get; set; }

/// <summary>
/// Segment-level entries deserialized from the <c>segments</c> field of the response JSON.
/// NOTE(review): presumably absent for response formats other than <c>verbose_json</c>, in which case
/// this would be <c>null</c> — confirm against the API and null-check before use.
/// </summary>
[JsonPropertyName("segments")] public List<Segment> Segments { get; set; }

/// <summary>
/// One entry of the <c>words</c> array in a transcription response: a word together with its
/// <c>start</c> and <c>end</c> values.
/// </summary>
public class WordSegment
{
    /// <summary>
    /// Value of the <c>word</c> JSON field.
    /// </summary>
    [JsonPropertyName("word")]
    public string Word { get; set; }

    /// <summary>
    /// Value of the <c>start</c> JSON field.
    /// NOTE(review): presumably the offset in seconds at which the word begins — confirm against the API reference.
    /// </summary>
    [JsonPropertyName("start")]
    public float Start { get; set; }

    /// <summary>
    /// Value of the <c>end</c> JSON field.
    /// NOTE(review): presumably the offset in seconds at which the word ends — confirm against the API reference.
    /// </summary>
    [JsonPropertyName("end")]
    public float End { get; set; }
}

public class Segment
{
Expand Down
7 changes: 7 additions & 0 deletions OpenAI.SDK/ObjectModels/StaticValueHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ public static class ResponseFormat
public static string VerboseJson => "verbose_json";
public static string Vtt => "vtt";
}

/// <summary>
/// String values that can be placed in the <c>timestamp_granularities[]</c> form field of an audio
/// transcription request.
/// </summary>
public static class TimestampGranularity
{
    /// <summary>
    /// The <c>"word"</c> granularity value.
    /// </summary>
    public static string Word { get; } = "word";

    /// <summary>
    /// The <c>"segment"</c> granularity value.
    /// </summary>
    public static string Segment { get; } = "segment";
}

public static class CreateSpeechResponseFormat
{
public static string Mp3 => "mp3";
Expand Down

0 comments on commit 934fc3b

Please sign in to comment.