forked from openai/openai-node
-
Notifications
You must be signed in to change notification settings - Fork 0
/
transcriptions.ts
84 lines (73 loc) · 2.83 KB
/
transcriptions.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
import { APIResource } from '../../resource';
import * as Core from '../../core';
import * as TranscriptionsAPI from './transcriptions';
import * as AudioAPI from './audio';
export class Transcriptions extends APIResource {
  /**
   * Transcribes audio into the input language.
   *
   * The request body is merged with any caller-supplied `options`, passed
   * through `Core.multipartFormRequestOptions`, and POSTed to
   * `/audio/transcriptions`.
   *
   * @param body - The audio file and transcription settings to send.
   * @param options - Optional per-request settings; spread after `body`, so any
   *   key it shares with the generated options object takes precedence.
   * @returns The API promise produced by the client's `post` call.
   */
  create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
    const multipartOptions = Core.multipartFormRequestOptions({ body, ...options });
    return this._client.post('/audio/transcriptions', multipartOptions);
  }
}
/**
 * The transcription response the model returns for the provided input.
 */
export interface Transcription {
  /**
   * Text transcribed from the audio.
   */
  text: string;
}
/**
 * Request body accepted by `Transcriptions.create`. `file` and `model` are
 * required; every other field is optional.
 */
export interface TranscriptionCreateParams {
  /**
   * The audio to transcribe, supplied as a file object rather than a file name.
   * Accepted formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
   */
  file: Core.Uploadable;

  /**
   * ID of the model to use. Only `whisper-1` (which is powered by our open source
   * Whisper V2 model) is currently available.
   *
   * The `string & {}` member accepts any string; presumably it is written this way
   * so the known `AudioAPI.AudioModel` values still appear in editor completion.
   */
  model: (string & {}) | AudioAPI.AudioModel;

  /**
   * Language of the input audio. Providing it in
   * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format
   * improves accuracy and latency.
   */
  language?: string;

  /**
   * Optional text that guides the model's style or continues a previous audio
   * segment. The
   * [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
   * should be in the same language as the audio.
   */
  prompt?: string;

  /**
   * Format of the transcript output. One of `json`, `text`, `srt`,
   * `verbose_json`, or `vtt`.
   */
  response_format?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt';

  /**
   * Sampling temperature, between 0 and 1. Higher values such as 0.8 make the
   * output more random; lower values such as 0.2 make it more focused and
   * deterministic. When set to 0, the model uses
   * [log probability](https://en.wikipedia.org/wiki/Log_probability) to
   * automatically raise the temperature until certain thresholds are hit.
   */
  temperature?: number;

  /**
   * Timestamp granularities to populate for this transcription. Requires
   * `response_format` to be set to `verbose_json`. Either or both of `word` and
   * `segment` may be given. Note: segment timestamps add no latency, whereas
   * generating word timestamps incurs additional latency.
   */
  timestamp_granularities?: ('word' | 'segment')[];
}
/**
 * Namespace sharing the `Transcriptions` class name, so the request and response
 * types can also be referenced as `Transcriptions.Transcription` and
 * `Transcriptions.TranscriptionCreateParams`.
 */
export namespace Transcriptions {
// Aliases of the same-named interfaces, reached through the `TranscriptionsAPI`
// import of './transcriptions'.
export import Transcription = TranscriptionsAPI.Transcription;
export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
}