-
Notifications
You must be signed in to change notification settings - Fork 2
/
transcribe.js
161 lines (141 loc) · 4.09 KB
/
transcribe.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
const stream = require('stream')
const Duplex = stream.Duplex
const recorder = require('node-record-lpcm16')
const fs = require('fs')
/**
 * Duplex stream whose readable side emits exactly what is written to its
 * writable side: every chunk handed to `_write` is pushed straight onto the
 * read queue.
 */
class DuplexStream extends Duplex {
  /**
   * @param {object} [options] Passed through unchanged to the `Duplex` constructor.
   */
  constructor(options) {
    super(options);
    // Bookkeeping flag: raised by `_read`, lowered by `_write`.
    this.waiting = false;
  }

  /**
   * Forwards an incoming chunk to the readable side and acknowledges it.
   *
   * @param {*} chunk Data written to the stream.
   * @param {string} encoding Encoding supplied by the writer (not used here).
   * @param {Function} callback Invoked once the chunk has been pushed.
   */
  _write(chunk, encoding, callback) {
    this.waiting = false;
    this.push(chunk);
    callback();
  }

  /**
   * Records that a consumer asked for data. No data is produced here; chunks
   * only become readable when `_write` pushes them.
   *
   * @param {number} size Requested size (not used here).
   */
  _read(size) {
    // Setting the flag unconditionally leaves it `true` either way.
    this.waiting = true;
  }
}
/**
 * Records audio from the microphone through `node-record-lpcm16` and exposes
 * it as a readable stream obtained from `audio()`.
 */
class MicrophoneStream {
  /**
   * @param {string} encoding Audio encoding label; stored for consumers to read.
   * @param {number} sampleRate Sample rate in Hertz passed to the recorder.
   */
  constructor(encoding, sampleRate) {
    this.encoding = encoding;
    this.sampleRate = sampleRate;
  }

  /**
   * Creates a fresh buffer stream, starts the recorder and pipes the
   * recorder's output into the buffer.
   */
  start() {
    this.setupBuffer();
    // Keep a handle on the recorder's own stream. `.pipe()` returns its
    // destination, so storing the result of the whole chain would leave
    // `this.rec` pointing at the buffer and make `stop()` unable to detach
    // the recorder. `.on()` returns the emitter itself, so it is safe here.
    this.rec = recorder.start({
      sampleRate: this.sampleRate,
      verbose: false,
      recordProgram: 'rec',
      silence: '10.0',
      threshold: 0
    }).on('error', console.error);
    this.rec.pipe(this.buffer);
  }

  /**
   * Detaches the recorder stream from the buffer and stops the recorder.
   * Calling this before `start()` skips the detach step instead of throwing.
   */
  stop() {
    if (this.rec) {
      this.rec.unpipe(this.buffer);
    }
    recorder.stop();
  }

  /**
   * @returns {DuplexStream|undefined} The buffer stream carrying recorded
   *   audio, or `undefined` if `start()` has not been called yet.
   */
  audio() {
    return this.buffer;
  }

  /**
   * Replaces `this.buffer` with a new, empty `DuplexStream`.
   */
  setupBuffer() {
    this.buffer = new DuplexStream();
  }
}
const speech = require('@google-cloud/speech')
// Default value of the -e / --encoding command-line option.
const DEFAULT_ENCODING = 'LINEAR16'
// Default value of the -r / --samplerate command-line option, in Hertz.
const DEFAULT_SAMPLE_RATE_IN_HERTZ = 16000
// Default value of the -l / --languagecode command-line option (BCP-47 tag).
const DEFAULT_LANGUAGE_CODE = 'hi-IN'
// NOTE(review): not referenced by the code in this file; presumably the
// per-stream duration limit of the speech API in milliseconds — confirm.
const MAX_API_TIME_LIMIT_IN_MS = 60000
// Channel count assumed for the recorded audio (1 — presumably mono; verify
// against the recorder configuration).
const NUM_CHANNELS = 1
// NOTE(review): not referenced by the code in this file — confirm whether
// it is still needed.
const NUM_CHUNKS_PER_INTERVAL = 10
// NOTE(review): not referenced by the code in this file — confirm whether
// it is still needed.
const SLIDING_WINDOW_BUFFER_SECS = 10
// Error code that makes SpeechToText re-create its recognize stream.
// NOTE(review): 11 looks like the gRPC OUT_OF_RANGE status — confirm.
const OUT_OF_RANGE_ERROR_CODE = 11
/**
 * Streams microphone audio to a `SpeechClient` streaming-recognize call and
 * prints the transcripts it returns to standard output.
 */
class SpeechToText {
  /**
   * @param {object} microphoneStream Audio source; must expose `encoding`,
   *   `sampleRate` and an `audio()` method returning a readable stream.
   * @param {string} languageCode Language code sent in the request config.
   */
  constructor(microphoneStream, languageCode) {
    this.client = new speech.SpeechClient();
    this.mic = microphoneStream;
    this.encoding = microphoneStream.encoding;
    this.sampleRate = microphoneStream.sampleRate;
    this.isTranscribing = false;
    this.config = {
      config: {
        encoding: this.encoding,
        sampleRateHertz: this.sampleRate,
        languageCode: languageCode
      },
      interimResults: true
    };
    this.initializeStream();
  }

  /**
   * Handles an 'error' event from the recognize stream. An error whose code
   * equals `OUT_OF_RANGE_ERROR_CODE` re-creates the stream and, if a
   * transcription was running, re-attaches the microphone audio to it.
   * Any other error is logged to stderr.
   *
   * @param {{code: number}} error Error emitted by the recognize stream.
   */
  handle_reconnect_error(error) {
    if (error.code === OUT_OF_RANGE_ERROR_CODE) {
      this.initializeStream();
      if (this.isTranscribing) {
        this.transcribe();
      }
      return;
    }
    // Actually call the logger: a bare `console.error` expression is a no-op
    // and would hide every non-reconnect error.
    console.error(error);
  }

  /**
   * Opens a new streaming-recognize call with `this.config`, stores it in
   * `this.stream` and wires its 'error' and 'data' handlers.
   */
  initializeStream() {
    this.stream = this.client
      .streamingRecognize(this.config)
      .on('error', (error) => this.handle_reconnect_error(error))
      .on('data', (data) => this.print_transcribed_text(data));
  }

  /**
   * Pipes the microphone audio into the current recognize stream and marks
   * the instance as transcribing. Does nothing when no audio stream exists.
   */
  transcribe() {
    if (this.mic && this.mic.audio()) {
      this.isTranscribing = true;
      this.mic.audio().pipe(this.stream);
    }
  }

  /**
   * Writes the first alternative of the first result to stdout. Final
   * results end with a newline; interim results end with a carriage return
   * so the next write starts at the beginning of the same line.
   *
   * @param {{results: Array<{isFinal: boolean, alternatives: Array<{transcript: string}>}>}} data
   *   Response received on the recognize stream's 'data' event.
   */
  print_transcribed_text(data) {
    // Tolerate a response without a `results` field instead of throwing.
    const result = data.results && data.results[0];
    if (!result || !result.alternatives[0]) {
      return;
    }
    const lineEnd = result.isFinal ? "\n" : "\r";
    process.stdout.write(result.alternatives[0].transcript + lineEnd);
  }
}
// Command-line options accepted by the script; each falls back to the
// matching DEFAULT_* constant when not supplied.
const cliOptions = {
  e: {
    alias: 'encoding',
    describe: 'encoding type',
    type: 'string',
    default: DEFAULT_ENCODING
  },
  r: {
    alias: 'samplerate',
    describe: 'sample rate in Hertz',
    type: 'number',
    default: DEFAULT_SAMPLE_RATE_IN_HERTZ
  },
  l: {
    alias: 'languagecode',
    describe: 'BCP-47 language code',
    type: 'string',
    default: DEFAULT_LANGUAGE_CODE
  }
};

// Parse the command line.
const argv = require('yargs')
  .alias('h', 'help')
  .help('help')
  .usage('Usage: $0 [-e encoding] [-r sample_rate_in_hz] [-l language_code_BCP-47]')
  .showHelpOnFail(false, "Specify --help for help with command usage")
  .options(cliOptions)
  .argv;

// Echo the effective configuration, then the transcript header.
const { e: encoding, r: sampleRate, l: languageCode } = argv;
process.stdout.write(`Config:\n encoding: ${encoding}\n sample rate: ${sampleRate}\n language code: ${languageCode}\n`);
process.stdout.write(`Transcript:\n`);

// Start recording first, then attach the recognizer to the microphone audio.
const microphoneStream = new MicrophoneStream(encoding, sampleRate);
microphoneStream.start();
const speechToTextClient = new SpeechToText(microphoneStream, languageCode);
speechToTextClient.transcribe();