forked from gooofy/zamia-speech
-
Notifications
You must be signed in to change notification settings - Fork 2
/
import_mozcv1.py
executable file
·109 lines (84 loc) · 3.18 KB
/
import_mozcv1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 by Marc Puels
# Copyright 2016 by G.Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# convert the Mozilla Common Voice corpus (cv_corpus_v1) to voxforge-style packages
#
import sys
import os
import codecs
import traceback
import logging
import csv
from optparse import OptionParser
from nltools import misc
# Name handed to misc.init_app() below, and the default for --num-cpus.
PROC_TITLE = 'moz_cv1_to_vf'
DEFAULT_NUM_CPUS = 12

#
# init terminal
#

misc.init_app (PROC_TITLE)

#
# command line
#

parser = OptionParser("usage: %prog [options]")

parser.add_option ("-n", "--num-cpus", dest="num_cpus", type="int", default=DEFAULT_NUM_CPUS,
                   help="number of cpus to use in parallel, default: %d" % DEFAULT_NUM_CPUS)
parser.add_option ("-v", "--verbose", action="store_true", dest="verbose",
                   help="enable debug output")

(options, args) = parser.parse_args()

# num_cpus is used further down as the modulus that decides when a 'wait' is
# written to the generated script; 0 would raise ZeroDivisionError there and
# negative values make no sense, so reject both right away with a usage error.
if options.num_cpus < 1:
    parser.error("--num-cpus must be a positive integer")

# -v switches the root logger from INFO to DEBUG
logging.basicConfig(level=logging.DEBUG if options.verbose else logging.INFO)
#
# config
#
# Load .speechrc and pull both corpus locations from its "speech" entries:
#   speech_arc     - root under which the cv_corpus_v1 CSV and mp3 files are read
#   speech_corpora - root under which the per-utterance packages are written
config = misc.load_config ('.speechrc')

speech_arc, speech_corpora = (
    config.get("speech", option) for option in ("speech_arc", "speech_corpora")
)
#
# convert mp3 to wav, create one dir per utt
# (since we have no speaker information)
#
# Generate tmp/run_parallel.sh: a bash script holding one backgrounded ffmpeg
# call per clip listed in the cv-valid-*.csv files, with a 'wait' after every
# batch of jobs. While generating it, create one package directory
# (etc/ + wav/) per utterance and append the transcript to etc/prompts-original.

# shlex.quote (Python 3) and pipes.quote (Python 2) are the same function.
try:
    from shlex import quote as shell_quote
except ImportError:
    from pipes import quote as shell_quote

# Number of ffmpeg jobs started between two 'wait' lines. Clamp to 1 so that
# --num-cpus 0 cannot make the modulo below raise ZeroDivisionError.
batch_size = max(1, options.num_cpus)

# open(..., 'w') does not create missing directories
if not os.path.isdir('tmp'):
    os.makedirs('tmp')

cnt = 0
with open('tmp/run_parallel.sh', 'w') as scriptf:

    for csvfn in ['cv-valid-test.csv', 'cv-valid-train.csv', 'cv-valid-dev.csv']:

        with codecs.open('%s/cv_corpus_v1/%s' % (speech_arc, csvfn), 'r', 'utf8') as csvfile:

            # NOTE(review): quotechar='|' means double quotes in the CSV are
            # not treated as quoting - confirm this matches the corpus files.
            r = csv.reader(csvfile, delimiter=',', quotechar='|')

            # the first row of each file is skipped (column header)
            next(r, None)

            for row in r:

                # row[0] (mp3 path) and row[1] (transcript) are both required;
                # blank or truncated lines would otherwise raise IndexError
                if len(row) < 2:
                    logging.warning('%s: skipping malformed row %r', csvfn, row)
                    continue

                print(', '.join(row))

                # e.g. 'cv-valid-test/sample-000000.mp3' -> 'cv_valid_test_sample_000000'
                uttid = row[0].replace('/', '_').replace('.mp3', '').replace('-', '_')

                # no speaker information available: every utterance is its own speaker
                spk = uttid

                pkg_dir = '%s/cv_corpus_v1/%s-v1' % (speech_corpora, spk)

                misc.mkdirs('%s/etc' % pkg_dir)
                misc.mkdirs('%s/wav' % pkg_dir)

                with codecs.open ('%s/etc/prompts-original' % pkg_dir, 'a', 'utf8') as promptsf:
                    promptsf.write('%s %s\n' % (uttid, row[1]))

                mp3fn = '%s/cv_corpus_v1/%s' % (speech_arc, row[0])
                wavfn = '%s/wav/%s.wav' % (pkg_dir, uttid)

                # quote both paths so spaces or shell metacharacters in the
                # configured directories or CSV entries cannot break the script
                cmd = 'ffmpeg -i %s %s' % (shell_quote(mp3fn), shell_quote(wavfn))

                print('%d %s' % (cnt, wavfn))

                scriptf.write('echo %6d %s &\n' % (cnt, shell_quote(wavfn)))
                scriptf.write('%s &\n' % cmd)

                cnt += 1

                # let the running batch finish before starting the next one
                if (cnt % batch_size) == 0:
                    scriptf.write('wait\n')

    # The last batch is usually incomplete; without a final 'wait' bash would
    # exit while those ffmpeg jobs are still running in the background.
    if (cnt % batch_size) != 0:
        scriptf.write('wait\n')

# The script is only generated here, not executed: the command is printed so
# that it can be run manually.
cmd = "bash tmp/run_parallel.sh"
print(cmd)
# os.system(cmd)