-
Notifications
You must be signed in to change notification settings - Fork 43
/
vtt_to_srt.py
183 lines (110 loc) · 4.17 KB
/
vtt_to_srt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/python
#---------------------------------------
# vtt_to_srt.py
# (c) Jansen A. Simanullang
#---------------------------------------
# Usage:
#
# python vtt_to_srt.py pathname [-r]
#
# pathname - a file or directory with files to be converted
#
# -r - walk path recursively
#
# example:
# python vtt_to_srt.py subtitles/ -r
#
# features:
# convert file individually
# check a directory and all its subdirectories
# convert all vtt files to srt subtitle format
#
# real world cases:
# convert vtt web subtitles
import os, re, sys, io
from stat import *
def convertContent(fileContents):
    """Convert the text of a WebVTT file into SubRip (SRT) text.

    Cue timing lines are rewritten to the SRT form
    "hh:mm:ss,ttt --> hh:mm:ss,ttt": the fraction separator becomes a
    comma, an hour or minute field that the WebVTT short form left out is
    filled in with "00", and cue settings after the end time are dropped.
    A timing line is only recognised when it ends in a bare newline.

    The "WEBVTT", "Kind:" and "Language:" header lines, <c...> / </c> tags,
    inline <hh:mm:ss.ttt> timestamps, "::name(...) {...}" style rules and
    the "Style:" / "##" marker lines are removed.

    fileContents - the WebVTT text

    Returns the converted text.  Cue sequence numbers are not added, and
    numeric cue-identifier lines, if present, are left as they are.
    """
    # One timestamp: up to two optional "dd:" fields (hours, minutes), then
    # seconds, a literal dot and three fraction digits.
    stamp = r'((?:\d\d:){0,2}\d\d)\.(\d\d\d)'
    # Cue settings that may follow the end time,
    # e.g. " align:start position:0%".
    settings = r'(?:[ \-\w]+:[\w\%\d:]+)*'
    # The lookbehind stops a match from starting in the middle of a longer
    # run of digits and colons, which would then be padded incorrectly.
    timing = r'(?<![\d:])' + stamp + ' --> ' + stamp + settings + r'\n'

    def srt_stamp(clock, fraction):
        # SRT always spells out hh:mm:ss, so prepend "00:" for every field
        # that is absent from the matched clock value.
        return '00:' * (2 - clock.count(':')) + clock + ',' + fraction

    def srt_timing(match):
        begin = srt_stamp(match.group(1), match.group(2))
        end = srt_stamp(match.group(3), match.group(4))
        return begin + ' --> ' + end + '\n'

    replacement = re.sub(timing, srt_timing, fileContents)

    # The remaining steps only delete text; they run in this fixed order.
    deletions = (
        r'WEBVTT\n',
        r'Kind:[ \-\w]+\n',
        r'Language:[ \-\w]+\n',
        r'<c[.\w\d]*>',
        r'</c>',
        r'<\d\d:\d\d:\d\d\.\d\d\d>',
        r'::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n',
        r'Style:\n##\n',
    )
    for pattern in deletions:
        replacement = re.sub(pattern, '', replacement)
    return replacement
def fileCreate(strNamaFile, strData):
    """Write strData to the text file strNamaFile and print the file name.

    strNamaFile - path of the file to create; it is opened in "w" mode
    strData     - content to write; it is passed through str() first

    If opening or writing at the given path raises IOError, the file is
    created in the current working directory under the last path component
    instead.  An IOError from that second attempt propagates to the caller.
    """
    strData = str(strData)
    try:
        # "with" closes the handle even when the write itself fails.
        with open(strNamaFile, "w") as f:
            f.write(strData)
    except IOError:
        # Best-effort fallback: drop the directory part and retry relative
        # to the current working directory.
        strNamaFile = strNamaFile.split(os.sep)[-1]
        with open(strNamaFile, "w") as f:
            f.write(strData)
    print("file created: " + strNamaFile + "\n")
def readTextFile(strNamaFile):
    """Read a UTF-8 text file and return its content reduced to ASCII.

    strNamaFile - path of the file to read

    The file is decoded as UTF-8 and every non-ASCII character is dropped.
    Because the file is read in text mode with the default newline
    handling, "\\r\\n" and "\\r" line endings come back as "\\n".

    Returns the text as the interpreter's native str type.
    Raises IOError if the file cannot be opened and UnicodeDecodeError if
    it is not valid UTF-8.
    """
    with io.open(strNamaFile, mode='r', encoding='utf8') as f:
        print("file being read: " + strNamaFile + "\n")
        text = f.read()
    asciiBytes = text.encode('ascii', 'ignore')
    # On Python 2 the encoded byte string already is str; on Python 3 it
    # has to be decoded again so callers get str rather than bytes.
    if str is bytes:
        return asciiBytes
    return asciiBytes.decode('ascii')
def vtt_to_srt(strNamaFile):
    """Convert one WebVTT file and write the result as an .srt file.

    strNamaFile - path of the WebVTT file to convert

    The file is read with readTextFile(), converted with convertContent()
    and written with fileCreate() to the same path with its final
    extension replaced by ".srt".  The output path is printed.
    """
    fileContents = readTextFile(strNamaFile)
    strData = convertContent(fileContents)
    # Swap only the final extension.  A plain str.replace(".vtt", ".srt")
    # would also rewrite directory names containing ".vtt", and would leave
    # a path without ".vtt" unchanged so the input file gets overwritten.
    strNamaFile = os.path.splitext(strNamaFile)[0] + ".srt"
    print(strNamaFile)
    fileCreate(strNamaFile, strData)
def walktree(TopMostPath, callback):
    '''Recursively descend the directory tree rooted at TopMostPath,
    calling callback(pathname) for each regular file.

    TopMostPath - directory to start from
    callback    - function taking the path of one regular file

    Entries that are neither directories nor regular files are reported
    with a "Skipping" message.  Errors from os.listdir() and os.stat()
    propagate to the caller.
    '''
    for f in os.listdir(TopMostPath):
        pathname = os.path.join(TopMostPath, f)
        mode = os.stat(pathname).st_mode
        if S_ISDIR(mode):
            # It's a directory, recurse into it
            walktree(pathname, callback)
        elif S_ISREG(mode):
            # It's a file: call the callback with the path only, the same
            # one-argument call that walkdir() makes.
            callback(pathname)
        else:
            # Unknown file type, print a message
            print('Skipping %s' % pathname)
def walkdir(TopMostPath, callback):
    """Call callback(pathname) for every non-directory entry directly inside
    TopMostPath; subdirectories are not entered.

    TopMostPath - directory whose entries are listed
    callback    - function taking the path of one entry
    """
    entries = (os.path.join(TopMostPath, name) for name in os.listdir(TopMostPath))
    for entry in entries:
        if os.path.isdir(entry):
            # Directories are skipped, not descended into.
            continue
        callback(entry)
def convertVTTtoSRT(f):
    """Convert the file f with vtt_to_srt() if its name ends in ".vtt".

    f - path of a candidate file

    Any other path is ignored.  The test looks at the end of the path
    rather than anywhere inside it, so a file is not picked up merely
    because one of its parent directories has ".vtt" in its name.
    """
    if f.endswith('.vtt'):
        vtt_to_srt(f)
def vtts_to_srt(directory, rec = False):
    """Run convertVTTtoSRT over the files found in directory.

    directory - directory to scan
    rec       - when true the scan uses walktree(), otherwise walkdir()
    """
    walker = walktree if rec else walkdir
    walker(directory, convertVTTtoSRT)
def print_usage():
    """Print the command-line usage summary to standard output."""
    usageLines = (
        '\nUsage:\tpython vtt_to_srt.py pathname [-r]\n',
        '\tpathname\t- a file or directory with files to be converted',
        '\t-r\t\t- walk path recursively\n',
    )
    for usageLine in usageLines:
        print(usageLine)
if __name__ == '__main__':
    # Script entry point: vtt_to_srt.py pathname [-r]
    # Show the usage text and stop when no pathname is given, when help is
    # requested, or when the pathname does not exist.
    if len(sys.argv) < 2 or sys.argv[1] == '--help' or not os.path.exists(sys.argv[1]):
        print_usage()
        # sys.exit() rather than exit(): the latter is a helper installed
        # by the site module and is not guaranteed to be defined.
        sys.exit()
    path = sys.argv[1]
    # Recursion is requested by "-r" as the second argument.
    rec = len(sys.argv) > 2 and sys.argv[2] == '-r'
    if os.path.isdir(path):
        vtts_to_srt(path, rec)
    else:
        vtt_to_srt(path)