-
Notifications
You must be signed in to change notification settings - Fork 190
/
process_dataset.py
48 lines (46 loc) · 1.67 KB
/
process_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Process the raw data of the video datasets (Something-Something and Jester)
# and generate the meta files:
# category.txt: the list of categories, one per line.
# train_videofolder.txt: each row contains [videoname num_frames classIDX]
# val_videofolder.txt: same format as train_videofolder.txt
#
# Bolei Zhou, Dec.2 2017
#
#
import os
import pdb
# Dataset release to process; the alternative value is kept in the trailing comment.
dataset_name = 'something-something-v1' # 'jester-v1'

# Load the label file: one category name per line, trailing whitespace stripped,
# then put the names in sorted order.
with open('%s-labels.csv'% dataset_name) as f:
    lines = f.readlines()
categories = sorted(line.rstrip() for line in lines)

# Write the sorted category names, newline-separated (no trailing newline).
with open('category.txt','w') as f:
    f.write('\n'.join(categories))

# Map each category name to its position in the sorted list; that position is
# used as the class index in the *_videofolder.txt files.
dict_categories = {category: i for i, category in enumerate(categories)}
# Build the per-split meta files. Each input CSV row is "<video folder>;<label>";
# each output row is "<video folder> <num_frames> <class index>".
files_input = ['%s-validation.csv'%dataset_name,'%s-train.csv'%dataset_name]
files_output = ['val_videofolder.txt','train_videofolder.txt']
for (filename_input, filename_output) in zip(files_input, files_output):
    with open(filename_input) as f:
        lines = f.readlines()
    folders = []
    idx_categories = []
    for line in lines:
        line = line.rstrip()
        if not line:
            # Skip blank lines instead of failing on the missing label field.
            continue
        items = line.split(';')
        folders.append(items[0])
        # Store the integer class index directly. It was previously wrapped in
        # os.path.join(), which raises TypeError for an int on Python 3 and
        # served no purpose (the index is formatted with %d below).
        idx_categories.append(dict_categories[items[1]])
    output = []
    for i, (curFolder, curIDX) in enumerate(zip(folders, idx_categories)):
        # counting the number of frames in each video folder: every directory
        # entry under 20bn-<dataset_name>/<folder> is counted.
        dir_files = os.listdir(os.path.join('20bn-%s'%dataset_name, curFolder))
        output.append('%s %d %d'%(curFolder, len(dir_files), curIDX))
        # Progress indicator (zero-based index / total number of videos).
        print('%d/%d'%(i, len(folders)))
    with open(filename_output,'w') as f:
        f.write('\n'.join(output))