-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_sth_v2_dataset.py
48 lines (44 loc) · 1.94 KB
/
process_sth_v2_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# This code has been adapted from the TRN-pytorch repository
# 'https://github.com/metalbubble/TRN-pytorch/blob/master/process_dataset.py',
# which was prepared by Bolei Zhou
#
# Processing the raw dataset of Something Something V2
#
# generate the meta files:
# category.txt: the list of categories.
# train_videofolder.txt: each row contains [videoname num_frames classIDX]
# val_videofolder.txt: same as above
#
# Created by Bolei Zhou, Dec.2 2017
import os
import pdb
import json
# Directory holding the annotation JSON files; the generated meta files
# (category.txt, *_videofolder.txt) are written here as well.
ROOT_DATASET = '/usr/home/sut/datasets/something-something-v2/'
# Directory containing one sub-folder per video id (its extracted frames).
ROOT_DATASET_FRAMES = '/usr/home/sut/datasets/something-something-v2/rgb'
dataset_name = 'something-something-v2'

# Load the label map: category name -> class index (a value accepted by int()).
# os.path.join keeps this working whether or not ROOT_DATASET ends with '/'.
labels_path = os.path.join(ROOT_DATASET, f'{dataset_name}-labels.json')
with open(labels_path, encoding='utf-8') as labels_json:
    dict_categories = json.load(labels_json)

# Write one category per line, ordered by class index so that line N of
# category.txt always names class N, independent of the key order in the JSON.
# sorted() is stable, so an already index-ordered file produces the same output.
category_names = sorted(dict_categories, key=lambda name: int(dict_categories[name]))
with open(os.path.join(ROOT_DATASET, 'category.txt'), 'w', encoding='utf-8') as f:
    f.write('\n'.join(category_names))
# Annotation files to read and, in the same order, the meta files to produce.
files_input = [
    os.path.join(ROOT_DATASET, '%s-validation.json' % dataset_name),
    os.path.join(ROOT_DATASET, '%s-train.json' % dataset_name),
]
files_output = ['val_videofolder.txt', 'train_videofolder.txt']

for filename_input, filename_output in zip(files_input, files_output):
    with open(filename_input, encoding='utf-8') as f:
        lines = json.load(f)

    total = len(lines)
    output = []
    for done, line in enumerate(lines, start=1):
        video_id = line['id']
        # The key used in dict_categories is the template text with the
        # square brackets removed.
        label = str(line['template']).replace('[', '').replace(']', '')
        class_idx = int(dict_categories[label])
        # The frame count is the number of entries in the video's folder.
        # A missing folder raises FileNotFoundError, as before.
        num_frames = len(os.listdir(os.path.join(ROOT_DATASET_FRAMES, video_id)))
        # Row format: "<videoname> <num_frames> <classIDX>"
        output.append('%s %d %d' % (video_id, num_frames, class_idx))
        # 1-based progress so the final line reads total/total.
        print('%d/%d' % (done, total))

    with open(os.path.join(ROOT_DATASET, filename_output), 'w', encoding='utf-8') as f:
        f.write('\n'.join(output))