-
Notifications
You must be signed in to change notification settings - Fork 84
/
preprocess_dataset.py
105 lines (96 loc) · 3.93 KB
/
preprocess_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from scipy.io import loadmat
from PIL import Image
import numpy as np
import os
from glob import glob
import cv2
import argparse
def cal_new_size(im_h, im_w, min_size, max_size):
    """Rescale an image size so its shorter side lies in [min_size, max_size].

    The shorter side (the width when both sides are equal) is snapped to
    ``min_size`` when it is too small, or to ``max_size`` when it is too
    large; the other side is scaled by the same factor and rounded.

    Args:
        im_h: Original image height.
        im_w: Original image width.
        min_size: Smallest allowed length for the shorter side.
        max_size: Largest allowed length for the shorter side.

    Returns:
        Tuple ``(new_h, new_w, ratio)`` where ``ratio`` is the scale factor
        that was applied (``1.0`` when the size is left unchanged).
    """
    height_is_short = im_h < im_w
    short_side = im_h if height_is_short else im_w

    # Pick the length the shorter side must be snapped to, if any.
    if short_side < min_size:
        target = min_size
    elif short_side > max_size:
        target = max_size
    else:
        return im_h, im_w, 1.0

    ratio = 1.0 * target / short_side
    if height_is_short:
        return target, round(im_w * ratio), ratio
    return round(im_h * ratio), target, ratio
def find_dis(point):
    """Return, for every point, the mean distance to its nearest neighbours.

    Up to three nearest neighbours (excluding the point itself) are averaged.
    When fewer than four points are given, all available neighbours are
    used; with zero or one point there are no neighbours and the result is 0.

    Args:
        point: 2-D NumPy array of shape (N, D), one coordinate row per point.

    Returns:
        Array of shape (N, 1) holding the mean neighbour distance per point.
    """
    n_points = point.shape[0]
    if n_points < 2:
        # No neighbours exist, so there is nothing to average.
        return np.zeros((n_points, 1), dtype=np.result_type(point, np.float32))

    # Pairwise Euclidean distances via |a|^2 - 2 a.b + |b|^2. The clamp at 0
    # guards against tiny negative values caused by floating-point rounding.
    square = np.sum(point * point, axis=1)
    dis = np.sqrt(np.maximum(
        square[:, None] - 2 * np.matmul(point, point.T) + square[None, :], 0.0))

    # Pin both index 0 (the self-distance, the row minimum) and index k into
    # their sorted positions. Partitioning on k alone leaves the order of the
    # smaller entries undefined, so the self-distance could otherwise end up
    # inside the [1, k] window that is averaged below.
    k = min(3, n_points - 1)
    dis = np.partition(dis, (0, k), axis=1)
    return np.mean(dis[:, 1:k + 1], axis=1, keepdims=True)
def generate_data(im_path):
    """Load an image with its annotation points and rescale both.

    The annotations are read from the ``annPoints`` entry of the ``.mat``
    file that sits next to the image (``<image stem>_ann.mat``). Points
    outside the image bounds are dropped. The image is then resized so that
    its shorter side fits the module-level ``min_size`` / ``max_size``
    limits (these globals are assigned by the script's ``__main__`` section),
    and the points are scaled by the same factor.

    Args:
        im_path: Path of the image file.

    Returns:
        Tuple ``(image, points)``: the (possibly resized) PIL image and a
        float32 array of the kept, rescaled annotation points.
    """
    im = Image.open(im_path)
    im_w, im_h = im.size

    # Swap only the file extension; str.replace would also rewrite any other
    # '.jpg' occurring earlier in the path.
    mat_path = os.path.splitext(im_path)[0] + '_ann.mat'
    points = loadmat(mat_path)['annPoints'].astype(np.float32)

    # Keep only annotations that fall inside the original image (bounds are
    # inclusive on both ends).
    idx_mask = ((points[:, 0] >= 0) & (points[:, 0] <= im_w)
                & (points[:, 1] >= 0) & (points[:, 1] <= im_h))
    points = points[idx_mask]

    im_h, im_w, rr = cal_new_size(im_h, im_w, min_size, max_size)
    im = np.array(im)
    if rr != 1.0:
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is `dst`, not `interpolation`.
        im = cv2.resize(im, (im_w, im_h), interpolation=cv2.INTER_CUBIC)
        points = points * rr
    return Image.fromarray(im), points
def parse_args():
    """Parse the command-line options of the preprocessing script.

    Returns:
        An ``argparse.Namespace`` with ``origin_dir`` (original data
        directory) and ``data_dir`` (processed data directory).
    """
    cli = argparse.ArgumentParser(description='Test ')
    # (flag, default value, help text) for every supported option.
    options = (
        ('--origin-dir', '/home/teddy/UCF-QNRF_ECCV18',
         'original data directory'),
        ('--data-dir', '/home/teddy/UCF-Train-Val-Test',
         'processed data directory'),
    )
    for flag, default_value, help_text in options:
        cli.add_argument(flag, default=default_value, help=help_text)
    return cli.parse_args()
if __name__ == '__main__':
    # Script entry point: preprocess every image under <origin-dir>/Train and
    # <origin-dir>/Test and write the resized image plus an .npy file with its
    # annotation points into <data-dir>/{train,val,test}.
    args = parse_args()
    save_dir = args.data_dir
    # Kept as module-level names on purpose: generate_data() looks them up as
    # globals.
    min_size = 512
    max_size = 2048

    for phase in ['Train', 'Test']:
        sub_dir = os.path.join(args.origin_dir, phase)
        if phase == 'Train':
            # The Train folder is split into train/val according to the image
            # names listed in train.txt / val.txt, which are opened relative
            # to the current working directory.
            for sub_phase in ['train', 'val']:
                sub_save_dir = os.path.join(save_dir, sub_phase)
                os.makedirs(sub_save_dir, exist_ok=True)
                with open('{}.txt'.format(sub_phase)) as f:
                    for line in f:
                        rel_name = line.strip()
                        if not rel_name:
                            # A blank line would otherwise resolve to the
                            # directory itself and fail when opened as an image.
                            continue
                        im_path = os.path.join(sub_dir, rel_name)
                        name = os.path.basename(im_path)
                        print(name)
                        # NOTE: `points` must stay a module-level name bound
                        # to the current annotations while find_dis() runs,
                        # in case find_dis() looks it up as a global.
                        im, points = generate_data(im_path)
                        if sub_phase == 'train':
                            # Training samples get one extra column: the mean
                            # distance to the nearest neighbouring points.
                            dis = find_dis(points)
                            points = np.concatenate((points, dis), axis=1)
                        im_save_path = os.path.join(sub_save_dir, name)
                        im.save(im_save_path)
                        # Swap only the extension; str.replace('jpg', 'npy')
                        # would also rewrite "jpg" inside directory names.
                        gd_save_path = os.path.splitext(im_save_path)[0] + '.npy'
                        np.save(gd_save_path, points)
        else:
            sub_save_dir = os.path.join(save_dir, 'test')
            os.makedirs(sub_save_dir, exist_ok=True)
            im_list = glob(os.path.join(sub_dir, '*jpg'))
            for im_path in im_list:
                name = os.path.basename(im_path)
                print(name)
                im, points = generate_data(im_path)
                im_save_path = os.path.join(sub_save_dir, name)
                im.save(im_save_path)
                # Swap only the extension (see the note in the Train branch).
                gd_save_path = os.path.splitext(im_save_path)[0] + '.npy'
                np.save(gd_save_path, points)