# video_pose_ed.py
import sys
import os
import math
import time

from moviepy.editor import VideoFileClip, ImageSequenceClip


def read_video(video_name):
    # Read a video clip from the testset/ directory
    video_name_input = 'testset/' + video_name
    video = VideoFileClip(video_name_input)
    return video


def video2frame(video_name):
    # Dump every frame of the video as a zero-padded JPEG under testset/<video_name>/
    video = read_video(video_name)
    video_frame_number = int(video.duration * video.fps)  # duration is in seconds; fps is frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # digits needed for zero-padding, e.g. 720 -> 3
    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)
    for i in range(0, video_frame_number):
        video.save_frame('testset/' + video_name + '/frame_' + str(i).zfill(video_frame_ciphers) + '.jpg', i / video.fps)
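

# Minimal usage sketch (hypothetical clip name; read_video expects the file
# under testset/):
#
#     video2frame('dance.mp4')
#     # -> testset/dance.mp4/frame_000.jpg, frame_001.jpg, ...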


def video2poseframe(video_name):
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    from PIL import Image, ImageDraw
    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()

    # Load and set up the CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    video = read_video(video_name)
    video_frame_number = int(video.duration * video.fps)  # duration is in seconds; fps is frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # digits needed for zero-padding, e.g. 720 -> 3

    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Draw the pose keypoints onto the frame
        image_img = Image.fromarray(image)
        draw = ImageDraw.Draw(image_img)

        point_num = 17  # keypoints per person

        print('person_conf_multi.size: ')
        print(person_conf_multi.size)

        # person_conf_multi has shape (people, 17, 2), so size / (17 * 2) is the candidate count
        people_num = int(person_conf_multi.size / (point_num * 2))
        print('people_num: ')
        print(people_num)

        point_r = 5  # radius of drawn points
        people_real_num = 0
        for people_i in range(0, people_num):
            # One random color per person candidate
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_count = 0
            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # (0, 0) marks an undetected keypoint
                    point_count = point_count + 1
            if point_count > 5:  # a candidate with more than 5 detected keypoints counts as a real person
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    draw.ellipse((person_conf_multi[people_i][point_i][0] - point_r, person_conf_multi[people_i][point_i][1] - point_r, person_conf_multi[people_i][point_i][0] + point_r, person_conf_multi[people_i][point_i][1] + point_r), fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = 'testset/' + video_name + '/frame_pose_' + str(i).zfill(video_frame_ciphers) + '.jpg'
        image_img.save(video_name_result, "JPEG")  # Pillow's format name is "JPEG", not "JPG"
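

# The keypoint indices below follow the COCO 17-point ordering (an assumption
# that matches how the limbs are grouped in video2posevideo): 0 nose,
# 1/2 eyes, 3/4 ears, 5/6 shoulders, 7/8 elbows, 9/10 wrists, 11/12 hips,
# 13/14 knees, 15/16 ankles.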
def video2posevideo(video_name):
    time_start = time.perf_counter()  # wall-clock timer (time.clock() was removed in Python 3.8)

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)
    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and set up the CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    video = read_video(video_name)
    video_frame_number = int(video.duration * video.fps)  # duration is in seconds; fps is frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  # digits needed for zero-padding, e.g. 720 -> 3

    pose_frame_list = []

    point_r = 3  # radius of drawn points
    point_min = 10  # keypoint threshold: a candidate with at least point_min detected keypoints counts as a real person
    part_min = 3  # part threshold: a candidate with at least part_min drawn parts counts as a real person (arms and legs are the parts checked)
    point_num = 17  # keypoints per person

    def ellipse_set(person_conf_multi, people_i, point_i):
        # Bounding box of the dot drawn for one keypoint
        return (person_conf_multi[people_i][point_i][0] - point_r, person_conf_multi[people_i][point_i][1] - point_r, person_conf_multi[people_i][point_i][0] + point_r, person_conf_multi[people_i][point_i][1] + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        # End points of the segment joining two keypoints
        return (person_conf_multi[people_i][point_i][0], person_conf_multi[people_i][point_i][1], person_conf_multi[people_i][point_j][0], person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        # Draw a three-joint limb: dots at keypoints a, b, c plus segments a-b and b-c
        draw.ellipse(ellipse_set(person_conf_multi, people_i, a), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, b), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, c), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)
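
    # For example, a keypoint at (100, 40) with point_r = 3 gives
    # ellipse_set(...) = (97, 37, 103, 43), a small box centered on the joint.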

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # Draw the pose keypoints onto the frame
        image_img = Image.fromarray(image)
        draw = ImageDraw.Draw(image_img)

        people_real_num = 0
        people_part_num = 0

        # person_conf_multi has shape (people, 17, 2), so size / (17 * 2) is the candidate count
        people_num = int(person_conf_multi.size / (point_num * 2))
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            # One random color per person candidate
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)

            point_list = []
            point_count = 0
            part_count = 0  # number of parts drawn for THIS person

            # Collect x/y coordinates to find the rectangle enclosing this person
            people_x = []
            people_y = []

            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # (0, 0) marks an undetected keypoint
                    point_count = point_count + 1
                    point_list.append(point_i)

            # Draw each part
            if (5 in point_list) and (7 in point_list) and (9 in point_list):  # left arm: shoulder-elbow-wrist
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 5, 7, 9, point_color)
                part_count = part_count + 1
            if (6 in point_list) and (8 in point_list) and (10 in point_list):  # right arm: shoulder-elbow-wrist
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 6, 8, 10, point_color)
                part_count = part_count + 1
            if (11 in point_list) and (13 in point_list) and (15 in point_list):  # left leg: hip-knee-ankle
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 11, 13, 15, point_color)
                part_count = part_count + 1
            if (12 in point_list) and (14 in point_list) and (16 in point_list):  # right leg: hip-knee-ankle
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 12, 14, 16, point_color)
                part_count = part_count + 1

            if point_count >= point_min:
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # (0, 0) marks an undetected keypoint
                        draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                        people_x.append(person_conf_multi[people_i][point_i][0])
                        people_y.append(person_conf_multi[people_i][point_i][1])
                # Bounding box drawn as an outline so it does not cover the person
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)], outline=point_color, width=5)

            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0, 0, 0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.perf_counter() - time_start, 1)) + 'sec', (0, 0, 0), font=font)

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)
        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)
    print("Time(s): " + str(time.perf_counter() - time_start))