-
Notifications
You must be signed in to change notification settings - Fork 1
/
caption.py
71 lines (58 loc) · 2.45 KB
/
caption.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import time
import tarfile
import glob
import six.moves.urllib as urllib
import cv2
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from ssd_mobilenet_utils import *
from captionbot import CaptionBot
# Module-level CaptionBot client; real_time_object_detection calls its
# file_caption() method when the user asks for a caption.
c = CaptionBot()
def run_detection(image, interpreter):
    """Run one inference pass on a preprocessed image and filter the result.

    The image is written to the interpreter's first input tensor, the model
    is invoked, and the first three output tensors (boxes, classes, scores)
    are read back. Class ids are shifted by +1 and cast to int32; all three
    arrays are squeezed and handed to ``non_max_suppression``.

    Args:
        image: Input array for the model, e.g. the value returned by
            ``preprocess_image_for_tflite``.
        interpreter: A ``tf.lite.Interpreter`` whose tensors have already
            been allocated.

    Returns:
        The tuple ``(out_scores, out_boxes, out_classes)`` exactly as
        returned by ``non_max_suppression``.
    """
    # Look the tensor indices up on the interpreter that was actually passed
    # in rather than on module-level globals, so the indices always belong
    # to the model being invoked.
    input_index = interpreter.get_input_details()[0]['index']
    output_details = interpreter.get_output_details()

    interpreter.set_tensor(input_index, image)
    interpreter.invoke()

    # Output order used by this file: 0 = boxes, 1 = classes, 2 = scores.
    # The fourth output used to be fetched into an unused local; it is
    # skipped now because nothing reads it.
    boxes = interpreter.get_tensor(output_details[0]['index'])
    classes = interpreter.get_tensor(output_details[1]['index'])
    scores = interpreter.get_tensor(output_details[2]['index'])

    boxes = np.squeeze(boxes)
    scores = np.squeeze(scores)
    # The +1 shift is applied before squeezing and the int32 cast, as before.
    classes = np.squeeze(classes + 1).astype(np.int32)

    out_scores, out_boxes, out_classes = non_max_suppression(scores, boxes, classes)
    return out_scores, out_boxes, out_classes
def real_time_object_detection(interpreter, colors):
    """Run live object detection on camera 0 until Esc is pressed.

    Every successfully read frame is saved to ``image.jpg`` in the current
    working directory, preprocessed, passed through ``run_detection``,
    annotated by ``draw_boxes`` and an FPS overlay, and displayed in an
    OpenCV window. Pressing ``a`` sends the saved snapshot to the
    module-level CaptionBot client ``c`` and prints the returned caption;
    pressing Esc (key code 27) ends the loop.

    Args:
        interpreter: Interpreter forwarded unchanged to ``run_detection``.
        colors: Color table forwarded unchanged to ``draw_boxes``.

    Relies on the module-level names ``class_names`` and ``c``.
    """
    # Use one path for both writing the snapshot and requesting the caption,
    # so the caption always refers to the file that was just written,
    # regardless of which directory the script is started from.
    snapshot_path = os.path.abspath('image.jpg')

    camera = cv2.VideoCapture(0)
    try:
        while camera.isOpened():
            start = time.time()
            ret, frame = camera.read()
            if not ret:
                # No frame was delivered; try again on the next iteration.
                continue

            # Saved before draw_boxes is called on the frame.
            cv2.imwrite(snapshot_path, frame)

            image_data = preprocess_image_for_tflite(frame, model_image_size=300)
            out_scores, out_boxes, out_classes = run_detection(image_data, interpreter)
            result = draw_boxes(frame, out_scores, out_boxes, out_classes, class_names, colors)

            elapsed = time.time() - start
            # A coarse clock can report zero elapsed time; avoid dividing by it.
            rate = 1 / elapsed if elapsed > 0 else float('inf')
            fps = "Fps: {:.2f}".format(rate)
            cv2.putText(result, fps, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the image that carries the boxes and the FPS overlay.
            cv2.imshow("Object detection - ssdlite_mobilenet_v2", result)

            key = cv2.waitKey(1)
            if key == ord('a'):
                print("Generating Caption...")
                caption = c.file_caption(snapshot_path)
                print(caption)
            elif key == 27:
                break
    finally:
        # Release the device and close the window even if the loop raised.
        camera.release()
        cv2.destroyAllWindows()
# Script body: load the TFLite model, read the class list, and start the
# camera loop. Everything below runs at import time.
interpreter = tf.lite.Interpreter(model_path="model_data/ssdlite_mobilenet_v2.tflite")
# Tensors must be allocated before the detail lookups below and before any
# set_tensor/invoke call.
interpreter.allocate_tensors()
# Input/output tensor descriptions of the loaded model, kept as module-level names.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# class_names is read as a global inside real_time_object_detection.
class_names = read_classes('model_data/coco_classes.txt')
colors = generate_colors(class_names)
# Blocks in the capture loop until that function returns.
real_time_object_detection(interpreter, colors)