-
Notifications
You must be signed in to change notification settings - Fork 0
/
api.py
226 lines (173 loc) · 10.2 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# -*- coding: utf-8 -*-
import PIL.Image
import dlib
import numpy as np
from PIL import ImageFile
try:
import face_recognition_models
except Exception:
print("Please install `face_recognition_models` with this command before using `face_recognition`:\n")
print("pip install git+https://github.com/ageitgey/face_recognition_models")
quit()
ImageFile.LOAD_TRUNCATED_IMAGES = True
face_detector = dlib.get_frontal_face_detector()
predictor_68_point_model = face_recognition_models.pose_predictor_model_location()
pose_predictor_68_point = dlib.shape_predictor(predictor_68_point_model)
predictor_5_point_model = face_recognition_models.pose_predictor_five_point_model_location()
pose_predictor_5_point = dlib.shape_predictor(predictor_5_point_model)
cnn_face_detection_model = face_recognition_models.cnn_face_detector_model_location()
cnn_face_detector = dlib.cnn_face_detection_model_v1(cnn_face_detection_model)
face_recognition_model = face_recognition_models.face_recognition_model_location()
face_encoder = dlib.face_recognition_model_v1(face_recognition_model)
def _rect_to_css(rect):
"""
Convert a dlib 'rect' object to a plain tuple in (top, right, bottom, left) order
:param rect: a dlib 'rect' object
:return: a plain tuple representation of the rect in (top, right, bottom, left) order
"""
return rect.top(), rect.right(), rect.bottom(), rect.left()
def _css_to_rect(css):
"""
Convert a tuple in (top, right, bottom, left) order to a dlib `rect` object
:param css: plain tuple representation of the rect in (top, right, bottom, left) order
:return: a dlib `rect` object
"""
return dlib.rectangle(css[3], css[0], css[1], css[2])
def _trim_css_to_bounds(css, image_shape):
"""
Make sure a tuple in (top, right, bottom, left) order is within the bounds of the image.
:param css: plain tuple representation of the rect in (top, right, bottom, left) order
:param image_shape: numpy shape of the image array
:return: a trimmed plain tuple representation of the rect in (top, right, bottom, left) order
"""
return max(css[0], 0), min(css[1], image_shape[1]), min(css[2], image_shape[0]), max(css[3], 0)
def face_distance(face_encodings, face_to_compare):
"""
Given a list of face encodings, compare them to a known face encoding and get a euclidean distance
for each comparison face. The distance tells you how similar the faces are.
:param face_encodings: List of face encodings to compare
:param face_to_compare: A face encoding to compare against
:return: A numpy ndarray with the distance for each face in the same order as the 'faces' array
"""
if len(face_encodings) == 0:
return np.empty((0))
return np.linalg.norm(face_encodings - face_to_compare, axis=1)
def load_image_file(file, mode='RGB'):
"""
Loads an image file (.jpg, .png, etc) into a numpy array
:param file: image file name or file object to load
:param mode: format to convert the image to. Only 'RGB' (8-bit RGB, 3 channels) and 'L' (black and white) are supported.
:return: image contents as numpy array
"""
im = PIL.Image.open(file)
if mode:
im = im.convert(mode)
return np.array(im)
def _raw_face_locations(img, number_of_times_to_upsample=1, model="hog"):
"""
Returns an array of bounding boxes of human faces in a image
:param img: An image (as a numpy array)
:param number_of_times_to_upsample: How many times to upsample the image looking for faces. Higher numbers find smaller faces.
:param model: Which face detection model to use. "hog" is less accurate but faster on CPUs. "cnn" is a more accurate
deep-learning model which is GPU/CUDA accelerated (if available). The default is "hog".
:return: A list of dlib 'rect' objects of found face locations
"""
if model == "cnn":
return cnn_face_detector(img, number_of_times_to_upsample)
else:
return face_detector(img, number_of_times_to_upsample)
def face_locations(img, number_of_times_to_upsample=1, model="hog"):
"""
Returns an array of bounding boxes of human faces in a image
:param img: An image (as a numpy array)
:param number_of_times_to_upsample: How many times to upsample the image looking for faces. Higher numbers find smaller faces.
:param model: Which face detection model to use. "hog" is less accurate but faster on CPUs. "cnn" is a more accurate
deep-learning model which is GPU/CUDA accelerated (if available). The default is "hog".
:return: A list of tuples of found face locations in css (top, right, bottom, left) order
"""
if model == "cnn":
return [_trim_css_to_bounds(_rect_to_css(face.rect), img.shape) for face in _raw_face_locations(img, number_of_times_to_upsample, "cnn")]
else:
return [_trim_css_to_bounds(_rect_to_css(face), img.shape) for face in _raw_face_locations(img, number_of_times_to_upsample, model)]
def _raw_face_locations_batched(images, number_of_times_to_upsample=1, batch_size=128):
"""
Returns an 2d array of dlib rects of human faces in a image using the cnn face detector
:param images: A list of images (each as a numpy array)
:param number_of_times_to_upsample: How many times to upsample the image looking for faces. Higher numbers find smaller faces.
:return: A list of dlib 'rect' objects of found face locations
"""
return cnn_face_detector(images, number_of_times_to_upsample, batch_size=batch_size)
def batch_face_locations(images, number_of_times_to_upsample=1, batch_size=128):
"""
Returns an 2d array of bounding boxes of human faces in a image using the cnn face detector
If you are using a GPU, this can give you much faster results since the GPU
can process batches of images at once. If you aren't using a GPU, you don't need this function.
:param images: A list of images (each as a numpy array)
:param number_of_times_to_upsample: How many times to upsample the image looking for faces. Higher numbers find smaller faces.
:param batch_size: How many images to include in each GPU processing batch.
:return: A list of tuples of found face locations in css (top, right, bottom, left) order
"""
def convert_cnn_detections_to_css(detections):
return [_trim_css_to_bounds(_rect_to_css(face.rect), images[0].shape) for face in detections]
raw_detections_batched = _raw_face_locations_batched(images, number_of_times_to_upsample, batch_size)
return list(map(convert_cnn_detections_to_css, raw_detections_batched))
def _raw_face_landmarks(face_image, face_locations=None, model="large"):
if face_locations is None:
face_locations = _raw_face_locations(face_image)
else:
face_locations = [_css_to_rect(face_location) for face_location in face_locations]
pose_predictor = pose_predictor_68_point
if model == "small":
pose_predictor = pose_predictor_5_point
return [pose_predictor(face_image, face_location) for face_location in face_locations]
def face_landmarks(face_image, face_locations=None, model="large"):
"""
Given an image, returns a dict of face feature locations (eyes, nose, etc) for each face in the image
:param face_image: image to search
:param face_locations: Optionally provide a list of face locations to check.
:param model: Optional - which model to use. "large" (default) or "small" which only returns 5 points but is faster.
:return: A list of dicts of face feature locations (eyes, nose, etc)
"""
landmarks = _raw_face_landmarks(face_image, face_locations, model)
landmarks_as_tuples = [[(p.x, p.y) for p in landmark.parts()] for landmark in landmarks]
# For a definition of each point index, see https://cdn-images-1.medium.com/max/1600/1*AbEg31EgkbXSQehuNJBlWg.png
if model == 'large':
return [{
"chin": points[0:17],
"left_eyebrow": points[17:22],
"right_eyebrow": points[22:27],
"nose_bridge": points[27:31],
"nose_tip": points[31:36],
"left_eye": points[36:42],
"right_eye": points[42:48],
"top_lip": points[48:55] + [points[64]] + [points[63]] + [points[62]] + [points[61]] + [points[60]],
"bottom_lip": points[54:60] + [points[48]] + [points[60]] + [points[67]] + [points[66]] + [points[65]] + [points[64]]
} for points in landmarks_as_tuples]
elif model == 'small':
return [{
"nose_tip": [points[4]],
"left_eye": points[2:4],
"right_eye": points[0:2],
} for points in landmarks_as_tuples]
else:
raise ValueError("Invalid landmarks model type. Supported models are ['small', 'large'].")
def face_encodings(face_image, known_face_locations=None, num_jitters=1, model="small"):
"""
Given an image, return the 128-dimension face encoding for each face in the image.
:param face_image: The image that contains one or more faces
:param known_face_locations: Optional - the bounding boxes of each face if you already know them.
:param num_jitters: How many times to re-sample the face when calculating encoding. Higher is more accurate, but slower (i.e. 100 is 100x slower)
:param model: Optional - which model to use. "large" or "small" (default) which only returns 5 points but is faster.
:return: A list of 128-dimensional face encodings (one for each face in the image)
"""
raw_landmarks = _raw_face_landmarks(face_image, known_face_locations, model)
return [np.array(face_encoder.compute_face_descriptor(face_image, raw_landmark_set, num_jitters)) for raw_landmark_set in raw_landmarks]
def compare_faces(known_face_encodings, face_encoding_to_check, tolerance=0.6):
"""
Compare a list of face encodings against a candidate encoding to see if they match.
:param known_face_encodings: A list of known face encodings
:param face_encoding_to_check: A single face encoding to compare against the list
:param tolerance: How much distance between faces to consider it a match. Lower is more strict. 0.6 is typical best performance.
:return: A list of True/False values indicating which known_face_encodings match the face encoding to check
"""
return list(face_distance(known_face_encodings, face_encoding_to_check) <= tolerance)