-
Notifications
You must be signed in to change notification settings - Fork 62
/
main.py
158 lines (146 loc) · 8.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import cv2
import argparse
import numpy as np
class yolop():
def __init__(self, confThreshold=0.25, nmsThreshold=0.5, objThreshold=0.45):
with open('bdd100k.names', 'rt') as f:
self.classes = f.read().rstrip('\n').split('\n') ###这个是在bdd100k数据集上训练的模型做opencv部署的,如果你在自己的数据集上训练出的模型做opencv部署,那么需要修改self.classes
num_classes = len(self.classes)
anchors = [[3,9,5,11,4,20], [7,18,6,39,12,31], [19,50,38,81,68,157]]
self.nl = len(anchors)
self.na = len(anchors[0]) // 2
self.no = num_classes + 5
self.stride = np.array([8., 16., 32.])
self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
self.inpWidth = 640
self.inpHeight = 640
self.generate_grid()
self.net = cv2.dnn.readNet('yolop.onnx')
self.confThreshold = confThreshold
self.nmsThreshold = nmsThreshold
self.objThreshold = objThreshold
self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
self.keep_ratio = True
def generate_grid(self):
self.grid = [np.zeros(1)] * self.nl
self.length = []
self.areas = []
for i in range(self.nl):
h, w = int(self.inpHeight/self.stride[i]), int(self.inpWidth/self.stride[i])
self.length.append(int(self.na * h * w))
self.areas.append(h*w)
if self.grid[i].shape[2:4] != (h,w):
self.grid[i] = self._make_grid(w, h)
def _make_grid(self, nx=20, ny=20):
xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)
def postprocess(self, frame, outs, newh, neww, padh, padw):
frameHeight = frame.shape[0]
frameWidth = frame.shape[1]
ratioh, ratiow = frameHeight / newh, frameWidth / neww
# Scan through all the bounding boxes output from the network and keep only the
# ones with high confidence scores. Assign the box's class label as the class with the highest score.
classIds = []
confidences = []
boxes = []
for detection in outs:
scores = detection[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > self.confThreshold and detection[4] > self.objThreshold:
center_x = int((detection[0]-padw) * ratiow)
center_y = int((detection[1]-padh) * ratioh)
width = int(detection[2] * ratiow)
height = int(detection[3] * ratioh)
left = int(center_x - width / 2)
top = int(center_y - height / 2)
classIds.append(classId)
confidences.append(float(confidence) * detection[4])
boxes.append([left, top, width, height])
# Perform non maximum suppression to eliminate redundant overlapping boxes with
# lower confidences.
indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
for i in indices:
i = i[0]
box = boxes[i]
left = box[0]
top = box[1]
width = box[2]
height = box[3]
frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
return frame
def drawPred(self, frame, classId, conf, left, top, right, bottom):
# Draw a bounding box.
cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=2)
label = '%.2f' % conf
label = '%s:%s' % (self.classes[classId], label)
# Display the label at the top of the bounding box
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
top = max(top, labelSize[1])
# cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=1)
return frame
def resize_image(self, srcimg):
padh, padw, newh, neww = 0, 0, self.inpHeight, self.inpWidth
if self.keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
hw_scale = srcimg.shape[0] / srcimg.shape[1]
if hw_scale > 1:
newh, neww = self.inpHeight, int(self.inpWidth / hw_scale)
img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
padw = int((self.inpWidth - neww) * 0.5)
img = cv2.copyMakeBorder(img, 0, 0, padw, self.inpWidth - neww - padw, cv2.BORDER_CONSTANT,
value=0) # add border
else:
newh, neww = int(self.inpHeight * hw_scale), self.inpWidth
img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
padh = int((self.inpHeight - newh) * 0.5)
img = cv2.copyMakeBorder(img, padh, self.inpHeight - newh - padh, 0, 0, cv2.BORDER_CONSTANT, value=0)
else:
img = cv2.resize(srcimg, (self.inpWidth, self.inpHeight), interpolation=cv2.INTER_AREA)
return img, newh, neww, padh, padw
def _normalize(self, img): ### c++: https://blog.csdn.net/wuqingshan2010/article/details/107727909
img = img.astype(np.float32) / 255.0
img = (img - self.mean) / self.std
return img
def detect(self, srcimg):
img, newh, neww, padh, padw = self.resize_image(srcimg)
img = self._normalize(img)
blob = cv2.dnn.blobFromImage(img)
# Sets the input to the network
self.net.setInput(blob)
# Runs the forward pass to get output of the output layers
outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
# inference output
outimg = srcimg.copy()
drive_area_mask = outs[1][:, padh:(self.inpHeight - padh), padw:(self.inpWidth - padw)]
seg_id = np.argmax(drive_area_mask, axis=0).astype(np.uint8)
seg_id = cv2.resize(seg_id, (srcimg.shape[1], srcimg.shape[0]), interpolation=cv2.INTER_NEAREST)
outimg[seg_id == 1] = [0, 255, 0]
lane_line_mask = outs[2][:, padh:(self.inpHeight - padh), padw:(self.inpWidth - padw)]
seg_id = np.argmax(lane_line_mask, axis=0).astype(np.uint8)
seg_id = cv2.resize(seg_id, (srcimg.shape[1], srcimg.shape[0]), interpolation=cv2.INTER_NEAREST)
outimg[seg_id == 1] = [255, 0, 0]
det_out = outs[0]
row_ind = 0
for i in range(self.nl):
det_out[row_ind:row_ind+self.length[i], 0:2] = (det_out[row_ind:row_ind+self.length[i], 0:2] * 2. - 0.5 + np.tile(self.grid[i],(self.na, 1))) * int(self.stride[i])
det_out[row_ind:row_ind+self.length[i], 2:4] = (det_out[row_ind:row_ind+self.length[i], 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i], self.areas[i], axis=0)
row_ind += self.length[i]
outimg = self.postprocess(outimg, det_out, newh, neww, padh, padw)
return outimg
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--imgpath", type=str, default='images/0ace96c3-48481887.jpg', help="image path")
parser.add_argument('--confThreshold', default=0.25, type=float, help='class confidence')
parser.add_argument('--nmsThreshold', default=0.45, type=float, help='nms iou thresh')
parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence')
args = parser.parse_args()
yolonet = yolop(confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold)
srcimg = cv2.imread(args.imgpath)
outimg = yolonet.detect(srcimg)
winName = 'Deep learning object detection in OpenCV'
cv2.namedWindow(winName, 0)
cv2.imshow(winName, outimg)
cv2.waitKey(0)
cv2.destroyAllWindows()