Add YOLOv5Face model support (#38)

* update .gitignore * Added checking for cmake include dir * fixed missing trt_backend option bug when init from trt * remove unneeded data layout and add pre-check for dtype * changed RGB2BRG to BGR2RGB in ppcls model * add model_zoo yolov6 c++/python demo * fixed CMakeLists.txt typos * update yolov6 cpp/README.md * add yolox c++/pybind and model_zoo demo * move some helpers to private * fixed CMakeLists.txt typos * add normalize with alpha and beta * add version notes for yolov5/yolov6/yolox * add copyright to yolov5.cc * revert normalize * fixed some bugs in yolox * Add YOLOv5Face Model support * fixed examples/vision typos * fixed runtime_option print func bugs
PaddlePaddle · Jul 25, 2022 · fc71d79 · fc71d79
1 parent 36fc77e
commit fc71d79
Show file tree

Hide file tree

Showing 27 changed files with 1,240 additions and 16 deletions.
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,5 @@ fastdeploy.egg-info
 fastdeploy/version.py
 fastdeploy/LICENSE*
 fastdeploy/ThirdPartyNotices*
+*.so*
+fastdeploy/libs/third_libs
diff --git a/examples/vision/deepcam_yolov5face.cc b/examples/vision/deepcam_yolov5face.cc
@@ -0,0 +1,53 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+// Demo: load a YOLOv5Face ONNX model, run it on one image, print the
+// detections and write a visualization to disk.
+//
+// Returns 0 on success and -1 when the model fails to initialize, the input
+// image cannot be read, prediction fails, or the output cannot be written.
+int main() {
+  namespace vis = fastdeploy::vision;
+
+  std::string model_file = "../resources/models/yolov5s-face.onnx";
+  std::string img_path = "../resources/images/test_face_det.jpg";
+  std::string vis_path =
+      "../resources/outputs/deepcam_yolov5face_vis_result.jpg";
+
+  auto model = vis::deepcam::YOLOv5Face(model_file);
+  if (!model.Initialized()) {
+    std::cerr << "Init Failed! Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "Init Done! Model:" << model_file << std::endl;
+  }
+  model.EnableDebug();
+
+  cv::Mat im = cv::imread(img_path);
+  // cv::imread reports failure by returning an empty Mat, so check before
+  // cloning it or handing it to the model.
+  if (im.empty()) {
+    std::cerr << "Read Image Failed! Image: " << img_path << std::endl;
+    return -1;
+  }
+  cv::Mat vis_im = im.clone();
+
+  vis::FaceDetectionResult res;
+  if (!model.Predict(&im, &res, 0.1f, 0.3f)) {
+    std::cerr << "Prediction Failed." << std::endl;
+    return -1;
+  } else {
+    std::cout << "Prediction Done!" << std::endl;
+  }
+
+  // Print the predicted boxes.
+  std::cout << res.Str() << std::endl;
+
+  // Visualize the prediction results on the untouched copy of the input.
+  vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f);
+  // Only claim the file was saved when cv::imwrite reports success.
+  if (!cv::imwrite(vis_path, vis_im)) {
+    std::cerr << "Save Failed! Path: " << vis_path << std::endl;
+    return -1;
+  }
+  std::cout << "Detect Done! Saved: " << vis_path << std::endl;
+  return 0;
+}
diff --git a/fastdeploy/__init__.py b/fastdeploy/__init__.py
@@ -32,6 +32,8 @@ def RuntimeOptionStr(runtime_option):
     for attr in attrs:
         if attr.startswith("__"):
             continue
+        if hasattr(getattr(runtime_option, attr), "__call__"):
+            continue
         message += "  {} : {}\t\n".format(attr, getattr(runtime_option, attr))
     message.strip("\n")
     message += ")"

diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
@@ -15,16 +15,17 @@
 
 #include "fastdeploy/core/config.h"
 #ifdef ENABLE_VISION
+#include "fastdeploy/vision/deepcam/yolov5face.h"
 #include "fastdeploy/vision/megvii/yolox.h"
 #include "fastdeploy/vision/meituan/yolov6.h"
 #include "fastdeploy/vision/ppcls/model.h"
 #include "fastdeploy/vision/ppdet/ppyoloe.h"
-#include "fastdeploy/vision/rangilyu/nanodet_plus.h"
 #include "fastdeploy/vision/ppseg/model.h"
+#include "fastdeploy/vision/rangilyu/nanodet_plus.h"
 #include "fastdeploy/vision/ultralytics/yolov5.h"
+#include "fastdeploy/vision/wongkinyiu/scaledyolov4.h"
 #include "fastdeploy/vision/wongkinyiu/yolor.h"
 #include "fastdeploy/vision/wongkinyiu/yolov7.h"
-#include "fastdeploy/vision/wongkinyiu/scaledyolov4.h"
 #endif
 
 #include "fastdeploy/vision/visualize/visualize.h"
diff --git a/fastdeploy/vision/__init__.py b/fastdeploy/vision/__init__.py
@@ -22,4 +22,5 @@
 from . import megvii
 from . import visualize
 from . import wongkinyiu
+from . import deepcam
 from . import rangilyu
diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc
@@ -72,6 +72,73 @@ std::string DetectionResult::Str() {
   return out;
 }
 
+// Copy constructor: duplicates the boxes, landmarks, scores and
+// landmarks_per_face of `res`.
+FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res)
+    : boxes(res.boxes),
+      landmarks(res.landmarks),
+      scores(res.scores),
+      landmarks_per_face(res.landmarks_per_face) {}
+
+// Drops every stored face and resets landmarks_per_face to 0. Each vector is
+// swapped with an empty temporary instead of calling clear().
+void FaceDetectionResult::Clear() {
+  landmarks_per_face = 0;
+  decltype(boxes)().swap(boxes);
+  decltype(scores)().swap(scores);
+  decltype(landmarks)().swap(landmarks);
+}
+
+// Reserves storage for `size` faces. Landmark storage is reserved only when
+// landmarks_per_face is positive.
+void FaceDetectionResult::Reserve(int size) {
+  boxes.reserve(size);
+  scores.reserve(size);
+  if (landmarks_per_face <= 0) {
+    return;
+  }
+  landmarks.reserve(size * landmarks_per_face);
+}
+
+// Resizes the result to hold `size` faces. Landmarks are resized to
+// size * landmarks_per_face entries only when landmarks_per_face is positive.
+void FaceDetectionResult::Resize(int size) {
+  boxes.resize(size);
+  scores.resize(size);
+  if (landmarks_per_face <= 0) {
+    return;
+  }
+  landmarks.resize(size * landmarks_per_face);
+}
+
+// Formats the result as text: a header line, then one line per face.
+//
+// Without landmarks (landmarks_per_face <= 0) each line is
+//   xmin,ymin, xmax, ymax, score
+// With landmarks each line additionally lists landmarks_per_face "(x,y)"
+// pairs. In that mode the function asserts (FDASSERT) that landmarks.size()
+// equals boxes.size() * landmarks_per_face before formatting anything.
+std::string FaceDetectionResult::Str() {
+  const bool has_landmarks = landmarks_per_face > 0;
+
+  std::string out;
+  if (has_landmarks) {
+    FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face),
+             "The size of landmarks != boxes.size * landmarks_per_face.");
+    out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " +
+          std::to_string(landmarks_per_face) + "]\n";
+  } else {
+    out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n";
+  }
+
+  // Convert once to an unsigned count so the index arithmetic below does not
+  // mix signed and unsigned operands.
+  const size_t num_landmarks =
+      has_landmarks ? static_cast<size_t>(landmarks_per_face) : 0;
+
+  for (size_t i = 0; i < boxes.size(); ++i) {
+    // Append in place instead of `out = out + ...`, which re-copies the
+    // whole accumulated string for every face.
+    out += std::to_string(boxes[i][0]) + "," + std::to_string(boxes[i][1]) +
+           ", " + std::to_string(boxes[i][2]) + ", " +
+           std::to_string(boxes[i][3]) + ", " + std::to_string(scores[i]);
+    if (!has_landmarks) {
+      out += "\n";
+      continue;
+    }
+    out += ", ";
+    for (size_t j = 0; j < num_landmarks; ++j) {
+      const auto& point = landmarks[i * num_landmarks + j];
+      out += "(" + std::to_string(point[0]) + "," + std::to_string(point[1]);
+      // The last landmark of a face closes the line.
+      out += (j + 1 < num_landmarks) ? "), " : ")\n";
+    }
+  }
+  return out;
+}
+
 // Empties `masks` by swapping it with a default-constructed temporary.
 void SegmentationResult::Clear() {
   decltype(masks)().swap(masks);
 }

diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h
@@ -21,7 +21,8 @@ enum FASTDEPLOY_DECL ResultType {
   UNKNOWN_RESULT,
   CLASSIFY,
   DETECTION,
-  SEGMENTATION
+  SEGMENTATION,
+  FACE_DETECTION
 };
 
 struct FASTDEPLOY_DECL BaseResult {
@@ -56,6 +57,31 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
   std::string Str();
 };
 
+// Result of a face detection model: one box and one score per face, plus an
+// optional flat list of landmark points.
+struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {
+  // One entry per face: xmin, ymin, xmax, ymax.
+  std::vector<std::array<float, 4>> boxes;
+  // Landmark points as (x, y). May be empty when the model does not detect
+  // faces with landmarks. A face can have several landmarks, such as
+  // 5/19/21/68/98/..., etc.
+  std::vector<std::array<float, 2>> landmarks;
+  // Detection scores.
+  std::vector<float> scores;
+  ResultType type = ResultType::FACE_DETECTION;
+  // Number of landmarks per face; starts at 0. Set it manually in your
+  // post-processing code.
+  int landmarks_per_face = 0;
+
+  FaceDetectionResult() {}
+  FaceDetectionResult(const FaceDetectionResult& res);
+
+  void Clear();
+
+  void Reserve(int size);
+
+  void Resize(int size);
+
+  std::string Str();
+};
+
 struct FASTDEPLOY_DECL SegmentationResult : public BaseResult {
   // mask
   std::vector<std::vector<int64_t>> masks;

diff --git a/fastdeploy/vision/deepcam/__init__.py b/fastdeploy/vision/deepcam/__init__.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from ... import FastDeployModel, Frontend
+from ... import fastdeploy_main as C
+
+
+class YOLOv5Face(FastDeployModel):
+    def __init__(self,
+                 model_file,
+                 params_file="",
+                 runtime_option=None,
+                 model_format=Frontend.ONNX):
+        # 调用基函数进行backend_option的初始化
+        # 初始化后的option保存在self._runtime_option
+        super(YOLOv5Face, self).__init__(runtime_option)
+
+        self._model = C.vision.deepcam.YOLOv5Face(
+            model_file, params_file, self._runtime_option, model_format)
+        # 通过self.initialized判断整个模型的初始化是否成功
+        assert self.initialized, "YOLOv5Face initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5):
+        return self._model.predict(input_image, conf_threshold,
+                                   nms_iou_threshold)
+
+    # 一些跟YOLOv5Face模型有关的属性封装
+    # 多数是预处理相关，可通过修改如model.size = [1280, 1280]改变预处理时resize的大小（前提是模型支持）
+    @property
+    def size(self):
+        return self._model.size
+
+    @property
+    def padding_value(self):
+        return self._model.padding_value
+
+    @property
+    def is_no_pad(self):
+        return self._model.is_no_pad
+
+    @property
+    def is_mini_pad(self):
+        return self._model.is_mini_pad
+
+    @property
+    def is_scale_up(self):
+        return self._model.is_scale_up
+
+    @property
+    def stride(self):
+        return self._model.stride
+
+    @property
+    def landmarks_per_face(self):
+        return self._model.landmarks_per_face
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(wh, [list, tuple]),\
+            "The value to set `size` must be type of tuple or list."
+        assert len(wh) == 2,\
+            "The value to set `size` must contatins 2 elements means [width, height], but now it contains {} elements.".format(
+            len(wh))
+        self._model.size = wh
+
+    @padding_value.setter
+    def padding_value(self, value):
+        assert isinstance(
+            value,
+            list), "The value to set `padding_value` must be type of list."
+        self._model.padding_value = value
+
+    @is_no_pad.setter
+    def is_no_pad(self, value):
+        assert isinstance(
+            value, bool), "The value to set `is_no_pad` must be type of bool."
+        self._model.is_no_pad = value
+
+    @is_mini_pad.setter
+    def is_mini_pad(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `is_mini_pad` must be type of bool."
+        self._model.is_mini_pad = value
+
+    @is_scale_up.setter
+    def is_scale_up(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `is_scale_up` must be type of bool."
+        self._model.is_scale_up = value
+
+    @stride.setter
+    def stride(self, value):
+        assert isinstance(
+            value, int), "The value to set `stride` must be type of int."
+        self._model.stride = value
+
+    @landmarks_per_face.setter
+    def landmarks_per_face(self, value):
+        assert isinstance(
+            value,
+            int), "The value to set `landmarks_per_face` must be type of int."
+        self._model.landmarks_per_face = value
diff --git a/fastdeploy/vision/deepcam/deepcam_pybind.cc b/fastdeploy/vision/deepcam/deepcam_pybind.cc
@@ -0,0 +1,43 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+// Registers the `deepcam` submodule on `m` and exposes YOLOv5Face in it:
+// the constructor, a `predict` method and the model's tunable attributes.
+void BindDeepCam(pybind11::module& m) {
+  using Model = vision::deepcam::YOLOv5Face;
+
+  auto deepcam_module =
+      m.def_submodule("deepcam", "https://github.com/deepcam-cn/yolov5-face");
+
+  pybind11::class_<Model, FastDeployModel> binder(deepcam_module,
+                                                  "YOLOv5Face");
+  binder.def(
+      pybind11::init<std::string, std::string, RuntimeOption, Frontend>());
+
+  // predict: converts the numpy array to a cv::Mat, runs the model and
+  // returns the result object. The boolean returned by Predict is not
+  // inspected here.
+  binder.def("predict", [](Model& self, pybind11::array& data,
+                           float conf_threshold, float nms_iou_threshold) {
+    auto mat = PyArrayToCvMat(data);
+    vision::FaceDetectionResult res;
+    self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+    return res;
+  });
+
+  // Read/write attributes forwarded straight to the C++ members.
+  binder.def_readwrite("size", &Model::size);
+  binder.def_readwrite("padding_value", &Model::padding_value);
+  binder.def_readwrite("is_mini_pad", &Model::is_mini_pad);
+  binder.def_readwrite("is_no_pad", &Model::is_no_pad);
+  binder.def_readwrite("is_scale_up", &Model::is_scale_up);
+  binder.def_readwrite("stride", &Model::stride);
+  binder.def_readwrite("landmarks_per_face", &Model::landmarks_per_face);
+}
+
+}  // namespace fastdeploy