From 93b69a8661d7011935a324a936d8d3ba2391f970 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Tue, 26 Nov 2024 09:03:26 +0800 Subject: [PATCH] [update] supports seg & pose model. --- projects/llm_framework/main_yolo/SConstruct | 2 +- .../main_yolo/src/EngineWrapper.cpp | 109 ++++++++++++++++-- .../main_yolo/src/EngineWrapper.hpp | 2 +- projects/llm_framework/main_yolo/src/main.cpp | 9 +- .../llm_framework/main_yolo/yolo11s-pose.json | 106 +++++++++++++++++ .../llm_framework/main_yolo/yolo11s-seg.json | 106 +++++++++++++++++ projects/llm_framework/main_yolo/yolo11s.json | 11 +- 7 files changed, 325 insertions(+), 20 deletions(-) create mode 100644 projects/llm_framework/main_yolo/yolo11s-pose.json create mode 100644 projects/llm_framework/main_yolo/yolo11s-seg.json diff --git a/projects/llm_framework/main_yolo/SConstruct b/projects/llm_framework/main_yolo/SConstruct index 3a42ea0..1870067 100644 --- a/projects/llm_framework/main_yolo/SConstruct +++ b/projects/llm_framework/main_yolo/SConstruct @@ -27,7 +27,7 @@ static_file = Glob('../static_lib/module-llm/libabsl_*') static_file = Glob('../static_lib/libopencv-4.6-aarch64-none/lib/lib*') STATIC_LIB += static_file * 2 -STATIC_FILES += [AFile('yolo11s.json')] +STATIC_FILES += [AFile('yolo11s.json'), AFile('yolo11s-seg.json'), AFile('yolo11s-pose.json')] env['COMPONENTS'].append({'target':'llm_yolo', 'SRCS':SRCS, diff --git a/projects/llm_framework/main_yolo/src/EngineWrapper.cpp b/projects/llm_framework/main_yolo/src/EngineWrapper.cpp index 12bbb94..286c6ae 100644 --- a/projects/llm_framework/main_yolo/src/EngineWrapper.cpp +++ b/projects/llm_framework/main_yolo/src/EngineWrapper.cpp @@ -266,27 +266,112 @@ const char* CLASS_NAMES[] = { "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; +const char* OBB_CLASS_NAMES[] = {"plane", + "ship", + "storage tank", + "baseball diamond", + "tennis court", + "basketball court", + "ground track field", + "harbor", + "bridge", + "large vehicle", + "small vehicle", + "helicopter", + "roundabout", + "soccer ball field", + "swimming pool"}; + +static const std::vector> COCO_COLORS = { + {56, 0, 255}, {226, 255, 0}, {0, 94, 255}, {0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, + {255, 151, 0}, {170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169}, {255, 0, 207}, + {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255}, {94, 0, 255}, {0, 255, 113}, {255, 18, 0}, + {255, 0, 56}, {18, 0, 255}, {0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0}, + {75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245}, {255, 132, 0}, {226, 0, 255}, + {255, 37, 0}, {207, 255, 0}, {0, 255, 207}, {94, 255, 0}, {0, 226, 255}, {56, 255, 0}, {255, 94, 0}, + {255, 113, 0}, {0, 132, 255}, {255, 0, 132}, {255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, + {113, 0, 255}, {255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113}, {0, 255, 188}, + {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132}, {0, 188, 255}, {0, 245, 255}, {0, 169, 255}, + {37, 255, 0}, {255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0}, {255, 0, 170}, {255, 0, 37}, + {255, 75, 0}, {0, 0, 255}, {255, 207, 0}, {255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, + {0, 255, 75}, {0, 255, 151}, {255, 56, 0}, {245, 255, 0}}; + +static const std::vector> KPS_COLORS = { + {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {255, 128, 0}, + {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {51, 153, 255}, + {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}}; + +static const std::vector> LIMB_COLORS = { + {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {51, 153, 255}, {255, 51, 255}, {255, 51, 255}, {255, 51, 255}, + {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {255, 128, 0}, {0, 255, 0}, {0, 255, 0}, + {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}, {0, 255, 0}}; + +static const std::vector> SKELETON = { + {16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, {6, 12}, {7, 13}, {6, 7}, {6, 8}, {7, 9}, + {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7}}; + void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, int& input_w, int& input_h, - int& cls_num, float& prob_threshold, float& nms_threshold, std::vector& objects) + int& cls_num, float& prob_threshold, float& nms_threshold, std::vector& objects, + std::string& model_type) { // std::vector objects; std::vector proposals; - for (int i = 0; i < 3; ++i) { - auto feat_ptr = (float*)io_data->pOutputs[i].pVirAddr; - int32_t stride = (1 << i) * 8; - detection::generate_proposals_yolov8_native(stride, feat_ptr, prob_threshold, proposals, input_w, input_h, - cls_num); + if (model_type == "detect") { + for (int i = 0; i < 3; ++i) { + auto feat_ptr = (float*)io_data->pOutputs[i].pVirAddr; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_native(stride, feat_ptr, prob_threshold, proposals, input_w, input_h, + cls_num); + } + detection::get_out_bbox(proposals, objects, nms_threshold, input_h, input_w, mat.rows, mat.cols); + detection::draw_objects(mat, objects, CLASS_NAMES, "yolo11_out"); + } else if (model_type == "segment") { + float* output_ptr[3] = {(float*)io_data->pOutputs[0].pVirAddr, (float*)io_data->pOutputs[1].pVirAddr, + (float*)io_data->pOutputs[2].pVirAddr}; + float* output_seg_ptr[3] = {(float*)io_data->pOutputs[3].pVirAddr, (float*)io_data->pOutputs[4].pVirAddr, + (float*)io_data->pOutputs[5].pVirAddr}; + for (int i = 0; i < 3; ++i) { + auto feat_ptr = output_ptr[i]; + auto feat_seg_ptr = output_seg_ptr[i]; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_seg_native(stride, feat_ptr, feat_seg_ptr, prob_threshold, proposals, + input_w, input_h, cls_num); + } + auto mask_proto_ptr = (float*)io_data->pOutputs[6].pVirAddr; + detection::get_out_bbox_mask(proposals, objects, mask_proto_ptr, 32, 4, nms_threshold, input_h, input_w, + mat.rows, mat.cols); + detection::draw_objects_mask(mat, objects, CLASS_NAMES, COCO_COLORS, "yolo11_seg_out"); + } else if (model_type == "pose") { + float* output_ptr[3] = {(float*)io_data->pOutputs[0].pVirAddr, (float*)io_data->pOutputs[1].pVirAddr, + (float*)io_data->pOutputs[2].pVirAddr}; + float* output_kps_ptr[3] = {(float*)io_data->pOutputs[3].pVirAddr, (float*)io_data->pOutputs[4].pVirAddr, + (float*)io_data->pOutputs[5].pVirAddr}; + + for (int i = 0; i < 3; ++i) { + auto feat_ptr = output_ptr[i]; + auto feat_kps_ptr = output_kps_ptr[i]; + int32_t stride = (1 << i) * 8; + detection::generate_proposals_yolov8_pose_native(stride, feat_ptr, feat_kps_ptr, prob_threshold, proposals, + input_h, input_w, 17, cls_num); + } + detection::get_out_bbox_kps(proposals, objects, nms_threshold, input_h, input_w, mat.rows, mat.cols); + detection::draw_keypoints(mat, objects, KPS_COLORS, LIMB_COLORS, SKELETON, "yolo11_pose_out"); + } else if (model_type == "obb") { + std::vector strides = {8, 16, 32}; + std::vector grid_strides; + detection::generate_grids_and_stride(input_w, input_h, strides, grid_strides); + auto feat_ptr = (float*)io_data->pOutputs[0].pVirAddr; + detection::obb::generate_proposals_yolov8_obb_native(grid_strides, feat_ptr, prob_threshold, proposals, input_w, + input_h, cls_num); + detection::obb::get_out_obb_bbox(proposals, objects, nms_threshold, input_h, input_w, mat.rows, mat.cols); + detection::obb::draw_objects_obb(mat, objects, OBB_CLASS_NAMES, "yolo11_obb_out", 1); } - detection::get_out_bbox(proposals, objects, nms_threshold, input_h, input_w, mat.rows, mat.cols); - fprintf(stdout, "detection num: %zu\n", objects.size()); - - detection::draw_objects(mat, objects, CLASS_NAMES, "yolo11_out"); } int EngineWrapper::Post_Process(cv::Mat& mat, int& input_w, int& input_, int& cls_num, float& pron_threshold, - float& nms_threshold, std::vector& objects) + float& nms_threshold, std::vector& objects, std::string& model_type) { - post_process(m_io_info, &m_io, mat, input_w, input_, cls_num, pron_threshold, nms_threshold, objects); + post_process(m_io_info, &m_io, mat, input_w, input_, cls_num, pron_threshold, nms_threshold, objects, model_type); return 0; } diff --git a/projects/llm_framework/main_yolo/src/EngineWrapper.hpp b/projects/llm_framework/main_yolo/src/EngineWrapper.hpp index 4c018f4..3a6dfa0 100644 --- a/projects/llm_framework/main_yolo/src/EngineWrapper.hpp +++ b/projects/llm_framework/main_yolo/src/EngineWrapper.hpp @@ -50,7 +50,7 @@ class EngineWrapper { int RunSync(); int Post_Process(cv::Mat& mat, int& input_w, int& input_, int& cls_num, float& pron_threshold, float& nms_threshold, - std::vector& objects); + std::vector& objects, std::string& model_type); int GetOutput(void* pOutput, int index); diff --git a/projects/llm_framework/main_yolo/src/main.cpp b/projects/llm_framework/main_yolo/src/main.cpp index 71f98eb..7c06c4d 100644 --- a/projects/llm_framework/main_yolo/src/main.cpp +++ b/projects/llm_framework/main_yolo/src/main.cpp @@ -26,6 +26,7 @@ static std::string base_model_config_path_; typedef struct { std::string yolo_model; + std::string model_type = "detect"; std::vector cls_name; int img_h = 640; int img_w = 640; @@ -112,6 +113,8 @@ class llm_task { CONFIG_AUTO_SET(file_body["mode_param"], pron_threshold); CONFIG_AUTO_SET(file_body["mode_param"], nms_threshold); CONFIG_AUTO_SET(file_body["mode_param"], cls_name); + CONFIG_AUTO_SET(file_body["mode_param"], cls_num); + CONFIG_AUTO_SET(file_body["mode_param"], model_type); mode_config_.yolo_model = base_model + mode_config_.yolo_model; yolo_ = std::make_unique(); if (0 != yolo_->Init(mode_config_.yolo_model.c_str())) { @@ -154,7 +157,8 @@ class llm_task { } std::vector objects; yolo_->Post_Process(src, mode_config_.img_w, mode_config_.img_h, mode_config_.cls_num, - mode_config_.pron_threshold, mode_config_.nms_threshold, objects); + mode_config_.pron_threshold, mode_config_.nms_threshold, objects, + mode_config_.model_type); std::vector yolo_output; for (size_t i = 0; i < objects.size(); i++) { const detection::Object &obj = objects[i]; @@ -166,6 +170,9 @@ class llm_task { output["bbox"].push_back(format_float(obj.rect.y, 0)); output["bbox"].push_back(format_float(obj.rect.x + obj.rect.width, 0)); output["bbox"].push_back(format_float(obj.rect.y + obj.rect.height, 0)); + if (mode_config_.model_type == "segment") output["mask"] = obj.mask_feat; + if (mode_config_.model_type == "pose") output["kps"] = obj.kps_feat; + if (mode_config_.model_type == "obb") output["angle"] = obj.angle; yolo_output.push_back(output); if (out_callback_) out_callback_(yolo_output, false); } diff --git a/projects/llm_framework/main_yolo/yolo11s-pose.json b/projects/llm_framework/main_yolo/yolo11s-pose.json new file mode 100644 index 0000000..7b28eaa --- /dev/null +++ b/projects/llm_framework/main_yolo/yolo11s-pose.json @@ -0,0 +1,106 @@ +{ + "mode":"yolo11s", + "type":"cv", + "capabilities":[ + "object detect" + ], + "input_type":[ + "cv.jpeg.base64" + ], + "output_type":[ + "cv.yolobox" + ], + "mode_param":{ + "yolo_model":"yolo11s-pose.axmodel", + "model_type":"pose", + "img_h":640, + "img_w":640, + "cls_num":1, + "pron_threshold":0.45, + "nms_threshold":0.45, + "cls_name":[ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "mode_param_bak":{ + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_yolo/yolo11s-seg.json b/projects/llm_framework/main_yolo/yolo11s-seg.json new file mode 100644 index 0000000..62d8980 --- /dev/null +++ b/projects/llm_framework/main_yolo/yolo11s-seg.json @@ -0,0 +1,106 @@ +{ + "mode":"yolo11s", + "type":"cv", + "capabilities":[ + "segment" + ], + "input_type":[ + "cv.jpeg.base64" + ], + "output_type":[ + "cv.yolobox" + ], + "mode_param":{ + "yolo_model":"yolo11s-seg.axmodel", + "model_type":"segment", + "img_h":640, + "img_w":640, + "cls_num":80, + "pron_threshold":0.45, + "nms_threshold":0.45, + "cls_name":[ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "mode_param_bak":{ + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_yolo/yolo11s.json b/projects/llm_framework/main_yolo/yolo11s.json index f29b077..a58db69 100644 --- a/projects/llm_framework/main_yolo/yolo11s.json +++ b/projects/llm_framework/main_yolo/yolo11s.json @@ -12,6 +12,12 @@ ], "mode_param":{ "yolo_model":"yolo11s.axmodel", + "model_type":"detect", + "img_h":640, + "img_w":640, + "cls_num":80, + "pron_threshold":0.45, + "nms_threshold":0.45, "cls_name":[ "person", "bicycle", @@ -96,10 +102,5 @@ ] }, "mode_param_bak":{ - "model_config.img_h":640, - "model_config.img_w":640, - "model_config.cls_num":80, - "model_config.pron_threshold":0.45, - "model_config.nms_threshold":0.45 } } \ No newline at end of file