From 449c989a66c705564b8f567eeb21b54f755827e4 Mon Sep 17 00:00:00 2001
From: cryoco
Date: Wed, 21 Apr 2021 15:14:45 +0800
Subject: [PATCH] add demo

---
 c++/paddle_infer_demo/CMakeLists.txt | 87 +++++++++++++++++++++++++
 c++/paddle_infer_demo/README.md      |  4 ++
 c++/paddle_infer_demo/run.sh         | 30 +++++++++
 c++/paddle_infer_demo/test_yolov3.py | 86 ++++++++++++++++++++++++
 c++/paddle_infer_demo/yolov3_test.cc | 97 ++++++++++++++++++++++++++++
 5 files changed, 304 insertions(+)
 create mode 100755 c++/paddle_infer_demo/CMakeLists.txt
 create mode 100644 c++/paddle_infer_demo/README.md
 create mode 100644 c++/paddle_infer_demo/run.sh
 create mode 100644 c++/paddle_infer_demo/test_yolov3.py
 create mode 100644 c++/paddle_infer_demo/yolov3_test.cc

diff --git a/c++/paddle_infer_demo/CMakeLists.txt b/c++/paddle_infer_demo/CMakeLists.txt
new file mode 100755
index 0000000000000..75652e156fe1e
--- /dev/null
+++ b/c++/paddle_infer_demo/CMakeLists.txt
@@ -0,0 +1,87 @@
+cmake_minimum_required(VERSION 3.16)
+project(cpp_inference_demo CXX C)
+if(COMMAND cmake_policy)
+  cmake_policy(SET CMP0003 NEW)
+endif(COMMAND cmake_policy)
+option(WITH_MKL        "Compile demo with MKL/OpenBlas support, default use MKL."     ON)
+option(WITH_GPU        "Compile demo with GPU/CPU, default use CPU."                  OFF)
+option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
+option(USE_TENSORRT    "Compile demo with TensorRT."                                  OFF)
+
+
+macro(safe_set_static_flag)
+  foreach(flag_var
+      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+    if(${flag_var} MATCHES "/MD")
+      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+    endif(${flag_var} MATCHES "/MD")
+  endforeach(flag_var)
+endmacro()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g")
+set(CMAKE_STATIC_LIBRARY_PREFIX "")
+message("flags" ${CMAKE_CXX_FLAGS})
+
+if(NOT DEFINED PADDLE_LIB)
+  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
+endif()
+if(NOT DEFINED DEMO_NAME)
+  message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
+endif()
+
+
+include_directories("${PADDLE_LIB}")
+include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
+include_directories("${PADDLE_LIB}/third_party/install/glog/include")
+include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
+include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
+include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
+include_directories("${PADDLE_LIB}/third_party/boost")
+include_directories("${PADDLE_LIB}/third_party/eigen3")
+
+link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
+
+link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
+link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
+link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
+link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
+link_directories("${PADDLE_LIB}/paddle/lib")
+
+add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
+
+if(WITH_MKL)
+  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
+  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+               ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+  set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
+  if(EXISTS ${MKLDNN_PATH})
+    include_directories("${MKLDNN_PATH}/include")
+    set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+  endif()
+else()
+  set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
+endif()
+
+# Note: libpaddle_inference_api.so/a must be linked before libpaddle_inference.so/a
+if(WITH_STATIC_LIB)
+  set(DEPS
+      ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
+else()
+  set(DEPS
+      ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+set(EXTERNAL_LIB "-lrt -ldl -lpthread")
+set(DEPS ${DEPS}
+    ${MATH_LIB} ${MKLDNN_LIB}
+    glog gflags protobuf xxhash
+    ${EXTERNAL_LIB})
+
+if(WITH_GPU)
+  set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+  set(DEPS ${DEPS} /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcublas${CMAKE_SHARED_LIBRARY_SUFFIX})
+  set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+target_link_libraries(${DEMO_NAME} ${DEPS})
diff --git a/c++/paddle_infer_demo/README.md b/c++/paddle_infer_demo/README.md
new file mode 100644
index 0000000000000..76d15bce3e93a
--- /dev/null
+++ b/c++/paddle_infer_demo/README.md
@@ -0,0 +1,4 @@
+## Model download links:
+- [yolov3-r34-float](https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/yolov3_r34_float.tgz)
+- [yolov3-r34-int8](https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/yolov3_r34_int8.tgz)
+
diff --git a/c++/paddle_infer_demo/run.sh b/c++/paddle_infer_demo/run.sh
new file mode 100644
index 0000000000000..9dc00174b9927
--- /dev/null
+++ b/c++/paddle_infer_demo/run.sh
@@ -0,0 +1,30 @@
+mkdir -p build
+cd build
+rm -rf *
+
+#DEMO_NAME=hrnet_test
+DEMO_NAME=yolov3_test
+
+WITH_MKL=ON
+WITH_GPU=ON
+USE_TENSORRT=ON
+
+LIB_DIR=/pr/Paddle/build/paddle_inference_install_dir
+MODEL_DIR=/paddle_infer_demo/yolov3_r34_float/
+
+#echo $MODEL_DIR
+CUDNN_LIB=/pr/nvidia/cudnn-8.1/lib64
+CUDA_LIB=/usr/local/cuda-10.2/lib64
+
+cmake .. -DPADDLE_LIB=${LIB_DIR} \
+  -DWITH_MKL=${WITH_MKL} \
+  -DDEMO_NAME=${DEMO_NAME} \
+  -DWITH_GPU=${WITH_GPU} \
+  -DWITH_STATIC_LIB=OFF \
+  -DUSE_TENSORRT=${USE_TENSORRT} \
+  -DCUDNN_LIB=${CUDNN_LIB} \
+  -DCUDA_LIB=${CUDA_LIB}
+
+make -j
+
+./${DEMO_NAME} --model_file=${MODEL_DIR}/model.pdmodel --params_file=${MODEL_DIR}/model.pdiparams --batch_size=1
diff --git a/c++/paddle_infer_demo/test_yolov3.py b/c++/paddle_infer_demo/test_yolov3.py
new file mode 100644
index 0000000000000..8eade99aa0db2
--- /dev/null
+++ b/c++/paddle_infer_demo/test_yolov3.py
@@ -0,0 +1,86 @@
+import numpy as np
+import argparse
+import time
+import os
+
+
+from paddle.inference import Config
+from paddle.inference import create_predictor
+
+def init_predictor(args):
+    config = Config()
+    if args.model_dir == "":
+        config.set_model(args.model_file, args.params_file)
+    else:
+        config.set_model(args.model_dir)
+    #config.disable_glog_info()
+    config.enable_use_gpu(1000, 3)
+    predictor = create_predictor(config)
+    return predictor
+
+def run(args, predictor, data):
+    # copy data to input tensors
+
+    input_names = predictor.get_input_names()
+    for i, name in enumerate(input_names):
+        input_tensor = predictor.get_input_handle(name)
+        input_tensor.reshape(data[i].shape)
+        data[i] = data[i].copy()
+        input_tensor.copy_from_cpu(data[i])
+
+
+    # warm up
+    for _ in range(10):
+        predictor.run()
+
+    # do the inference
+    repeat = 100
+    start = time.time()
+    for _ in range(repeat):
+        for i, name in enumerate(input_names):
+            input_tensor = predictor.get_input_handle(name)
+            input_tensor.reshape(data[i].shape)
+            input_tensor.copy_from_cpu(data[i])
+        predictor.run()
+        output_names = predictor.get_output_names()
+        for i, name in enumerate(output_names):
+            output_tensor = predictor.get_output_handle(name)
+            output_data = output_tensor.copy_to_cpu()
+    end = time.time()
+
+    precision = "int8" if args.use_int8 else "float32"
+    latency = (end - start) * 1000 / repeat
+    print("precision:", precision, "latency:", latency, "ms")
+
+    results = []
+    # get output data from output tensors
+    output_names = predictor.get_output_names()
+    for i, name in enumerate(output_names):
+        output_tensor = predictor.get_output_handle(name)
+        output_data = output_tensor.copy_to_cpu()
+        results.append(output_data)
+
+    return results
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_file", type=str, default="", help="Model filename. Specify this when your model is a combined model.")
+    parser.add_argument("--params_file", type=str, default="", help="Parameter filename. Specify this when your model is a combined model.")
+    parser.add_argument("--model_dir", type=str, default="", help="Model dir. If you load a non-combined model, specify the directory of the model.")
+    parser.add_argument("--int8", dest='use_int8', action='store_true', help="Use int8.")
+    parser.add_argument("--float32", dest='use_int8', action='store_false', help="Use float32.")
+    parser.set_defaults(use_int8=False)
+    parser.add_argument("--min", type=int, default=3, help="min_subgraph_size for tensorrt")
+    return parser.parse_args()
+
+def fake_input(shape):
+    fake_img = np.ones(shape).astype(np.float32)
+    return fake_img
+
+if __name__ == '__main__':
+    args = parse_args()
+    pred = init_predictor(args)
+    input_shape = (1, 3, 608, 608)
+    fake_img = fake_input(input_shape)
+    im_size = np.array([[608, 608]]).astype('int32')
+    result = run(args, pred, [fake_img, im_size])
diff --git a/c++/paddle_infer_demo/yolov3_test.cc b/c++/paddle_infer_demo/yolov3_test.cc
new file mode 100644
index 0000000000000..c36fb833e3ddb
--- /dev/null
+++ b/c++/paddle_infer_demo/yolov3_test.cc
@@ -0,0 +1,97 @@
+#include "paddle/include/paddle_inference_api.h"
+
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <numeric>
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+
+using paddle_infer::Config;
+using paddle_infer::Predictor;
+using paddle_infer::CreatePredictor;
+using paddle_infer::PrecisionType;
+
+DEFINE_string(model_file, "", "Path of the inference model file.");
+DEFINE_string(params_file, "", "Path of the inference params file.");
+DEFINE_string(model_dir, "", "Directory of the inference model.");
+DEFINE_int32(batch_size, 1, "Batch size.");
+
+using Time = decltype(std::chrono::high_resolution_clock::now());
+Time time() { return std::chrono::high_resolution_clock::now(); }
+double time_diff(Time t1, Time t2) {
+  typedef std::chrono::microseconds ms;
+  auto diff = t2 - t1;
+  ms counter = std::chrono::duration_cast<ms>(diff);
+  return counter.count();
+}
+
+std::shared_ptr<Predictor> InitPredictor() {
+  Config config;
+  if (FLAGS_model_dir != "") {
+    config.SetModel(FLAGS_model_dir);
+  } else {
+    config.SetModel(FLAGS_model_file, FLAGS_params_file);
+  }
+  config.EnableUseGpu(1000, 3);
+  // config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10, PrecisionType::kFloat32, false, false);
+  return CreatePredictor(config);
+}
+
+void run(Predictor *predictor, const std::vector<float> &input,
+         const std::vector<int> &input_shape, const std::vector<int> &input_im,
+         const std::vector<int> &input_im_shape, std::vector<float> *out_data) {
+  auto input_names = predictor->GetInputNames();
+  auto image_handle = predictor->GetInputHandle(input_names[0]);
+  image_handle->Reshape(input_shape);
+  image_handle->CopyFromCpu(input.data());
+
+  auto im_size_handle = predictor->GetInputHandle(input_names[1]);
+  im_size_handle->Reshape(input_im_shape);
+  im_size_handle->CopyFromCpu(input_im.data());
+
+  int warmup = 10;
+  int repeat = 100;
+
+  for (int i = 0; i < warmup; i++)
+    predictor->Run();
+
+  auto time1 = time();
+  for (int i = 0; i < repeat; i++) {
+    predictor->Run();
+  }
+  auto time2 = time();
+  double latency = time_diff(time1, time2) / repeat / 1000;
+  std::cout << "batch: " << FLAGS_batch_size << " predict cost: " << latency << "ms" << std::endl;
+
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputHandle(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+
+  out_data->resize(out_num);
+  output_t->CopyToCpu(out_data->data());
+}
+
+int main(int argc, char *argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto predictor = InitPredictor();
+
+  const int height = 608;
+  const int width = 608;
+  const int channels = 3;
+  std::vector<int> input_shape = {FLAGS_batch_size, channels, height, width};
+  std::vector<float> input_data(FLAGS_batch_size * channels * height * width);
+  for (size_t i = 0; i < input_data.size(); ++i) {
+    input_data[i] = i % 255 * 0.13f;
+  }
+  std::vector<int> input_im_shape = {FLAGS_batch_size, 2};
+  std::vector<int> input_im_data(FLAGS_batch_size * 2, 608);
+
+  std::vector<float> out_data;
+  run(predictor.get(), input_data, input_shape, input_im_data, input_im_shape,
+      &out_data);
+  return 0;
+}