Commit
Merge pull request PaddlePaddle#123 from cryoco/add-infer-month-demo
add demo
cryoco authored Apr 21, 2021
2 parents 66c81be + 449c989 commit 1054925
Showing 5 changed files with 304 additions and 0 deletions.
87 changes: 87 additions & 0 deletions c++/paddle_infer_demo/CMakeLists.txt
@@ -0,0 +1,87 @@
cmake_minimum_required(VERSION 3.16)
project(cpp_inference_demo CXX C)
if(COMMAND cmake_policy)
  cmake_policy(SET CMP0003 NEW)
endif()
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
option(USE_TENSORRT "Compile demo with TensorRT." OFF)


macro(safe_set_static_flag)
  foreach(flag_var
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()
endmacro()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
message("flags" ${CMAKE_CXX_FLAGS})

if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "Please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
  message(FATAL_ERROR "Please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()


include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3")

link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")

link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")

add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)

if(WITH_MKL)
  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
               ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
    set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
  endif()
else()
  set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()

# Note: libpaddle_inference_api.so/a must be put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB)
  set(DEPS
      ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  set(DEPS
      ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
    ${MATH_LIB} ${MKLDNN_LIB}
    glog gflags protobuf xxhash
    ${EXTERNAL_LIB})

if(WITH_GPU)
  set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcublas${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

target_link_libraries(${DEMO_NAME} ${DEPS})
4 changes: 4 additions & 0 deletions c++/paddle_infer_demo/README.md
@@ -0,0 +1,4 @@
## Model download links:
[yolov3-r34-float](https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/yolov3_r34_float.tgz)
[yolov3-r34-int8](https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/yolov3_r34_int8.tgz)
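
A minimal fetch-and-unpack sketch, assuming `wget` and `tar` are available (the float32 archive is shown; the int8 archive works the same way):

wget https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/yolov3_r34_float.tgz
tar xzf yolov3_r34_float.tgz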

30 changes: 30 additions & 0 deletions c++/paddle_infer_demo/run.sh
@@ -0,0 +1,30 @@
#!/bin/bash
mkdir -p build
cd build
rm -rf *

#DEMO_NAME=hrnet_test
DEMO_NAME=yolov3_test

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=/pr/Paddle/build/paddle_inference_install_dir
MODEL_DIR=/paddle_infer_demo/yolov3_r34_float/

#echo $MODEL_DIR
CUDNN_LIB=/pr/nvidia/cudnn-8.1/lib64
CUDA_LIB=/usr/local/cuda-10.2/lib64

cmake .. -DPADDLE_LIB=${LIB_DIR} \
         -DWITH_MKL=${WITH_MKL} \
         -DDEMO_NAME=${DEMO_NAME} \
         -DWITH_GPU=${WITH_GPU} \
         -DWITH_STATIC_LIB=OFF \
         -DUSE_TENSORRT=${USE_TENSORRT} \
         -DCUDNN_LIB=${CUDNN_LIB} \
         -DCUDA_LIB=${CUDA_LIB}

make -j

./${DEMO_NAME} --model_file=${MODEL_DIR}/model.pdmodel --params_file=${MODEL_DIR}/model.pdiparams --batch_size=1
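
As a hedged aside, the Python counterpart below (test_yolov3.py) can be pointed at the same model with analogous flags; this assumes MODEL_DIR as set above and a Python environment with paddlepaddle-gpu installed:

python test_yolov3.py --model_file=${MODEL_DIR}/model.pdmodel --params_file=${MODEL_DIR}/model.pdiparams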
86 changes: 86 additions & 0 deletions c++/paddle_infer_demo/test_yolov3.py
@@ -0,0 +1,86 @@
import numpy as np
import argparse
import time
import os


from paddle.inference import Config
from paddle.inference import create_predictor

def init_predictor(args):
    config = Config()
    if args.model_dir == "":
        config.set_model(args.model_file, args.params_file)
    else:
        config.set_model(args.model_dir)
    # config.disable_glog_info()
    # 1000 MB initial GPU memory pool, on GPU card 3
    config.enable_use_gpu(1000, 3)
    predictor = create_predictor(config)
    return predictor

def run(args, predictor, data):
    # copy data to the input tensors
    input_names = predictor.get_input_names()
    for i, name in enumerate(input_names):
        input_tensor = predictor.get_input_handle(name)
        input_tensor.reshape(data[i].shape)
        data[i] = data[i].copy()
        input_tensor.copy_from_cpu(data[i])

    # warm up
    for _ in range(10):
        predictor.run()

    # benchmark: re-feed the inputs, run, and fetch outputs `repeat` times
    repeat = 100
    start = time.perf_counter()
    for _ in range(repeat):
        for i, name in enumerate(input_names):
            input_tensor = predictor.get_input_handle(name)
            input_tensor.reshape(data[i].shape)
            input_tensor.copy_from_cpu(data[i])
        predictor.run()
        output_names = predictor.get_output_names()
        for name in output_names:
            output_tensor = predictor.get_output_handle(name)
            output_data = output_tensor.copy_to_cpu()
    end = time.perf_counter()

    precision = "int8" if args.use_int8 else "float32"
    latency = (end - start) * 1000 / repeat
    print("precision:", precision, "latency:", latency, "ms")

    results = []
    # fetch the final data from the output tensors
    output_names = predictor.get_output_names()
    for name in output_names:
        output_tensor = predictor.get_output_handle(name)
        output_data = output_tensor.copy_to_cpu()
        results.append(output_data)

    return results

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_file", type=str, default="", help="Model filename. Specify this when your model is a combined model.")
    parser.add_argument("--params_file", type=str, default="", help="Parameter filename. Specify this when your model is a combined model.")
    parser.add_argument("--model_dir", type=str, default="", help="Model directory. If you load a non-combined model, specify the directory of the model.")
    parser.add_argument("--int8", dest='use_int8', action='store_true', help="Use int8.")
    parser.add_argument("--float32", dest='use_int8', action='store_false', help="Use float32.")
    parser.set_defaults(use_int8=False)
    parser.add_argument("--min", type=int, default=3, help="min_subgraph_size for TensorRT.")
    return parser.parse_args()

def fake_input(shape):
    fake_img = np.ones(shape).astype(np.float32)
    return fake_img

if __name__ == '__main__':
    args = parse_args()
    pred = init_predictor(args)
    input_shape = (1, 3, 608, 608)
    fake_img = fake_input(input_shape)
    im_size = np.array([[608, 608]]).astype('int32')
    result = run(args, pred, [fake_img, im_size])
97 changes: 97 additions & 0 deletions c++/paddle_infer_demo/yolov3_test.cc
@@ -0,0 +1,97 @@
#include "paddle/include/paddle_inference_api.h"

#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>

#include <gflags/gflags.h>
#include <glog/logging.h>

using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::CreatePredictor;
using paddle_infer::PrecisionType;

DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); }
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count();
}

std::shared_ptr<Predictor> InitPredictor() {
  Config config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  // 1000 MB initial GPU memory pool, on GPU card 3
  config.EnableUseGpu(1000, 3);
  // config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 10,
  //                             PrecisionType::kFloat32, false, false);
  return CreatePredictor(config);
}

void run(Predictor *predictor, const std::vector<float> &input,
         const std::vector<int> &input_shape, const std::vector<int32_t> &input_im,
         const std::vector<int> &input_im_shape, std::vector<float> *out_data) {
  auto input_names = predictor->GetInputNames();
  // first input: the image tensor
  auto image_handle = predictor->GetInputHandle(input_names[0]);
  image_handle->Reshape(input_shape);
  image_handle->CopyFromCpu(input.data());

  // second input: the image size tensor
  auto im_size_handle = predictor->GetInputHandle(input_names[1]);
  im_size_handle->Reshape(input_im_shape);
  im_size_handle->CopyFromCpu(input_im.data());

  int warmup = 10;
  int repeat = 100;

  for (int i = 0; i < warmup; i++)
    predictor->Run();

  auto time1 = time();
  for (int i = 0; i < repeat; i++) {
    predictor->Run();
  }
  auto time2 = time();
  double latency = time_diff(time1, time2) / repeat / 1000;
  std::cout << "batch: " << FLAGS_batch_size
            << " predict cost: " << latency << "ms" << std::endl;

  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());

  out_data->resize(out_num);
  output_t->CopyToCpu(out_data->data());
}

int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto predictor = InitPredictor();

  const int height = 608;
  const int width = 608;
  const int channels = 3;
  std::vector<int> input_shape = {FLAGS_batch_size, channels, height, width};
  std::vector<float> input_data(FLAGS_batch_size * channels * height * width);
  for (size_t i = 0; i < input_data.size(); ++i) {
    input_data[i] = i % 255 * 0.13f;
  }
  std::vector<int> input_im_shape = {FLAGS_batch_size, 2};
  std::vector<int32_t> input_im_data(FLAGS_batch_size * 2, 608);

  std::vector<float> out_data;
  run(predictor.get(), input_data, input_shape, input_im_data, input_im_shape,
      &out_data);
  return 0;
}
