[Enhancement] Support TRT and dynamic shape in PaddleInference CPP #1691

Merged 6 commits on Jan 24, 2022
4 changes: 2 additions & 2 deletions deploy/cpp/run_seg_cpu.sh
@@ -29,7 +29,7 @@ make -j
cd ..

./build/test_seg \
-   --model_dir=./bisenetv2_demo_model \
+   --model_dir=./stdc1seg_infer_model \
    --img_path=./cityscapes_demo.png \
-   --use_cpu=true \
+   --devices=CPU \
    --use_mkldnn=true
4 changes: 2 additions & 2 deletions deploy/cpp/run_seg_gpu.sh
@@ -29,6 +29,6 @@ make -j
cd ..

./build/test_seg \
-   --model_dir=./bisenetv2_demo_model \
+   --model_dir=./stdc1seg_infer_model \
    --img_path=./cityscapes_demo.png \
-   --use_cpu=false
+   --devices=GPU
40 changes: 40 additions & 0 deletions deploy/cpp/run_seg_gpu_trt.sh
@@ -0,0 +1,40 @@
#!/bin/bash
set +x
set -e

# set TENSORRT_ROOT
TENSORRT_ROOT='/work/download/TensorRT-7.1.3.4/'

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON
DEMO_NAME=test_seg

work_path=$(dirname $(readlink -f $0))
LIB_DIR="${work_path}/paddle_inference"

# compile
mkdir -p build
cd build
rm -rf *

cmake .. \
-DDEMO_NAME=${DEMO_NAME} \
-DWITH_MKL=${WITH_MKL} \
-DWITH_GPU=${WITH_GPU} \
-DUSE_TENSORRT=${USE_TENSORRT} \
-DWITH_STATIC_LIB=OFF \
-DPADDLE_LIB=${LIB_DIR} \
-DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j

# run
cd ..

./build/test_seg \
--model_dir=./stdc1seg_infer_model \
--img_path=./cityscapes_demo.png \
--devices=GPU \
--use_trt=True \
--trt_precision=fp32
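
Note: at runtime the demo binary also has to find the TensorRT shared libraries. A minimal sketch, assuming the same TENSORRT_ROOT as in the script above (adjust the path to your install); the fp16 run simply exercises the other precision supported by this PR:

# Hypothetical runtime setup: expose the TensorRT libraries to the loader,
# then rerun the demo with fp16 precision.
export LD_LIBRARY_PATH=/work/download/TensorRT-7.1.3.4/lib:${LD_LIBRARY_PATH}
./build/test_seg \
    --model_dir=./stdc1seg_infer_model \
    --img_path=./cityscapes_demo.png \
    --devices=GPU \
    --use_trt=True \
    --trt_precision=fp16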
44 changes: 44 additions & 0 deletions deploy/cpp/run_seg_gpu_trt_dynamic_shape.sh
@@ -0,0 +1,44 @@
#!/bin/bash
set +x
set -e

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON
DEMO_NAME=test_seg

work_path=$(dirname $(readlink -f $0))
LIB_DIR="${work_path}/paddle_inference"

# set TENSORRT_ROOT and dynamic_shape_path
TENSORRT_ROOT='/work/download/TensorRT-7.1.3.4/'
DYNAMIC_SHAPE_PATH='./dynamic_shape.pbtxt'
TRT_PRECISION=fp32

# compile
mkdir -p build
cd build
rm -rf *

cmake .. \
-DDEMO_NAME=${DEMO_NAME} \
-DWITH_MKL=${WITH_MKL} \
-DWITH_GPU=${WITH_GPU} \
-DUSE_TENSORRT=${USE_TENSORRT} \
-DWITH_STATIC_LIB=OFF \
-DPADDLE_LIB=${LIB_DIR} \
-DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j

# run
cd ..

./build/test_seg \
--model_dir=./stdc1seg_infer_model \
--img_path=./cityscapes_demo.png \
--devices=GPU \
--use_trt=True \
--trt_precision=${TRT_PRECISION} \
--use_trt_dynamic_shape=True \
--dynamic_shape_path=${DYNAMIC_SHAPE_PATH}
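
The dynamic_shape.pbtxt consumed by this script has to be generated beforehand. A sketch of the assumed workflow, using the collect_dynamic_shape.py tool added later in this PR (the deploy.yaml location is an assumption based on the exported model layout):

# Assumed workflow: collect the shape ranges once, then run the TRT demo.
python ../python/collect_dynamic_shape.py \
    --config ./stdc1seg_infer_model/deploy.yaml \
    --image_path ./cityscapes_demo.png \
    --dynamic_shape_path ./dynamic_shape.pbtxt
sh ./run_seg_gpu_trt_dynamic_shape.sh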
54 changes: 46 additions & 8 deletions deploy/cpp/src/test_seg.cc
@@ -16,8 +16,11 @@
 DEFINE_string(model_dir, "", "Directory of the inference model. "
     "It contains deploy.yaml and the inference model files");
 DEFINE_string(img_path, "", "Path of the test image.");
-DEFINE_bool(use_cpu, false, "Whether to use CPU. Default: use GPU.");
+DEFINE_string(devices, "GPU", "Use GPU or CPU devices. Default: GPU");
+DEFINE_bool(use_trt, false, "Whether to enable TensorRT when using the GPU. Default: false.");
+DEFINE_string(trt_precision, "fp32", "The precision of TensorRT, supports fp32, fp16 and int8. Default: fp32");
+DEFINE_bool(use_trt_dynamic_shape, false, "Whether to enable dynamic shape when using GPU and TensorRT. Default: false.");
+DEFINE_string(dynamic_shape_path, "", "If dynamic_shape_path is set, the dynamic shape info for TRT is read from it.");
 DEFINE_bool(use_mkldnn, false, "Whether to enable MKLDNN when using the CPU. Default: false.");
 DEFINE_string(save_dir, "", "Directory of the output image.");

@@ -60,20 +63,55 @@ std::shared_ptr<paddle_infer::Predictor> create_predictor(
       model_dir + "/" + yaml_config.params_file);
   infer_config.EnableMemoryOptim();

-  if (FLAGS_use_cpu) {
+  if (FLAGS_devices == "CPU") {
     LOG(INFO) << "Use CPU";
     if (FLAGS_use_mkldnn) {
-      // TODO(jc): fix the bug
-      //infer_config.EnableMKLDNN();
+      LOG(INFO) << "Use MKLDNN";
+      infer_config.EnableMKLDNN();
+      infer_config.SetCpuMathLibraryNumThreads(5);
     }
-  } else {
+  } else if (FLAGS_devices == "GPU") {
     LOG(INFO) << "Use GPU";
     infer_config.EnableUseGpu(100, 0);

+    // TRT config
     if (FLAGS_use_trt) {
-      infer_config.EnableTensorRtEngine(1 << 20, 1, 3,
-          paddle_infer::PrecisionType::kFloat32, false, false);
+      LOG(INFO) << "Use TRT";
+      LOG(INFO) << "trt_precision: " << FLAGS_trt_precision;
+
+      // TRT precision
+      if (FLAGS_trt_precision == "fp32") {
+        infer_config.EnableTensorRtEngine(1 << 20, 1, 3,
+            paddle_infer::PrecisionType::kFloat32, false, false);
+      } else if (FLAGS_trt_precision == "fp16") {
+        infer_config.EnableTensorRtEngine(1 << 20, 1, 3,
+            paddle_infer::PrecisionType::kHalf, false, false);
+      } else if (FLAGS_trt_precision == "int8") {
+        infer_config.EnableTensorRtEngine(1 << 20, 1, 3,
+            paddle_infer::PrecisionType::kInt8, false, false);
+      } else {
+        LOG(FATAL) << "The trt_precision should be fp32, fp16 or int8.";
+      }
+
+      // TRT dynamic shape
+      if (FLAGS_use_trt_dynamic_shape) {
+        LOG(INFO) << "Enable TRT dynamic shape";
+        if (FLAGS_dynamic_shape_path.empty()) {
+          std::map<std::string, std::vector<int>> min_input_shape = {
+              {"image", {1, 3, 112, 112}}};
+          std::map<std::string, std::vector<int>> max_input_shape = {
+              {"image", {1, 3, 1024, 2048}}};
+          std::map<std::string, std::vector<int>> opt_input_shape = {
+              {"image", {1, 3, 512, 1024}}};
+          infer_config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                              opt_input_shape);
+        } else {
+          infer_config.EnableTunedTensorRtDynamicShape(FLAGS_dynamic_shape_path,
+                                                       true);
+        }
+      }
     }
+  } else {
+    LOG(FATAL) << "The devices should be GPU or CPU";
+  }

auto predictor = paddle_infer::CreatePredictor(infer_config);
@@ -153,5 +191,5 @@ int main(int argc, char *argv[]) {
   cv::equalizeHist(out_gray_img, out_eq_img);
   cv::imwrite("out_img.jpg", out_eq_img);

-  LOG(INFO) << "Finish";
+  LOG(INFO) << "Finished. The result is saved in out_img.jpg";
 }
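
When --dynamic_shape_path is empty, the code above falls back to hard-coded shape ranges for the "image" input. A minimal sketch of that invocation, using only flags defined in this diff:

# Relies on the built-in defaults from test_seg.cc:
#   min {1, 3, 112, 112}, opt {1, 3, 512, 1024}, max {1, 3, 1024, 2048}
./build/test_seg \
    --model_dir=./stdc1seg_infer_model \
    --img_path=./cityscapes_demo.png \
    --devices=GPU \
    --use_trt=True \
    --trt_precision=fp32 \
    --use_trt_dynamic_shape=True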
110 changes: 110 additions & 0 deletions deploy/python/collect_dynamic_shape.py
@@ -0,0 +1,110 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import codecs
import os
import sys

import yaml
import numpy as np
from paddle.inference import create_predictor, PrecisionType
from paddle.inference import Config as PredictConfig

LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..', '..'))

from paddleseg.utils import logger, get_image_list, progbar
from infer import DeployConfig
"""
Load images and run the model, it collects and saves dynamic shapes,
which are used in deployment with TRT.
"""


def parse_args():
parser = argparse.ArgumentParser(description='Test')
parser.add_argument(
"--config",
help="The deploy config generated by exporting model.",
type=str,
required=True)
parser.add_argument(
'--image_path',
        help='The directory, path, or file list of the images to be predicted.',
type=str,
required=True)

parser.add_argument(
'--dynamic_shape_path',
type=str,
default="./dynamic_shape.pbtxt",
help='The path to save dynamic shape.')

return parser.parse_args()


def is_support_collecting():
return hasattr(PredictConfig, "collect_shape_range_info") \
and hasattr(PredictConfig, "enable_tuned_tensorrt_dynamic_shape")


def collect_dynamic_shape(args):
    if not is_support_collecting():
        logger.error("This PaddlePaddle build does not support collecting "
                     "dynamic shapes; please reinstall the latest GPU version "
                     "of PaddlePaddle.")
        return

# prepare config
cfg = DeployConfig(args.config)
pred_cfg = PredictConfig(cfg.model, cfg.params)
pred_cfg.enable_use_gpu(1000, 0)
pred_cfg.collect_shape_range_info(args.dynamic_shape_path)

# create predictor
predictor = create_predictor(pred_cfg)
input_names = predictor.get_input_names()
input_handle = predictor.get_input_handle(input_names[0])

# get images
img_path_list, _ = get_image_list(args.image_path)
if not isinstance(img_path_list, (list, tuple)):
img_path_list = [img_path_list]
    logger.info(f"The number of images is {len(img_path_list)}\n")

# collect
progbar_val = progbar.Progbar(target=len(img_path_list))
for idx, img_path in enumerate(img_path_list):
data = np.array([cfg.transforms(img_path)[0]])
input_handle.reshape(data.shape)
input_handle.copy_from_cpu(data)

        try:
            predictor.run()
        except Exception:
            logger.info(
                "Failed to collect dynamic shapes. Usually the error is caused "
                "by running out of GPU memory because the model or images are "
                "too large.\n")
            del predictor
            if os.path.exists(args.dynamic_shape_path):
                os.remove(args.dynamic_shape_path)
            return

        progbar_val.update(idx + 1)

    logger.info(f"The dynamic shapes are saved in {args.dynamic_shape_path}")


if __name__ == '__main__':
args = parse_args()
collect_dynamic_shape(args)
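
The script guards on is_support_collecting() because older Paddle builds lack these APIs. A quick sanity check you can run first, assuming paddle is importable in the current environment:

# Prints True only if this Paddle build exposes the shape-collection APIs
# used by collect_dynamic_shape.py.
python -c "from paddle.inference import Config; print(hasattr(Config, 'collect_shape_range_info') and hasattr(Config, 'enable_tuned_tensorrt_dynamic_shape'))"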