dynamic shape clone #38520

Merged: 12 commits, Dec 30, 2021

Changes from all commits:
4 changes: 4 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
@@ -274,7 +274,11 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)

if (TENSORRT_FOUND)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper tensorrt_engine_op)
else()
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
endif(TENSORRT_FOUND)

cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector op_registry while_op_helper recurrent_op_helper conditional_block_op_helper)
if(WITH_DISTRIBUTE)
36 changes: 36 additions & 0 deletions paddle/fluid/framework/naive_executor.cc
@@ -20,6 +20,9 @@
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#if PADDLE_WITH_TENSORRT
#include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h"
#endif

namespace paddle {
namespace framework {
@@ -132,5 +135,38 @@ NaiveExecutor::~NaiveExecutor() {
#endif
}

void NaiveExecutor::ResetTrtOps(int num) {
#if PADDLE_WITH_TENSORRT
for (auto &op : ops_) {
if (op->Type() == "tensorrt_engine") {
operators::TensorRTEngineOp *trtop =
dynamic_cast<operators::TensorRTEngineOp *>(op.get());
if (!trtop) return;
std::string engine_key = trtop->Attr<std::string>("engine_key");
int engine_predictor_id = trtop->Attr<int>("predictor_id");
std::string engine_name =
engine_key + std::to_string(engine_predictor_id);
operators::TensorRTEngine *trt_engine =
paddle::inference::Singleton<
inference::tensorrt::TRTEngineManager>::Global()
.Get(engine_name);
if (trt_engine->with_dynamic_shape()) {
LOG(INFO) << "rebuild trt engine, this may cost a lot of time!";
trt_engine->ResetContext();
trt_engine->ClearTensorMap();
trt_engine->SetProfileNum(num);
auto *anc = scope_->parent();
while (anc && anc->parent()) {
anc = anc->parent();
}
if (anc == nullptr) {
anc = scope_;
}
trtop->PrepareTRTEngine(*anc, trt_engine);
}
}
}
#endif
}
} // namespace framework
} // namespace paddle
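
The ResetTrtOps hook above walks every tensorrt_engine op and, for dynamic-shape engines, resets the execution contexts, raises the profile count to num, and rebuilds the engine against the topmost scope so the rebuilt engine still sees the persistent weights. The ancestor walk is the subtle part; below is a minimal self-contained sketch of the same logic, using a hypothetical Scope stand-in rather than Paddle's framework::Scope.

#include <iostream>

// Hypothetical stand-in for a scope tree; framework::Scope exposes a similar
// parent() accessor.
struct Scope {
  const Scope* parent_{nullptr};
  const Scope* parent() const { return parent_; }
};

// Climb to the topmost ancestor; fall back to the scope itself when it has no
// parent. This mirrors the loop ResetTrtOps runs before PrepareTRTEngine.
const Scope* RootScope(const Scope* scope) {
  const Scope* anc = scope;
  while (anc && anc->parent()) anc = anc->parent();
  return anc ? anc : scope;
}

int main() {
  Scope root;
  Scope child{&root};
  Scope grandchild{&child};
  std::cout << (RootScope(&grandchild) == &root) << "\n";  // prints 1
  return 0;
}
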
2 changes: 2 additions & 0 deletions paddle/fluid/framework/naive_executor.h
@@ -63,6 +63,8 @@ class NaiveExecutor {

void CleanFeedFetchOps();

void ResetTrtOps(int num);

protected:
void CreateOps(const ProgramDesc& desc, int block_id,
bool with_feed_fetch_ops);
13 changes: 11 additions & 2 deletions paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -56,8 +56,17 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
// Because there are cases where new parameter variables are not added to
// the program during the analysis pass.
bool reserve_cpu_weights = false;
if (argument->tensorrt_allow_build_at_runtime_valid() &&
argument->tensorrt_allow_build_at_runtime()) {
bool with_dynamic_shape = false;
if (argument->Has("max_input_shape") && argument->Has("min_input_shape") &&
argument->Has("optim_input_shape")) {
with_dynamic_shape = (argument->max_input_shape().size() > 0 &&
argument->min_input_shape().size() > 0 &&
argument->optim_input_shape().size() > 0);
}
with_dynamic_shape =
with_dynamic_shape || (argument->Has("tensorrt_tuned_dynamic_shape") &&
argument->tensorrt_tuned_dynamic_shape());
if (with_dynamic_shape) {
reserve_cpu_weights = true;
}
for (auto &var_name : all_vars) {
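
The new check replaces the old allow_build_at_runtime test: weights are now kept on the CPU whenever dynamic shape is in play, either because all three shape maps (min/max/optim) are populated or because tuned dynamic shape was requested, since a runtime rebuild must be able to reload them. A distilled sketch of that predicate, with a hypothetical plain-map signature instead of the Argument API:

#include <cassert>
#include <map>
#include <string>
#include <vector>

using ShapeMap = std::map<std::string, std::vector<int>>;

// Dynamic shape is on when every shape map is populated, or when tuned
// dynamic shape was requested -- the condition the pass evaluates above.
bool WithDynamicShape(const ShapeMap& min_shape, const ShapeMap& max_shape,
                      const ShapeMap& opt_shape, bool tuned_dynamic_shape) {
  bool has_all_shapes =
      !min_shape.empty() && !max_shape.empty() && !opt_shape.empty();
  return has_all_shapes || tuned_dynamic_shape;
}

int main() {
  assert(WithDynamicShape({{"image", {1, 1, 3, 3}}},
                          {{"image", {1, 1, 10, 10}}},
                          {{"image", {1, 1, 3, 3}}}, false));
  assert(!WithDynamicShape({}, {}, {}, false));
  return 0;
}
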
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -1332,6 +1332,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
std::lock_guard<std::mutex> lk(clone_mutex_);
auto *x = new AnalysisPredictor(config_);
x->Init(scope_, inference_program_);
x->executor_->ResetTrtOps(++x->clone_num_);
return std::unique_ptr<PaddlePredictor>(x);
}

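
This one-line change is the heart of the PR: every Clone() increments clone_num_ and asks the executor to rebuild the dynamic-shape TRT engines with that many optimization profiles, so each clone can own a profile. For context, here is a sketch of the multi-threaded serving pattern this enables, built on the same public API the test below uses (the include path and configuration calls are illustrative):

#include <memory>
#include <thread>
#include <vector>

#include "paddle_inference_api.h"  // install-dependent include path

void Serve(paddle::PaddlePredictor* predictor) {
  // ... reshape inputs, copy_from_cpu, ZeroCopyRun(), copy_to_cpu ...
}

int main() {
  paddle::AnalysisConfig config;
  // ... EnableUseGpu / EnableTensorRtEngine / SetTRTDynamicShapeInfo ...
  auto main_predictor = paddle::CreatePaddlePredictor(config);

  // Each Clone() bumps clone_num_; ResetTrtOps then rebuilds the engines with
  // one optimization profile per predictor, so clones no longer fight over
  // profile 0 when run concurrently.
  std::vector<std::unique_ptr<paddle::PaddlePredictor>> clones;
  for (int i = 0; i < 4; ++i) clones.emplace_back(main_predictor->Clone());

  std::vector<std::thread> workers;
  for (auto& c : clones) workers.emplace_back(Serve, c.get());
  for (auto& t : workers) t.join();
  return 0;
}
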
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -429,6 +429,7 @@ class AnalysisPredictor : public PaddlePredictor {
bool status_is_cloned_{false};

std::map<std::string, std::vector<std::vector<int32_t>>> shape_info_;
int clone_num_{1};
};

} // namespace paddle
68 changes: 40 additions & 28 deletions paddle/fluid/inference/tensorrt/engine.cc
@@ -42,7 +42,10 @@ void TensorRTEngine::InitNetwork() {
}

infer_builder_config_.reset(infer_builder_->createBuilderConfig());
optim_profile_ = infer_builder_->createOptimizationProfile();
// optim_profile_ = infer_builder_->createOptimizationProfile();
optim_profiles_.resize(max_profile_num_);
for (int i = 0; i < max_profile_num_; i++)
optim_profiles_[i] = infer_builder_->createOptimizationProfile();
}

void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
@@ -199,35 +202,38 @@ void TensorRTEngine::FreezeNetwork() {
if (with_dynamic_shape_) {
#if IS_TRT_VERSION_GE(6000)
LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
for (auto &input : min_input_shape_) {
for (int i = 0; i < max_profile_num_; i++) {
for (auto &input : min_input_shape_) {
#if IS_TRT_VERSION_LT(7000)
// trt6 will check all_of input > 0
if (!(std::all_of(input.second.begin(), input.second.end(),
[](int x) { return x > 0; }) &&
std::all_of(max_input_shape_[input.first].begin(),
max_input_shape_[input.first].end(),
[](int x) { return x > 0; }) &&
std::all_of(optim_input_shape_[input.first].begin(),
optim_input_shape_[input.first].end(),
[](int x) { return x > 0; }))) {
continue;
}
// trt6 will check all_of input > 0
if (!(std::all_of(input.second.begin(), input.second.end(),
[](int x) { return x > 0; }) &&
std::all_of(max_input_shape_[input.first].begin(),
max_input_shape_[input.first].end(),
[](int x) { return x > 0; }) &&
std::all_of(optim_input_shape_[input.first].begin(),
optim_input_shape_[input.first].end(),
[](int x) { return x > 0; }))) {
continue;
}
#endif
VLOG(4) << "TRT dynamic_shape set " << input.first
<< " min: " << Vec2Str(input.second)
<< ", max: " << Vec2Str(max_input_shape_[input.first])
<< ", opt: " << Vec2Str(optim_input_shape_[input.first]);
optim_profile_->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kMIN,
Vec2TRT_Dims(input.second, input.first, true));
optim_profile_->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kMAX,
Vec2TRT_Dims(max_input_shape_[input.first], input.first, true));
optim_profile_->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kOPT,
Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
VLOG(4) << "TRT dynamic_shape set " << input.first
<< " min: " << Vec2Str(input.second)
<< ", max: " << Vec2Str(max_input_shape_[input.first])
<< ", opt: " << Vec2Str(optim_input_shape_[input.first]);

optim_profiles_[i]->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kMIN,
Vec2TRT_Dims(input.second, input.first, true));
optim_profiles_[i]->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kMAX,
Vec2TRT_Dims(max_input_shape_[input.first], input.first, true));
optim_profiles_[i]->setDimensions(
input.first.c_str(), nvinfer1::OptProfileSelector::kOPT,
Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
}
infer_builder_config_->addOptimizationProfile(optim_profiles_[i]);
}
infer_builder_config_->addOptimizationProfile(optim_profile_);
if (WithFp16() && disable_trt_plugin_fp16()) {
LOG(INFO) << "NOTE: In order to achieve higher accuracy, you have "
"disabled the fp16 mode of TRT Plugin,\n"
@@ -237,7 +243,6 @@
}
}
#endif
}

#if IS_TRT_VERSION_GE(8200)
infer_builder_config_->setProfilingVerbosity(
nvinfer1::ProfilingVerbosity::kDETAILED);
@@ -260,6 +265,13 @@
"Build TensorRT cuda engine failed! Please recheck "
"you configurations related to paddle-TensorRT."));

binding_num_ = infer_engine_->getNbBindings();
// reset status for dynamic shape clone
if (max_profile_num_ > 1) {
infer_context_.clear();
cur_profile_num_ = 0;
}

GetEngineInfo();
}

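
The FreezeNetwork change registers max_profile_num_ copies of identical min/opt/max dimensions because each IExecutionContext that runs concurrently must select its own optimization profile; the engine is then built once with all profiles attached, and the context map and current profile counter are reset. A condensed sketch of the underlying TensorRT pattern (assumes an existing builder, builder config, and a network input named "image"; TRT 6+ API):

#include <NvInfer.h>

// Register one identical optimization profile per execution context that may
// run concurrently; each context later selects its own index via
// setOptimizationProfile(i).
void AddProfiles(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config,
                 int max_profile_num) {
  for (int i = 0; i < max_profile_num; ++i) {
    nvinfer1::IOptimizationProfile* profile =
        builder->createOptimizationProfile();
    profile->setDimensions("image", nvinfer1::OptProfileSelector::kMIN,
                           nvinfer1::Dims4{1, 1, 3, 3});
    profile->setDimensions("image", nvinfer1::OptProfileSelector::kOPT,
                           nvinfer1::Dims4{1, 1, 3, 3});
    profile->setDimensions("image", nvinfer1::OptProfileSelector::kMAX,
                           nvinfer1::Dims4{1, 1, 10, 10});
    config->addOptimizationProfile(profile);
  }
}
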
36 changes: 35 additions & 1 deletion paddle/fluid/inference/tensorrt/engine.h
@@ -253,10 +253,38 @@
infer_engine_,
platform::errors::InvalidArgument(
"You should build engine first and then set the context."));
// We may see trt warning: Profile 0 has been chosen by another
// IExecutionContext...
// It's ok. We will set it later.
infer_context_[tid].reset(infer_engine_->createExecutionContext());
if (with_dynamic_shape_) {
// need new profile if it's not the first
if (cur_profile_num_ > 0) {
infer_context_[tid]->setOptimizationProfile(cur_profile_num_);
}
profile_index_[tid] = cur_profile_num_;
++cur_profile_num_;
}
}
return infer_context_[tid].get();
}

int GetProfileIndex() {
if (max_profile_num_ > 1) {
std::unique_lock<std::mutex> lock(mutex_);
const std::thread::id tid = std::this_thread::get_id();
return profile_index_[tid];
} else {
return 0;
}
}

int GetBindingsOffset() {
return (binding_num_ / max_profile_num_) * GetProfileIndex();
}

int GetNbBindings() { return binding_num_; }

void ResetContext() {
std::unique_lock<std::mutex> lock(mutex_);
const std::thread::id tid = std::this_thread::get_id();
@@ -322,6 +350,7 @@
"generating serialization file and doing inference are "
"consistent."));

binding_num_ = infer_engine_->getNbBindings();
GetEngineInfo();
}

@@ -540,6 +569,7 @@
}
}

void SetProfileNum(int num) { max_profile_num_ = num; }
void GetEngineInfo() {
#if IS_TRT_VERSION_GE(8200)
std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
@@ -571,6 +601,9 @@
int batch_size_{-1};

int device_id_;
int max_profile_num_{1};
int cur_profile_num_{0};
std::unordered_map<std::thread::id, int> profile_index_;
ShapeMapType min_input_shape_;
ShapeMapType max_input_shape_;
ShapeMapType optim_input_shape_;
@@ -614,8 +647,9 @@
// For dynamic shape
bool with_dynamic_shape_{false};
#if IS_TRT_VERSION_GE(6000)
int binding_num_;
infer_ptr<nvinfer1::IBuilderConfig> infer_builder_config_;
nvinfer1::IOptimizationProfile* optim_profile_;
std::vector<nvinfer1::IOptimizationProfile*> optim_profiles_;
std::vector<std::unique_ptr<plugin::DynamicPluginTensorRT>> owned_pluginv2_;
#endif
std::mutex mutex_;
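
GetBindingsOffset encodes how TensorRT lays out bindings across profiles: with N profiles the engine exposes N copies of every binding, so profile i addresses its copy of binding b at i * (total_bindings / N) + b. A minimal check of that arithmetic with illustrative numbers:

#include <cassert>

// With `profiles` optimization profiles, a TensorRT engine exposes
// bindings_per_profile * profiles bindings in total; profile `p` finds its
// copy of binding `b` at this index. The first term is exactly what
// GetBindingsOffset() computes from binding_num_ / max_profile_num_.
int BindingIndex(int total_bindings, int profiles, int p, int b) {
  int per_profile = total_bindings / profiles;
  return p * per_profile + b;
}

int main() {
  // An engine with 3 bindings per profile (2 inputs + 1 output) and 3
  // profiles exposes 9 bindings; profile 2's copy of binding 1 is index 7.
  assert(BindingIndex(9, 3, 2, 1) == 7);
  return 0;
}
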
82 changes: 82 additions & 0 deletions paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc
@@ -207,6 +207,87 @@ void TestTunedDynamic() {
check_func(test_predictor.get());
}

void TestDynamicClone(bool with_dynamic = true, bool delete_cache = true,
bool delete_conv_bn = false) {
std::string model_dir =
FLAGS_infer_model + "/conv_bn_swish_split_gelu/conv_bn_swish_split_gelu";

std::string opt_cache_dir = model_dir + "/my_cache";
if (delete_cache) {
delete_cache_files(opt_cache_dir);
}

AnalysisConfig config;
config.EnableUseGpu(100, 0);
std::string buffer_prog, buffer_param;
ReadBinaryFile(model_dir + "/model", &buffer_prog);
ReadBinaryFile(model_dir + "/params", &buffer_param);
config.SetModelBuffer(&buffer_prog[0], buffer_prog.size(), &buffer_param[0],
buffer_param.size());
config.SetOptimCacheDir(opt_cache_dir);

config.SwitchUseFeedFetchOps(false);
// Set the input's min, max, opt shape
config.EnableTensorRtEngine(
1 << 30, 1, 1, AnalysisConfig::Precision::kFloat32, false, false);
if (delete_conv_bn) {
config.pass_builder()->DeletePass("conv_bn_fuse_pass");
}
if (with_dynamic) {
std::map<std::string, std::vector<int>> min_input_shape = {
{"image", {1, 1, 3, 3}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"image", {1, 1, 10, 10}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"image", {1, 1, 3, 3}}};

config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
}
auto predictor = CreatePaddlePredictor(config);
auto input_names = predictor->GetInputNames();
int channels = 1;
int height = 3;
int width = 3;
int input_num = channels * height * width * 1;

float *input = new float[input_num];
memset(input, 0, input_num * sizeof(float));
auto input_t = predictor->GetInputTensor(input_names[0]);
input_t->Reshape({1, channels, height, width});
input_t->copy_from_cpu(input);

ASSERT_TRUE(predictor->ZeroCopyRun());

std::vector<float> out_data;
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputTensor(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());

auto predictor2 = predictor->Clone();
auto input_t2 = predictor2->GetInputTensor(input_names[0]);
input_t2->Reshape({1, channels, height, width});
input_t2->copy_from_cpu(input);

ASSERT_TRUE(predictor2->ZeroCopyRun());

std::vector<float> out_data2;
auto output_t2 = predictor2->GetOutputTensor(output_names[0]);
std::vector<int> output_shape2 = output_t2->shape();
int out_num2 = std::accumulate(output_shape2.begin(), output_shape2.end(), 1,
std::multiplies<int>());
out_data2.resize(out_num2);
output_t2->copy_to_cpu(out_data2.data());
ASSERT_TRUE(out_data2.size() == out_data.size());
for (size_t i = 0; i < out_data.size(); i++) {
EXPECT_NEAR(out_data2[i], out_data[i], 1e-5);
}
}

TEST(AnalysisPredictor, trt_dynamic) { TestDynamic(true); }
TEST(AnalysisPredictor, trt_static) { TestDynamic(false); }
TEST(AnalysisPredictor, trt_memory_serialize) {
@@ -218,6 +299,7 @@ TEST(AnalysisPredictor, trt_memory_serialize) {
TEST(AnalysisPredictor, trt_dynamic2) { TestDynamic2(); }

TEST(AnalysisPredictor, trt_tuned_dynamic) { TestTunedDynamic(); }
TEST(AnalysisPredictor, trt_dynamic_clone) { TestDynamicClone(); }

} // namespace inference
} // namespace paddle