From 104443527c1fb4c50d17a1ad4b93de1340b16709 Mon Sep 17 00:00:00 2001
From: yuanlehome
Date: Sun, 8 Oct 2023 11:23:20 +0000
Subject: [PATCH] inference use InterperterCore

---
 paddle/fluid/framework/naive_executor.cc       | 14 +++++++
 paddle/fluid/framework/naive_executor.h        | 14 +++++++
 .../interpreter/execution_config.h             |  1 +
 .../interpreter/interpreter_util.cc            |  5 +++
 paddle/fluid/inference/api/analysis_config.cc  |  2 +
 .../fluid/inference/api/analysis_predictor.cc  | 40 +++++++++++++++----
 .../inference/api/paddle_analysis_config.h     |  6 +++
 paddle/fluid/pybind/inference_api.cc           |  3 ++
 8 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 9f8e9ed80ca46..b11b973fe7f1b 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/framework/naive_executor.h"
 
+#include
 #include
 #include
 #include
@@ -51,6 +52,19 @@ void NaiveExecutor::Prepare(Scope *scope,
   CreateOps(program_desc, block_id, with_feed_fetch_ops);
 }
 
+void NaiveExecutor::PrepareInterperterCore(
+    Scope *scope,
+    const ProgramDesc &program_desc,
+    const framework::interpreter::ExecutionConfig &execution_config) {
+  interpreter_core_ = std::make_unique<framework::InterpreterCore>(
+      place_, program_desc.Block(0), scope, execution_config);
+}
+
+void NaiveExecutor::RunInterperterCore(
+    const std::vector<std::string> &feed_names, bool need_fetch) {
+  interpreter_core_->Run(feed_names, need_fetch);
+}
+
 void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_DNNL
   platform::AttachPointerHashToMKLDNNKey(this, place_);
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 85f98046285b3..295ce691f1170 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -26,6 +26,9 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/place.h"
 
+#include "paddle/fluid/framework/new_executor/interpreter/execution_config.h"
+#include "paddle/fluid/framework/new_executor/interpretercore.h"
+
 namespace paddle {
 namespace framework {
 
@@ -52,6 +55,12 @@ class NaiveExecutor {
                int block_id,
                bool with_feed_fetch_ops);
 
+  void PrepareInterperterCore(
+      Scope* scope,
+      const ProgramDesc& program_desc,
+      const framework::interpreter::ExecutionConfig& execution_config =
+          framework::interpreter::ExecutionConfig{});
+
   // Create variables before head.
   // Create parameters if persistable is true, or create the temporary variables
   // instead.
@@ -63,6 +72,9 @@ class NaiveExecutor {
   // Run all the operators.
   void Run();
 
+  void RunInterperterCore(const std::vector<std::string>& feed_names = {},
+                          bool need_fetch = false);
+
   // Get an tensor to operating directly, without the need for feed_ops.
   phi::DenseTensor* FindTensor(const std::string& name);
 
@@ -96,6 +108,8 @@ class NaiveExecutor {
 
   std::unordered_map> reuse_cache_;
   std::vector cluster_buffer_;
+
+  std::unique_ptr<framework::InterpreterCore> interpreter_core_;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/framework/new_executor/interpreter/execution_config.h b/paddle/fluid/framework/new_executor/interpreter/execution_config.h
index 828678fa59da1..def76235331f1 100644
--- a/paddle/fluid/framework/new_executor/interpreter/execution_config.h
+++ b/paddle/fluid/framework/new_executor/interpreter/execution_config.h
@@ -29,6 +29,7 @@ struct ExecutionConfig {
   bool used_for_cinn{false};
   bool used_for_control_flow_op{false};
   bool used_for_jit{false};
+  bool used_for_inference{false};
 
   size_t device_num_threads{0};
   size_t host_num_threads{0};
diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 8015a50545e69..d07874448acee 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -599,6 +599,11 @@ void BuildOpFuncList(const platform::Place& place,
   for (size_t i = 0; i < ops.size(); ++i) {
     auto op = ops[i].get();
     const std::string& op_type = op->Type();
+    if (execution_config.used_for_inference) {
+      if (op_type == "feed" || op_type == "fetch") {
+        continue;
+      }
+    }
 
     VLOG(6) << "Build OpFuncNode from : " << op_type;
 
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index a0d66dc509298..eee02f5fc4d23 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -576,6 +576,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(apply_optim_);
   CP_MEMBER(skip_load_params_);
 
+  CP_MEMBER(use_new_executor_);
+
   if (use_gpu_) {
     PADDLE_ENFORCE_EQ(use_xpu_,
                       false,
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index f30e2c560b57f..e9dfcf4ed5f55 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -702,6 +702,20 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(
       sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);
 
+  if (config_.new_executor_enabled()) {
+    framework::interpreter::ExecutionConfig execution_config;
+    execution_config.create_local_scope = false;
+    execution_config.used_for_inference = true;
+    auto input_names = GetInputNames();
+    execution_config.skip_gc_vars.insert(input_names.begin(),
+                                         input_names.end());
+    auto output_names = GetOutputNames();
+    execution_config.skip_gc_vars.insert(output_names.begin(),
+                                         output_names.end());
+    executor_->PrepareInterperterCore(
+        sub_scope_, *inference_program_, execution_config);
+  }
+
   if (config_.enable_memory_optim_) {
     auto *pass_res_info =
         inference::analysis::PassResultInfoForRuntime::Instance();
@@ -1107,9 +1121,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
     HookCollectShapeRangeInfo();
   }
 
-  // Run the inference program
-  // if share variables, we need not create variables
-  executor_->Run();
+  if (config_.new_executor_enabled()) {
+    executor_->RunInterperterCore();
+  } else {
+    // Run the inference program
+    // if share variables, we need not create variables
+    executor_->Run();
+  }
 
   // get fetch variable
   if (!GetFetch(output_data, scope)) {
@@ -1175,9 +1193,13 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
     HookCollectShapeRangeInfo();
   }
 
-  // Run the inference program
-  // if share variables, we need not create variables
-  executor_->Run();
+  if (config_.new_executor_enabled()) {
+    executor_->RunInterperterCore();
+  } else {
+    // Run the inference program
+    // if share variables, we need not create variables
+    executor_->Run();
+  }
 
   inference::DisplayMemoryInfo(place_, "after run");
 
@@ -2154,7 +2176,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
   }
 #endif
 
-  executor_->Run();
+  if (config_.new_executor_enabled()) {
+    executor_->RunInterperterCore();
+  } else {
+    executor_->Run();
+  }
 
   inference::DisplayMemoryInfo(place_, "after run");
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 5299fa4334ae8..214506c6e9169 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -880,6 +880,10 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   int tensorrt_optimization_level() { return trt_optimization_level_; }
 
+  void EnableNewExecutor(bool x = true) { use_new_executor_ = x; }
+
+  bool new_executor_enabled() const { return use_new_executor_; }
+
   void EnableDlnne(
       int min_subgraph_size = 3,
       int max_batch_size = 1,
@@ -1303,6 +1307,8 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_feed_fetch_ops_{true};
   bool ir_debug_{false};
 
+  bool use_new_executor_{false};
+
   bool specify_input_name_{false};
   int cpu_math_library_num_threads_{1};
 
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index bd569f328b115..d59058f2d8ca0 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -852,6 +852,9 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_memory_optim",
           &AnalysisConfig::EnableMemoryOptim,
           py::arg("x") = true)
+      .def("enable_new_executor",
+           &AnalysisConfig::EnableNewExecutor,
+           py::arg("x") = true)
       .def("enable_profile", &AnalysisConfig::EnableProfile)
      .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
      .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
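
Usage sketch (reviewer note, not part of the patch): the switch added here is
AnalysisConfig::EnableNewExecutor(). When it is on, AnalysisPredictor::PrepareExecutor()
builds an InterpreterCore through NaiveExecutor::PrepareInterperterCore(), and both
AnalysisPredictor::Run() overloads as well as ZeroCopyRun() dispatch to
NaiveExecutor::RunInterperterCore() instead of NaiveExecutor::Run(). A minimal C++ example
of driving that path follows; the model file names, the input shape, and the
paddle_infer::Config/CreatePredictor/Tensor calls are the standard public inference API and
are assumptions for illustration, not something this patch touches.

    #include <vector>
    #include "paddle_inference_api.h"

    int main() {
      // Hypothetical model files; replace with a real exported inference model.
      paddle_infer::Config config("inference.pdmodel", "inference.pdiparams");
      // Added by this patch: execute the program on the new executor (InterpreterCore).
      config.EnableNewExecutor(true);

      auto predictor = paddle_infer::CreatePredictor(config);

      // Zero-copy feed; with the new executor enabled this Run() ends up in
      // NaiveExecutor::RunInterperterCore() rather than the op-by-op NaiveExecutor::Run().
      auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
      std::vector<float> data(1 * 3 * 224 * 224, 0.f);  // assumed NCHW input shape
      input->Reshape({1, 3, 224, 224});
      input->CopyFromCpu(data.data());

      predictor->Run();

      auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
      (void)output;
      return 0;
    }

From Python, the same switch is exposed by the new pybind binding as
config.enable_new_executor().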