
Commit

inference use InterperterCore
yuanlehome committed Oct 8, 2023
1 parent 395ffbf commit 1044435
Showing 8 changed files with 78 additions and 7 deletions.
14 changes: 14 additions & 0 deletions paddle/fluid/framework/naive_executor.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/framework/naive_executor.h"

#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -51,6 +52,19 @@ void NaiveExecutor::Prepare(Scope *scope,
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

void NaiveExecutor::PrepareInterperterCore(
Scope *scope,
const ProgramDesc &program_desc,
const framework::interpreter::ExecutionConfig &execution_config) {
interpreter_core_ = std::make_unique<framework::InterpreterCore>(
place_, program_desc.Block(0), scope, execution_config);
}

void NaiveExecutor::RunInterperterCore(
const std::vector<std::string> &feed_names, bool need_fetch) {
interpreter_core_->Run(feed_names, need_fetch);
}

void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_DNNL
platform::AttachPointerHashToMKLDNNKey(this, place_);
14 changes: 14 additions & 0 deletions paddle/fluid/framework/naive_executor.h
@@ -26,6 +26,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"

#include "paddle/fluid/framework/new_executor/interpreter/execution_config.h"
#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace paddle {
namespace framework {

@@ -52,6 +55,12 @@ class NaiveExecutor {
int block_id,
bool with_feed_fetch_ops);

void PrepareInterperterCore(
Scope* scope,
const ProgramDesc& program_desc,
const framework::interpreter::ExecutionConfig& execution_config =
framework::interpreter::ExecutionConfig{});

// Create variables before head.
// Create parameters if persistable is true, or create the temporary variables
// instead.
@@ -63,6 +72,9 @@
// Run all the operators.
void Run();

void RunInterperterCore(const std::vector<std::string>& feed_names = {},
bool need_fetch = false);

// Get an tensor to operating directly, without the need for feed_ops.
phi::DenseTensor* FindTensor(const std::string& name);

@@ -96,6 +108,8 @@ class NaiveExecutor {
std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
reuse_cache_;
std::vector<phi::DenseTensor*> cluster_buffer_;

std::unique_ptr<framework::InterpreterCore> interpreter_core_;
};

} // namespace framework
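
Taken together, the two new methods give NaiveExecutor an alternative run path through the new executor. A minimal sketch of the intended call order, assuming `place`, `scope`, and `program` are already set up by the caller (that setup is not part of this commit):

// Hedged sketch: the constructor and Prepare() call predate this commit;
// only PrepareInterperterCore() / RunInterperterCore() are new here.
paddle::framework::NaiveExecutor executor(place);
executor.Prepare(&scope, program, /*block_id=*/0, /*with_feed_fetch_ops=*/false);

// Build an InterpreterCore over block 0 with the default ExecutionConfig.
executor.PrepareInterperterCore(&scope, program);

// Inputs and outputs are exchanged through scope variables (zero-copy style),
// so the defaults (empty feed_names, need_fetch = false) suffice.
executor.RunInterperterCore();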
@@ -29,6 +29,7 @@ struct ExecutionConfig {
bool used_for_cinn{false};
bool used_for_control_flow_op{false};
bool used_for_jit{false};
bool used_for_inference{false};

size_t device_num_threads{0};
size_t host_num_threads{0};
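
The new used_for_inference flag is consumed in BuildOpFuncList below and set from the predictor later in this commit. Roughly, the predictor fills the config as in this sketch (names such as input_names/output_names are illustrative; the exact wiring appears in analysis_predictor.cc further down):

paddle::framework::interpreter::ExecutionConfig execution_config;
execution_config.create_local_scope = false;  // reuse the predictor's sub-scope
execution_config.used_for_inference = true;   // lets BuildOpFuncList skip feed/fetch ops
// Keep I/O variables out of garbage collection so their tensors stay valid
// between runs; the name lists come from the predictor in this sketch.
execution_config.skip_gc_vars.insert(input_names.begin(), input_names.end());
execution_config.skip_gc_vars.insert(output_names.begin(), output_names.end());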
@@ -599,6 +599,11 @@ void BuildOpFuncList(const platform::Place& place,
for (size_t i = 0; i < ops.size(); ++i) {
auto op = ops[i].get();
const std::string& op_type = op->Type();
if (execution_config.used_for_inference) {
if (op_type == "feed" || op_type == "fetch") {
continue;
}
}

VLOG(6) << "Build OpFuncNode from : " << op_type;

2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -576,6 +576,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(apply_optim_);
CP_MEMBER(skip_load_params_);

CP_MEMBER(use_new_executor_);

if (use_gpu_) {
PADDLE_ENFORCE_EQ(use_xpu_,
false,
40 changes: 33 additions & 7 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -702,6 +702,20 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(
sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);

if (config_.new_executor_enabled()) {
framework::interpreter::ExecutionConfig execution_config;
execution_config.create_local_scope = false;
execution_config.used_for_inference = true;
auto input_names = GetInputNames();
execution_config.skip_gc_vars.insert(input_names.begin(),
input_names.end());
auto output_names = GetOutputNames();
execution_config.skip_gc_vars.insert(output_names.begin(),
output_names.end());
executor_->PrepareInterperterCore(
sub_scope_, *inference_program_, execution_config);
}

if (config_.enable_memory_optim_) {
auto *pass_res_info =
inference::analysis::PassResultInfoForRuntime::Instance();
@@ -1107,9 +1121,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
HookCollectShapeRangeInfo();
}

// Run the inference program
// if share variables, we need not create variables
executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
}

// get fetch variable
if (!GetFetch(output_data, scope)) {
@@ -1175,9 +1193,13 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
HookCollectShapeRangeInfo();
}

// Run the inference program
// if share variables, we need not create variables
executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
}

inference::DisplayMemoryInfo(place_, "after run");

@@ -2154,7 +2176,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
}
#endif

executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
executor_->Run();
}
inference::DisplayMemoryInfo(place_, "after run");

#ifdef PADDLE_WITH_XPU
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -880,6 +880,10 @@ struct PD_INFER_DECL AnalysisConfig {
///
int tensorrt_optimization_level() { return trt_optimization_level_; }

void EnableNewExecutor(bool x = true) { use_new_executor_ = x; }

bool new_executor_enabled() const { return use_new_executor_; }

void EnableDlnne(
int min_subgraph_size = 3,
int max_batch_size = 1,
@@ -1303,6 +1307,8 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_feed_fetch_ops_{true};
bool ir_debug_{false};

bool use_new_executor_{false};

bool specify_input_name_{false};

int cpu_math_library_num_threads_{1};
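
For context, a hedged end-to-end sketch of switching the flag on from the C++ inference API; the include path, model directory, and input shape are placeholders, and everything except EnableNewExecutor() predates this commit:

#include <vector>
#include "paddle_inference_api.h"  // header location depends on the install layout

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");   // placeholder model directory
  config.EnableNewExecutor(true);   // added by this commit; defaults to false

  auto predictor = paddle_infer::CreatePredictor(config);

  // Dummy input; shape and dtype are placeholders for a real model.
  auto input_names = predictor->GetInputNames();
  auto input = predictor->GetInputHandle(input_names[0]);
  std::vector<float> data(1 * 3 * 224 * 224, 0.f);
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(data.data());

  // With the flag on, AnalysisPredictor::ZeroCopyRun() dispatches to
  // NaiveExecutor::RunInterperterCore() instead of the op-by-op Run().
  predictor->Run();
  return 0;
}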
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -852,6 +852,9 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_memory_optim",
&AnalysisConfig::EnableMemoryOptim,
py::arg("x") = true)
.def("enable_new_executor",
&AnalysisConfig::EnableNewExecutor,
py::arg("x") = true)
.def("enable_profile", &AnalysisConfig::EnableProfile)
.def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
.def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
