
Commit

inference use InterperterCore
yuanlehome committed Oct 8, 2023
1 parent 395ffbf commit 1044435
Showing 8 changed files with 78 additions and 7 deletions.
14 changes: 14 additions & 0 deletions paddle/fluid/framework/naive_executor.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/framework/naive_executor.h"

#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -51,6 +52,19 @@ void NaiveExecutor::Prepare(Scope *scope,
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

void NaiveExecutor::PrepareInterperterCore(
Scope *scope,
const ProgramDesc &program_desc,
const framework::interpreter::ExecutionConfig &execution_config) {
interpreter_core_ = std::make_unique<framework::InterpreterCore>(
place_, program_desc.Block(0), scope, execution_config);
}

void NaiveExecutor::RunInterperterCore(
const std::vector<std::string> &feed_names, bool need_fetch) {
interpreter_core_->Run(feed_names, need_fetch);
}

void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_DNNL
platform::AttachPointerHashToMKLDNNKey(this, place_);
14 changes: 14 additions & 0 deletions paddle/fluid/framework/naive_executor.h
@@ -26,6 +26,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"

#include "paddle/fluid/framework/new_executor/interpreter/execution_config.h"
#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace paddle {
namespace framework {

@@ -52,6 +55,12 @@ class NaiveExecutor {
int block_id,
bool with_feed_fetch_ops);

void PrepareInterperterCore(
Scope* scope,
const ProgramDesc& program_desc,
const framework::interpreter::ExecutionConfig& execution_config =
framework::interpreter::ExecutionConfig{});

// Create variables before head.
// Create parameters if persistable is true, or create the temporary variables
// instead.
@@ -63,6 +72,9 @@
// Run all the operators.
void Run();

void RunInterperterCore(const std::vector<std::string>& feed_names = {},
bool need_fetch = false);

// Get an tensor to operating directly, without the need for feed_ops.
phi::DenseTensor* FindTensor(const std::string& name);

@@ -96,6 +108,8 @@ class NaiveExecutor {
std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
reuse_cache_;
std::vector<phi::DenseTensor*> cluster_buffer_;

std::unique_ptr<framework::InterpreterCore> interpreter_core_;
};

} // namespace framework
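
Taken together, the two new methods give NaiveExecutor an alternative run path through the new executor. A minimal sketch of the intended call order, assuming `place`, `scope`, and `program` are already set up by the caller (that setup is not part of this commit):

// Hedged sketch: the constructor and Prepare() call predate this commit;
// only PrepareInterperterCore() / RunInterperterCore() are new here.
paddle::framework::NaiveExecutor executor(place);
executor.Prepare(&scope, program, /*block_id=*/0, /*with_feed_fetch_ops=*/false);

// Build an InterpreterCore over block 0 with the default ExecutionConfig.
executor.PrepareInterperterCore(&scope, program);

// Inputs and outputs are exchanged through scope variables (zero-copy style),
// so the defaults (empty feed_names, need_fetch = false) suffice.
executor.RunInterperterCore();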
@@ -29,6 +29,7 @@ struct ExecutionConfig {
bool used_for_cinn{false};
bool used_for_control_flow_op{false};
bool used_for_jit{false};
bool used_for_inference{false};

size_t device_num_threads{0};
size_t host_num_threads{0};
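
The new used_for_inference flag is consumed in BuildOpFuncList below and set from the predictor later in this commit. Roughly, the predictor fills the config as in this sketch (names such as input_names/output_names are illustrative; the exact wiring appears in analysis_predictor.cc further down):

paddle::framework::interpreter::ExecutionConfig execution_config;
execution_config.create_local_scope = false;  // reuse the predictor's sub-scope
execution_config.used_for_inference = true;   // lets BuildOpFuncList skip feed/fetch ops
// Keep I/O variables out of garbage collection so their tensors stay valid
// between runs; the name lists come from the predictor in this sketch.
execution_config.skip_gc_vars.insert(input_names.begin(), input_names.end());
execution_config.skip_gc_vars.insert(output_names.begin(), output_names.end());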
@@ -599,6 +599,11 @@ void BuildOpFuncList(const platform::Place& place,
for (size_t i = 0; i < ops.size(); ++i) {
auto op = ops[i].get();
const std::string& op_type = op->Type();
if (execution_config.used_for_inference) {
if (op_type == "feed" || op_type == "fetch") {
continue;
}
}

VLOG(6) << "Build OpFuncNode from : " << op_type;

2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -576,6 +576,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(apply_optim_);
CP_MEMBER(skip_load_params_);

CP_MEMBER(use_new_executor_);

if (use_gpu_) {
PADDLE_ENFORCE_EQ(use_xpu_,
false,
40 changes: 33 additions & 7 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -702,6 +702,20 @@ bool AnalysisPredictor::PrepareExecutor() {
executor_->Prepare(
sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);

if (config_.new_executor_enabled()) {
framework::interpreter::ExecutionConfig execution_config;
execution_config.create_local_scope = false;
execution_config.used_for_inference = true;
auto input_names = GetInputNames();
execution_config.skip_gc_vars.insert(input_names.begin(),
input_names.end());
auto output_names = GetOutputNames();
execution_config.skip_gc_vars.insert(output_names.begin(),
output_names.end());
executor_->PrepareInterperterCore(
sub_scope_, *inference_program_, execution_config);
}

if (config_.enable_memory_optim_) {
auto *pass_res_info =
inference::analysis::PassResultInfoForRuntime::Instance();
@@ -1107,9 +1121,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
HookCollectShapeRangeInfo();
}

// Run the inference program
// if share variables, we need not create variables
executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
}

// get fetch variable
if (!GetFetch(output_data, scope)) {
@@ -1175,9 +1193,13 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
HookCollectShapeRangeInfo();
}

// Run the inference program
// if share variables, we need not create variables
executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
}

inference::DisplayMemoryInfo(place_, "after run");

@@ -2154,7 +2176,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
}
#endif

executor_->Run();
if (config_.new_executor_enabled()) {
executor_->RunInterperterCore();
} else {
executor_->Run();
}
inference::DisplayMemoryInfo(place_, "after run");

#ifdef PADDLE_WITH_XPU
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -880,6 +880,10 @@ struct PD_INFER_DECL AnalysisConfig {
///
int tensorrt_optimization_level() { return trt_optimization_level_; }

void EnableNewExecutor(bool x = true) { use_new_executor_ = x; }

bool new_executor_enabled() const { return use_new_executor_; }

void EnableDlnne(
int min_subgraph_size = 3,
int max_batch_size = 1,
@@ -1303,6 +1307,8 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_feed_fetch_ops_{true};
bool ir_debug_{false};

bool use_new_executor_{false};

bool specify_input_name_{false};

int cpu_math_library_num_threads_{1};
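
For context, a hedged end-to-end sketch of switching the flag on from the C++ inference API; the include path, model directory, and input shape are placeholders, and everything except EnableNewExecutor() predates this commit:

#include <vector>
#include "paddle_inference_api.h"  // header location depends on the install layout

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");   // placeholder model directory
  config.EnableNewExecutor(true);   // added by this commit; defaults to false

  auto predictor = paddle_infer::CreatePredictor(config);

  // Dummy input; shape and dtype are placeholders for a real model.
  auto input_names = predictor->GetInputNames();
  auto input = predictor->GetInputHandle(input_names[0]);
  std::vector<float> data(1 * 3 * 224 * 224, 0.f);
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(data.data());

  // With the flag on, AnalysisPredictor::ZeroCopyRun() dispatches to
  // NaiveExecutor::RunInterperterCore() instead of the op-by-op Run().
  predictor->Run();
  return 0;
}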
3 changes: 3 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
@@ -852,6 +852,9 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_memory_optim",
&AnalysisConfig::EnableMemoryOptim,
py::arg("x") = true)
.def("enable_new_executor",
&AnalysisConfig::EnableNewExecutor,
py::arg("x") = true)
.def("enable_profile", &AnalysisConfig::EnableProfile)
.def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
.def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
