Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IPU] paddle-inference support custom-ops #45235

Merged
merged 5 commits into from
Sep 30, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const {
if (node->Var()->GetDataType() == proto::VarType::INT64) {
node->Var()->SetDataType(proto::VarType::INT32);
}
// float64->float32
if (node->Var()->GetDataType() == proto::VarType::FP64) {
node->Var()->SetDataType(proto::VarType::FP32);
}
}
}

Expand Down
25 changes: 25 additions & 0 deletions paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,30 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
VLOG(10) << "Transfer var to fp16...";
auto* scope = ipu_backend->GetScope();

// Record specific vars to skip
std::set<std::string> skip_var_lists;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
// clip op's attrs `max` and `min` only support FP32
if (node->Op()->Type() == "popart_clip") {
auto min_tensor_name = node->Op()->InputArgumentNames()[1];
auto max_tensor_name = node->Op()->InputArgumentNames()[2];
skip_var_lists.insert(min_tensor_name);
skip_var_lists.insert(max_tensor_name);
}
}
}

std::unordered_set<std::string> used_var_names;
for (auto* node : graph->Nodes()) {
if (node->IsVar()) {
auto var_desc = node->Var();
if (var_desc->GetDataType() == proto::VarType::FP32) {
// Skip specific vars
if (skip_var_lists.find(var_desc->Name()) != skip_var_lists.end()) {
continue;
}

// Transfer the dtypes of var_desc
var_desc->SetDataType(proto::VarType::FP16);
VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
Expand Down Expand Up @@ -81,6 +100,12 @@ void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
}
}
if (op_desc->Type() == "popart_constant") {
// Skip specific constant
auto output_var_name = node->outputs[0]->Var()->Name();
if (skip_var_lists.find(output_var_name) != skip_var_lists.end()) {
continue;
}

// Transfer the dtype of fill_constant Op
if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
op_desc->SetAttr("dtype", 10);
Expand Down
32 changes: 32 additions & 0 deletions paddle/fluid/framework/ir/ipu/inference_process_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,33 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
// Set tiles_per_ipu for IPUMODEL
ipu_strategy_instance_->tiles_per_ipu = 128;

// Set Cache path
auto* ipu_cache_path = getenv("IPU_CACHE_PATH");
if (ipu_cache_path) {
ipu_strategy_instance_->popart_options.enableEngineCaching = true;
ipu_strategy_instance_->popart_options.cachePath =
std::string{ipu_cache_path};
}

// custom ops and patterns
std::unordered_set<std::string> custom_op_names;
auto custom_ops_info =
graph->Get<std::vector<std::vector<std::string>>>("custom_ops_info");
for (auto custom_op : custom_ops_info) {
ipu_strategy_instance_->AddCustomOp(
custom_op[0], custom_op[1], custom_op[2], atoi(custom_op[3].c_str()));
custom_op_names.insert(custom_op[0]);
}
auto patterns =
graph->Get<std::vector<std::vector<std::string>>>("custom_patterns");
for (auto pattern : patterns) {
if (pattern[1] == "True") {
ipu_strategy_instance_->EnablePattern(pattern[0]);
} else if (pattern[1] == "False") {
ipu_strategy_instance_->DisablePattern(pattern[0]);
}
}

ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));

// Get feed_list and fetch list
Expand Down Expand Up @@ -140,6 +167,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
"feed_list",
new std::vector<std::string>(feed_list.begin(), feed_list.end()));
}
if (pass_name == "popart_canonicalization_pass") {
pass->Set("custom_ops",
new std::unordered_set<std::string>(custom_op_names.begin(),
custom_op_names.end()));
}
pass->Apply(graph);
}

Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,12 @@ struct Argument {
IpuAvailableMemoryProportion,
float);
DECL_ARGUMENT_FIELD(ipu_enable_half_partial, IpuEnableHalfPartial, bool);
DECL_ARGUMENT_FIELD(ipu_custom_ops_info,
IpuCustomOpsInfo,
std::vector<std::vector<std::string>>);
DECL_ARGUMENT_FIELD(ipu_custom_patterns,
IpuCustomPatterns,
std::vector<std::vector<std::string>>);

// npu related
DECL_ARGUMENT_FIELD(use_npu, UseNpu, bool);
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
&argument->ipu_available_memory_proportion());
argument->main_graph().SetNotOwned("enable_half_partial",
&argument->ipu_enable_half_partial());
argument->main_graph().SetNotOwned("custom_ops_info",
&argument->ipu_custom_ops_info());
argument->main_graph().SetNotOwned("custom_patterns",
&argument->ipu_custom_patterns());
}
}
#endif
Expand Down
124 changes: 123 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/utils/string/split.h"

#ifdef PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/helper.h"
Expand Down Expand Up @@ -208,6 +209,120 @@ void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16,
Update();
}

void AnalysisConfig::SetIpuCustomInfo(
    const std::vector<std::vector<std::string>> &ipu_custom_ops_info,
    const std::map<std::string, bool> &ipu_custom_patterns) {
  ipu_custom_ops_info_ = ipu_custom_ops_info;
  // Flatten the {pattern_name -> enabled} map into [name, "True"/"False"]
  // pairs, the representation consumed by the IPU inference passes.
  for (const auto &pattern : ipu_custom_patterns) {
    ipu_custom_patterns_.push_back(std::vector<std::string>{
        pattern.first, pattern.second ? "True" : "False"});
  }

  Update();
}

void AnalysisConfig::LoadIpuConfig(const std::string &config_path) {
  // Parse a simple `key,value` per-line IPU config file and apply each entry
  // to the corresponding ipu_* member. Throws NotFound if the file cannot be
  // opened and InvalidArgument on an unrecognized key.
  std::ifstream fin(config_path, std::ios::in);
  PADDLE_ENFORCE_EQ(
      static_cast<bool>(fin.is_open()),
      true,
      platform::errors::NotFound(
          "Cannot open file %s, please confirm whether the file is normal.",
          config_path));
  std::string line;
  while (std::getline(fin, line)) {
    // remove all space
    line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
    // Skip blank lines (e.g. a trailing newline) so they are not reported as
    // an invalid key.
    if (line.empty()) {
      continue;
    }

    std::string key;
    std::string value;
    std::istringstream stream(line);
    // Split string to key and value based on the first `,`
    std::getline(stream, key, ',');
    std::getline(stream, value);

    // Case-insensitive mapping of "true"/"1" to true; anything else is false.
    auto string2bool = [](std::string s) {
      std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
        return ::tolower(c);
      });
      return s == "true" || s == "1";
    };

    // ipu_custom_ops_info:
    // [[paddle_op_name, popart_op_name, domain, version], [paddle_op_name,
    // popart_op_name, domain, version]...]
    // ipu_custom_patterns:
    // [[paddle_op_name, enable_pattern], [paddle_op_name, enable_pattern]...]
    auto string2vector = [](std::string s) {
      std::vector<std::vector<std::string>> custom_info;
      // strip the outermost `[` and `]`
      s.erase(0, 1);
      s.pop_back();

      std::string one;
      std::istringstream s_stream(s);
      while (std::getline(s_stream, one, ']')) {
        // After splitting on `]`, entries past the first carry the `,`
        // separator in front of their `[` (e.g. ",[a,b"); drop everything up
        // to and including the `[` so both forms parse identically.
        one.erase(0, one.find('[') + 1);
        if (!one.empty()) {
          custom_info.push_back(paddle::string::Split(one, ','));
        }
      }
      return custom_info;
    };

    if (ipu_config_mapper_.find(key) == ipu_config_mapper_.end()) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "invalid key %s in IPU config", key));
    }
    switch (ipu_config_mapper_.at(key)) {
      case ipu_config_code::ipu_device_num:
        ipu_device_num_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_micro_batch_size:
        ipu_micro_batch_size_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_enable_pipelining:
        ipu_enable_pipelining_ = string2bool(value);
        break;
      case ipu_config_code::ipu_batches_per_step:
        ipu_batches_per_step_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_enable_fp16:
        ipu_enable_fp16_ = string2bool(value);
        break;
      case ipu_config_code::ipu_replica_num:
        ipu_replica_num_ = std::stoi(value);
        break;
      case ipu_config_code::ipu_available_memory_proportion:
        ipu_available_memory_proportion_ = std::stof(value);
        break;
      case ipu_config_code::ipu_enable_half_partial:
        ipu_enable_half_partial_ = string2bool(value);
        break;
      case ipu_config_code::ipu_custom_ops_info:
        ipu_custom_ops_info_ = string2vector(value);
        break;
      case ipu_config_code::ipu_custom_patterns:
        ipu_custom_patterns_ = string2vector(value);
        break;
      default:
        // Unreachable: every key in ipu_config_mapper_ is handled above.
        PADDLE_THROW(platform::errors::InvalidArgument(
            "invalid key %s in IPU config", key));
        break;
    }
  }

  Update();
}

void AnalysisConfig::EnableONNXRuntime() {
#ifdef PADDLE_WITH_ONNXRUNTIME
use_onnxruntime_ = true;
Expand Down Expand Up @@ -358,6 +473,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(ipu_replica_num_);
CP_MEMBER(ipu_available_memory_proportion_);
CP_MEMBER(ipu_enable_half_partial_);
CP_MEMBER(ipu_custom_ops_info_);
CP_MEMBER(ipu_custom_patterns_);

// fleet exe related
CP_MEMBER(dist_config_);
Expand Down Expand Up @@ -910,7 +1027,12 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ipu_replica_num_;
ss << ipu_available_memory_proportion_;
ss << ipu_enable_half_partial_;

for (auto custom_op : ipu_custom_ops_info_)
for (auto attr : custom_op) ss << attr;
ss << ";";
for (auto pattern : ipu_custom_patterns_)
for (auto attr : pattern) ss << attr;
ss << ";";
for (auto &op : mixed_black_list_) ss << op.c_str();
return ss.str();
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetIpuAvailableMemoryProportion(
config_.ipu_available_memory_proportion_);
argument_.SetIpuEnableHalfPartial(config_.ipu_enable_half_partial_);
argument_.SetIpuCustomOpsInfo(config_.ipu_custom_ops_info_);
argument_.SetIpuCustomPatterns(config_.ipu_custom_patterns_);
#endif

argument_.SetUseNpu(config_.use_npu_);
Expand Down
51 changes: 51 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,22 @@ struct PD_INFER_DECL AnalysisConfig {
const std::string& precision = "int16",
bool adaptive_seqlen = false);

///
/// \brief configs of IPU
///
enum class ipu_config_code {
ipu_device_num,
ipu_micro_batch_size,
ipu_enable_pipelining,
ipu_batches_per_step,
ipu_enable_fp16,
ipu_replica_num,
ipu_available_memory_proportion,
ipu_enable_half_partial,
ipu_custom_ops_info,
ipu_custom_patterns
};

///
/// \brief Turn on IPU.
///
Expand Down Expand Up @@ -318,6 +334,25 @@ struct PD_INFER_DECL AnalysisConfig {
float ipu_available_memory_proportion = 1.0,
bool ipu_enable_half_partial = false);

///
/// \brief Set IPU custom ops and patterns.
///
/// \param custom_ops_info the mapper of paddle custom ops and popart ops.
/// e.g. {{paddle_op_name, popart_op_name, op_domain, op_version}}.
/// \param custom_patterns the names of popart patterns. e.g. {{pattern_name,
/// enable_pattern}}
///
void SetIpuCustomInfo(
const std::vector<std::vector<std::string>>& ipu_custom_ops_info = {},
const std::map<std::string, bool>& ipu_custom_patterns = {});

///
/// \brief Load IPU config from configuration file.
///
/// \param config_path configure file path for ipu.
///
void LoadIpuConfig(const std::string& config_path);

///
/// \brief Set XPU device id.
///
Expand Down Expand Up @@ -1118,6 +1153,22 @@ struct PD_INFER_DECL AnalysisConfig {
float ipu_available_memory_proportion_{1.0};
bool ipu_enable_half_partial_{false};

std::vector<std::vector<std::string>> ipu_custom_ops_info_;
std::vector<std::vector<std::string>> ipu_custom_patterns_;

const std::unordered_map<std::string, ipu_config_code> ipu_config_mapper_ = {
{"ipu_device_num", ipu_config_code::ipu_device_num},
{"ipu_micro_batch_size", ipu_config_code::ipu_micro_batch_size},
{"ipu_enable_pipelining", ipu_config_code::ipu_enable_pipelining},
{"ipu_batches_per_step", ipu_config_code::ipu_batches_per_step},
{"ipu_enable_fp16", ipu_config_code::ipu_enable_fp16},
{"ipu_replica_num", ipu_config_code::ipu_replica_num},
{"ipu_available_memory_proportion",
ipu_config_code::ipu_available_memory_proportion},
{"ipu_enable_half_partial", ipu_config_code::ipu_enable_half_partial},
{"ipu_custom_ops_info", ipu_config_code::ipu_custom_ops_info},
{"ipu_custom_patterns", ipu_config_code::ipu_custom_patterns}};

// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
Expand Down
13 changes: 12 additions & 1 deletion paddle/fluid/platform/device/ipu/ipu_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,20 @@ void IpuBackend::Compile(framework::ir::Graph* graph,
if (ipu_strategy_->is_training) {
compiler_->LowerOptimizer(scope_);
}

// environment variable IPU_ONNX_DUMP_PATH have higher priority
std::string onnx_dump_path;
if (!ipu_strategy_->onnx_dump_path.empty()) {
SaveModelProto(ipu_strategy_->onnx_dump_path);
onnx_dump_path = ipu_strategy_->onnx_dump_path;
}
auto* ipu_onnx_dump_path = getenv("IPU_ONNX_DUMP_PATH");
if (ipu_onnx_dump_path) {
onnx_dump_path = std::string{ipu_onnx_dump_path};
}
if (!onnx_dump_path.empty()) {
SaveModelProto(onnx_dump_path);
}

executor_->SetCompilerResources(compiler_->GetResources());
executor_->Prepare(compiler_->GetModelProto());
is_compiled_ = true;
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,14 @@ void BindAnalysisConfig(py::module *m) {
py::arg("ipu_replica_num") = 1,
py::arg("ipu_available_memory_proportion") = 1.0,
py::arg("ipu_enable_half_partial") = false)
.def("set_ipu_custom_info",
&AnalysisConfig::SetIpuCustomInfo,
py::arg("ipu_custom_ops_info") =
std::vector<std::vector<std::string>>({}),
py::arg("ipu_custom_patterns") = std::map<std::string, bool>({}))
.def("load_ipu_config",
&AnalysisConfig::LoadIpuConfig,
py::arg("config_path"))
.def("disable_gpu", &AnalysisConfig::DisableGpu)
.def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
.def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
Expand Down