[Inference] Support NNAdapter and ascend310 #35226

Merged · 22 commits · Sep 22, 2021
31 changes: 31 additions & 0 deletions cmake/external/lite.cmake
@@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
ENDIF()
endif()

if (LITE_WITH_NNADAPTER)
add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
if (NNADAPTER_WITH_NPU)
add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT")
endif()
endif()

if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
include(ExternalProject)
set(LITE_PROJECT extern_lite)
@@ -67,6 +75,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
@@ -110,6 +121,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF)

@@ -146,6 +160,11 @@ endif()
if (WITH_ARM)
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
if (NNADAPTER_WITH_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
@@ -174,5 +193,17 @@ endfunction()
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)

if (LITE_WITH_NNADAPTER)
if (NNADAPTER_WITH_NPU)
external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
set(LITE_DEPS lite_full_static lite_nnadapter)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
endif()
else()
set(LITE_DEPS lite_full_static)
endif()

add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
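
The add_definitions calls above are what downstream C++ code keys on. A minimal sketch of how the resulting macros are typically consumed, assuming only the LITE_SUBGRAPH_WITH_* definitions from this file; the function and message are illustrative, not part of this PR:

#include <stdexcept>

// Guard helper: fails fast when NNAdapter support was not compiled in.
void AssertNNAdapterCompiledIn() {
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
  // Configure with -DLITE_WITH_NNADAPTER=ON (and -DNNADAPTER_WITH_NPU=ON for
  // Ascend NPU, which also defines LITE_SUBGRAPH_WITH_NPU) to enable this.
  throw std::runtime_error("Paddle was not compiled with LITE_WITH_NNADAPTER.");
#endif
}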
16 changes: 16 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -233,6 +233,22 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);

DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer,
NNAdapterSubgraphPartitionConfigBuffer, std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path,
NNAdapterSubgraphPartitionConfigPath, std::string);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer,
std::vector<std::vector<char>>);

// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);

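
For context, a minimal sketch of the getter/setter pattern a DECL_ARGUMENT_FIELD-style macro implements, so the Set*/get calls in the passes below line up with these declarations. This is an assumption about the expansion; the real macro in argument.h may differ (for example, it may also track whether a field has been set):

// Minimal sketch, assuming a plain member-plus-accessors expansion.
#define DECL_ARGUMENT_FIELD(field__, Field, type__)     \
 public:                                                \
  type__& field__() { return field__##_; }              \
  void Set##Field(const type__& v) { field__##_ = v; }  \
                                                        \
 private:                                               \
  type__ field__##_;

// e.g. DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool) yields
// use_nnadapter() and SetUseNNAdapter(bool) over a bool use_nnadapter_.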
20 changes: 20 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -194,6 +194,26 @@ void IRPassManager::CreatePasses(Argument *argument,
new std::string(argument->xpu_autotune_file()));
pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
// NNAdapter Related
pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
pass->Set("model_cache_dir",
new std::string(argument->nnadapter_model_cache_dir()));
pass->Set("device_names", new std::vector<std::string>(
argument->nnadapter_device_names()));
pass->Set("context_properties",
new std::string(argument->nnadapter_context_properties()));
pass->Set("subgraph_partition_config_buffer",
new std::string(
argument->nnadapter_subgraph_partition_config_buffer()));
pass->Set("subgraph_partition_config_path",
new std::string(
argument->nnadapter_subgraph_partition_config_path()));
pass->Set("nnadapter_model_cache_buffer",
new std::vector<std::vector<char>>(
argument->nnadapter_model_cache_buffer()));
pass->Set("nnadapter_model_cache_token",
new std::vector<std::string>(
argument->nnadapter_model_cache_token()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
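
Each pass->Set call above hands a heap-allocated value to the pass under a string key, and LiteSubgraphPass::SetUpEngine (next file) reads it back with Get<T>. A minimal sketch of that contract, assuming a type-erased attribute map; the real ir::Pass implementation also manages deleters and type checks:

#include <map>
#include <string>

// Illustrative attribute map mirroring the Set/Get pairing between passes.
class PassAttrSketch {
 public:
  template <typename T>
  void Set(const std::string& key, T* value) {
    attrs_[key] = value;  // the real pass takes ownership and frees it later
  }
  template <typename T>
  T& Get(const std::string& key) const {
    return *static_cast<T*>(attrs_.at(key));  // real code also verifies the type
  }

 private:
  std::map<std::string, void*> attrs_;
};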
26 changes: 26 additions & 0 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -250,12 +250,27 @@ void LiteSubgraphPass::SetUpEngine(
std::string autotune_file = Get<std::string>("autotune_file");
std::string precision = Get<std::string>("precision");
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
// NNAdapter Related
bool use_nnadapter = Get<bool>("use_nnadapter");
std::string model_cache_dir = Get<std::string>("model_cache_dir");
auto device_names = Get<std::vector<std::string>>("device_names");
std::string context_properties = Get<std::string>("context_properties");
std::string subgraph_partition_config_buffer =
Get<std::string>("subgraph_partition_config_buffer");
std::string nnadapter_subgraph_partition_config_path =
Get<std::string>("subgraph_partition_config_path");
auto nnadapter_model_cache_buffer =
Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
auto nnadapter_model_cache_token =
Get<std::vector<std::string>>("nnadapter_model_cache_token");

lite_api::TargetType target_type;
if (use_gpu) {
target_type = TARGET(kCUDA);
} else if (use_xpu) {
target_type = TARGET(kXPU);
} else if (use_nnadapter) {
target_type = TARGET(kNNAdapter);
} else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
@@ -292,6 +307,17 @@
config.autotune_file = autotune_file;
config.precision = precision;
config.adaptive_seqlen = adaptive_seqlen;
// NNAdapter Related
config.nnadapter_model_cache_dir = model_cache_dir;
config.nnadapter_device_names = device_names;
config.nnadapter_context_properties = context_properties;
config.nnadapter_subgraph_partition_config_buffer =
subgraph_partition_config_buffer;
config.nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
config.nnadapter_model_cache_token = nnadapter_model_cache_token;

if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
lite::StrToBinaryFile("./param.bin", config.param);
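
The target-selection chain earlier in this file (kCUDA, then kXPU, then the new kNNAdapter, then a host fallback) can be summarized in isolation. A minimal sketch, assuming the paddle::lite_api::TargetType enum; the non-ARM fallback is an assumption, since the diff is truncated there:

#include "lite/api/paddle_api.h"  // assumed Paddle-Lite public header

// Illustrative helper mirroring the precedence implemented in SetUpEngine.
paddle::lite_api::TargetType SelectTarget(bool use_gpu, bool use_xpu,
                                          bool use_nnadapter) {
  using paddle::lite_api::TargetType;
  if (use_gpu) return TargetType::kCUDA;
  if (use_xpu) return TargetType::kXPU;
  if (use_nnadapter) return TargetType::kNNAdapter;
#ifdef PADDLE_WITH_ARM
  return TargetType::kARM;
#else
  return TargetType::kX86;  // assumed fallback; not shown in this diff
#endif
}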
58 changes: 57 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -195,6 +195,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// NPU related.
CP_MEMBER(use_npu_);
CP_MEMBER(npu_device_id_);
CP_MEMBER(nnadapter_config_);

// profile related.
CP_MEMBER(with_profile_);
@@ -542,7 +543,7 @@ void AnalysisConfig::Update() {
}

if (use_npu_) {
#ifdef PADDLE_WITH_ASCEND_CL
#if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
PADDLE_ENFORCE_EQ(use_gpu_, false,
platform::errors::Unavailable(
"Currently, NPU and GPU cannot be enabled in the "
@@ -816,4 +817,59 @@ std::string AnalysisConfig::Summary() {
return os.PrintTable();
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
const std::vector<std::string> &names) {
device_names = names;
return *this;
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
const std::string &properties) {
context_properties = properties;
return *this;
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
const std::string &dir) {
model_cache_dir = dir;
return *this;
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
const std::string &model_cache_token,
const std::vector<char> &model_cache_buffer) {
PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
platform::errors::InvalidArgument(
"model_cache_token should not be empty."));
PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
platform::errors::InvalidArgument(
"model_cache_buffer should not be empty."));
PADDLE_ENFORCE_EQ(model_cache_buffers.count(model_cache_token), false,
platform::errors::InvalidArgument(
"model_cache_token has already been set."));

model_cache_buffers[model_cache_token] = model_cache_buffer;
return *this;
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
const std::string &path) {
subgraph_partition_config_path = path;
return *this;
}

LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
const std::string &buffer) {
subgraph_partition_config_buffer = buffer;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
use_nnadapter = true;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
use_nnadapter = false;
return *this;
}

} // namespace paddle
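
SetModelCacheBuffers above enforces a non-empty token, a non-empty buffer, and a unique token per call. A hedged usage sketch; the helper, file path, and token string are illustrative assumptions, not part of this PR:

#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Read a serialized NNAdapter model cache file into the byte buffer
// expected by LiteNNAdapterConfig::SetModelCacheBuffers.
std::vector<char> ReadCacheFile(const std::string& path) {
  std::ifstream file(path, std::ios::binary);
  return std::vector<char>(std::istreambuf_iterator<char>(file),
                           std::istreambuf_iterator<char>());
}

// Usage:
//   config.NNAdapter().SetModelCacheBuffers(
//       "model_cache_token_0",                     // illustrative token
//       ReadCacheFile("/tmp/cache/model_0.nnc"));  // illustrative path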
41 changes: 41 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -272,6 +272,29 @@ bool AnalysisPredictor::CreateExecutor() {
"You tried to use NPU forward propagation, but Paddle was not compiled "
"with WITH_ASCEND_CL."));
#endif
} else if (config_.NNAdapter().use_nnadapter) {
if (config_.lite_engine_enabled()) {
#ifdef LITE_SUBGRAPH_WITH_NNADAPTER
// Currently, Paddle-Lite's NNAdapter user interface only supports passing
// host data pointers, so running it as a subgraph engine sacrifices
// execution efficiency; the place is therefore temporarily set to
// CPUPlace. Also, like the XPU lite engine, it must execute all parts of
// the model.
place_ = paddle::platform::CPUPlace();
#else
PADDLE_THROW(
platform::errors::Unavailable("You tried to use an NNAdapter lite "
"engine, but Paddle was not compiled "
"with it."));
#endif // LITE_SUBGRAPH_WITH_NNADAPTER
} else {
PADDLE_THROW(
platform::errors::Unavailable("You tried to use NNadapter forward "
"propagation (inference without lite "
"engine), but Paddle was not compiled "
"with LITE_WITH_NNADAPTER."));
}
} else {
place_ = paddle::platform::CPUPlace();
}
@@ -592,6 +615,24 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
// NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames(config_.NNAdapter().device_names);
argument_.SetNNAdapterContextProperties(
config_.NNAdapter().context_properties);
argument_.SetNNAdapterModelCacheDir(config_.NNAdapter().model_cache_dir);
argument_.SetNNAdapterSubgraphPartitionConfigBuffer(
config_.NNAdapter().subgraph_partition_config_buffer);
argument_.SetNNAdapterSubgraphPartitionConfigPath(
config_.NNAdapter().subgraph_partition_config_path);
std::vector<std::string> buffer_keys;
std::vector<std::vector<char>> buffer_vals;
for (auto it : config_.NNAdapter().model_cache_buffers) {
buffer_keys.emplace_back(it.first);
buffer_vals.emplace_back(it.second);
}
argument_.SetNNAdapterModelCacheToken(buffer_keys);
argument_.SetNNAdapterModelCacheBuffer(buffer_vals);
LOG(INFO) << "Lite subgraph engine is enabled";
}

33 changes: 33 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -48,6 +48,34 @@ namespace paddle {
class AnalysisPredictor;
struct MkldnnQuantizerConfig;

struct LiteNNAdapterConfig {
bool use_nnadapter{false};
std::string model_cache_dir;
std::map<std::string, std::vector<char>> model_cache_buffers;
std::vector<std::string> device_names;
std::string context_properties;
std::string subgraph_partition_config_path;
std::string subgraph_partition_config_buffer;

LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);

LiteNNAdapterConfig& SetContextProperties(const std::string& properties);

LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);

LiteNNAdapterConfig& SetModelCacheBuffers(
const std::string& model_cache_token,
const std::vector<char>& model_cache_buffer);

LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);

LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
const std::string& buffer);

LiteNNAdapterConfig& Enable();
LiteNNAdapterConfig& Disable();
};

///
/// \brief configuration manager for AnalysisPredictor.
/// \since 1.7.0
@@ -627,6 +655,8 @@ struct PD_INFER_DECL AnalysisConfig {
///
std::string Summary();

LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }

protected:
// Update the config.
void Update();
@@ -726,6 +756,9 @@ struct PD_INFER_DECL AnalysisConfig {
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;

// NNAdapter related
LiteNNAdapterConfig nnadapter_config_;

// mkldnn related.
int mkldnn_cache_capacity_{10};
bool use_mkldnn_quantizer_{false};
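
Putting the new public API together: a minimal sketch, assuming a build with the Lite engine enabled and an Ascend device name of "huawei_ascend_npu". The header name, model path, device name, and cache directory are illustrative assumptions, not mandated by this PR:

#include "paddle_inference_api.h"  // assumed install-tree header name

void ConfigureForAscend(paddle::AnalysisConfig* config) {
  config->SetModel("/path/to/model");  // illustrative path
  // NNAdapter only runs inside the Lite subgraph engine (see
  // AnalysisPredictor::CreateExecutor above).
  config->EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
  config->NNAdapter()
      .Enable()
      .SetDeviceNames({"huawei_ascend_npu"})  // illustrative device name
      .SetModelCacheDir("/tmp/nnadapter_cache");
}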
6 changes: 3 additions & 3 deletions paddle/fluid/inference/lite/CMakeLists.txt
@@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
set(XPU_DEPS xpuapi xpurt)
endif()

cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
19 changes: 19 additions & 0 deletions paddle/fluid/inference/lite/engine.cc
@@ -69,6 +69,25 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
cfg.adaptive_seqlen);
#endif

#ifdef LITE_SUBGRAPH_WITH_NPU
lite_cxx_config.set_nnadapter_device_names(cfg.nnadapter_device_names);
lite_cxx_config.set_nnadapter_context_properties(
cfg.nnadapter_context_properties);
lite_cxx_config.set_nnadapter_model_cache_dir(cfg.nnadapter_model_cache_dir);
if (!cfg.nnadapter_subgraph_partition_config_path.empty()) {
lite_cxx_config.set_nnadapter_subgraph_partition_config_path(
cfg.nnadapter_subgraph_partition_config_path);
}
if (!cfg.nnadapter_subgraph_partition_config_buffer.empty()) {
lite_cxx_config.set_nnadapter_subgraph_partition_config_buffer(
cfg.nnadapter_subgraph_partition_config_buffer);
}
for (size_t i = 0; i < cfg.nnadapter_model_cache_token.size(); ++i) {
lite_cxx_config.set_nnadapter_model_cache_buffers(
cfg.nnadapter_model_cache_token[i],
cfg.nnadapter_model_cache_buffer[i]);
}
#endif
// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);