Skip to content

Commit

Permalink
[Backend] Remove all lite options in RuntimeOption (#1109)
Browse files Browse the repository at this point in the history
* Remove all lite options in RuntimeOption

* Fix code error

* move pybind

* Fix build error
  • Loading branch information
jiangjiajun authored Jan 13, 2023
1 parent c5b01ab commit d00df3d
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 113 deletions.
37 changes: 14 additions & 23 deletions fastdeploy/pybind/runtime.cc
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@ void BindRuntime(pybind11::module& m) {
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
.def("set_lite_context_properties", &RuntimeOption::SetLiteContextProperties)
.def("set_lite_context_properties",
&RuntimeOption::SetLiteContextProperties)
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
.def("set_lite_dynamic_shape_info", &RuntimeOption::SetLiteDynamicShapeInfo)
.def("set_lite_subgraph_partition_path", &RuntimeOption::SetLiteSubgraphPartitionPath)
.def("set_lite_mixed_precision_quantization_config_path", &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
.def("set_lite_subgraph_partition_config_buffer", &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
.def("set_lite_dynamic_shape_info",
&RuntimeOption::SetLiteDynamicShapeInfo)
.def("set_lite_subgraph_partition_path",
&RuntimeOption::SetLiteSubgraphPartitionPath)
.def("set_lite_mixed_precision_quantization_config_path",
&RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
.def("set_lite_subgraph_partition_config_buffer",
&RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
Expand Down Expand Up @@ -114,21 +119,7 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("ipu_available_memory_proportion",
&RuntimeOption::ipu_available_memory_proportion)
.def_readwrite("ipu_enable_half_partial",
&RuntimeOption::ipu_enable_half_partial)
.def_readwrite("kunlunxin_l3_workspace_size",
&RuntimeOption::kunlunxin_l3_workspace_size)
.def_readwrite("kunlunxin_locked",
&RuntimeOption::kunlunxin_locked)
.def_readwrite("kunlunxin_autotune",
&RuntimeOption::kunlunxin_autotune)
.def_readwrite("kunlunxin_autotune_file",
&RuntimeOption::kunlunxin_autotune_file)
.def_readwrite("kunlunxin_precision",
&RuntimeOption::kunlunxin_precision)
.def_readwrite("kunlunxin_adaptive_seqlen",
&RuntimeOption::kunlunxin_adaptive_seqlen)
.def_readwrite("kunlunxin_enable_multi_stream",
&RuntimeOption::kunlunxin_enable_multi_stream);
&RuntimeOption::ipu_enable_half_partial);

pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
Expand All @@ -151,9 +142,9 @@ void BindRuntime(pybind11::module& m) {
auto dtype =
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
std::vector<int64_t> data_shape;
data_shape.insert(data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() +
warm_datas[i][j].ndim());
data_shape.insert(
data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
warm_tensors[i][j].Resize(data_shape, dtype);
memcpy(warm_tensors[i][j].MutableData(),
warm_datas[i][j].mutable_data(),
Expand Down
33 changes: 1 addition & 32 deletions fastdeploy/runtime/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -390,43 +390,12 @@ void Runtime::CreateTrtBackend() {

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
auto lite_option = LiteBackendOption();
lite_option.threads = option.cpu_thread_num;
lite_option.enable_int8 = option.lite_enable_int8;
lite_option.enable_fp16 = option.lite_enable_fp16;
lite_option.power_mode = static_cast<int>(option.lite_power_mode);
lite_option.optimized_model_dir = option.lite_optimized_model_dir;
lite_option.nnadapter_subgraph_partition_config_path =
option.lite_nnadapter_subgraph_partition_config_path;
lite_option.nnadapter_subgraph_partition_config_buffer =
option.lite_nnadapter_subgraph_partition_config_buffer;
lite_option.nnadapter_device_names = option.lite_nnadapter_device_names;
lite_option.nnadapter_context_properties =
option.lite_nnadapter_context_properties;
lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
lite_option.nnadapter_dynamic_shape_info =
option.lite_nnadapter_dynamic_shape_info;
lite_option.nnadapter_mixed_precision_quantization_config_path =
option.lite_nnadapter_mixed_precision_quantization_config_path;
lite_option.enable_timvx = option.enable_timvx;
lite_option.enable_ascend = option.enable_ascend;
lite_option.enable_kunlunxin = option.enable_kunlunxin;
lite_option.device_id = option.device_id;
lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size;
lite_option.kunlunxin_locked = option.kunlunxin_locked;
lite_option.kunlunxin_autotune = option.kunlunxin_autotune;
lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file;
lite_option.kunlunxin_precision = option.kunlunxin_precision;
lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen;
lite_option.kunlunxin_enable_multi_stream =
option.kunlunxin_enable_multi_stream;

FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
lite_option),
option.paddle_lite_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false,
Expand Down
57 changes: 33 additions & 24 deletions fastdeploy/runtime/runtime_option.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
}

void RuntimeOption::UseTimVX() {
enable_timvx = true;
device = Device::TIMVX;
paddle_lite_option.enable_timvx = true;
}

void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
Expand All @@ -95,21 +95,21 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
const std::string& precision,
bool adaptive_seqlen,
bool enable_multi_stream) {
enable_kunlunxin = true;
device_id = kunlunxin_id;
kunlunxin_l3_workspace_size = l3_workspace_size;
kunlunxin_locked = locked;
kunlunxin_autotune = autotune;
kunlunxin_autotune_file = autotune_file;
kunlunxin_precision = precision;
kunlunxin_adaptive_seqlen = adaptive_seqlen;
kunlunxin_enable_multi_stream = enable_multi_stream;
device = Device::KUNLUNXIN;
paddle_lite_option.enable_kunlunxin = true;
paddle_lite_option.device_id = kunlunxin_id;
paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
paddle_lite_option.kunlunxin_locked = locked;
paddle_lite_option.kunlunxin_autotune = autotune;
paddle_lite_option.kunlunxin_autotune_file = autotune_file;
paddle_lite_option.kunlunxin_precision = precision;
paddle_lite_option.kunlunxin_adaptive_seqlen = adaptive_seqlen;
paddle_lite_option.kunlunxin_enable_multi_stream = enable_multi_stream;
}

void RuntimeOption::UseAscend() {
enable_ascend = true;
device = Device::ASCEND;
paddle_lite_option.enable_ascend = true;
}

void RuntimeOption::UseSophgo() {
Expand All @@ -124,6 +124,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
void RuntimeOption::SetCpuThreadNum(int thread_num) {
FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
cpu_thread_num = thread_num;
paddle_lite_option.threads = thread_num;
}

void RuntimeOption::SetOrtGraphOptLevel(int level) {
Expand Down Expand Up @@ -231,57 +232,65 @@ void RuntimeOption::SetOpenVINODevice(const std::string& name) {
openvino_device = name;
}

void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
void RuntimeOption::EnableLiteFP16() { paddle_lite_option.enable_fp16 = true; }

void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
void RuntimeOption::DisableLiteFP16() {
paddle_lite_option.enable_fp16 = false;
}

void RuntimeOption::EnableLiteInt8() { paddle_lite_option.enable_int8 = true; }

void RuntimeOption::DisableLiteInt8() {
paddle_lite_option.enable_int8 = false;
}

void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode;
paddle_lite_option.power_mode = mode;
}

void RuntimeOption::SetLiteOptimizedModelDir(
const std::string& optimized_model_dir) {
lite_optimized_model_dir = optimized_model_dir;
paddle_lite_option.optimized_model_dir = optimized_model_dir;
}

void RuntimeOption::SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path) {
lite_nnadapter_subgraph_partition_config_path =
paddle_lite_option.nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
}

void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
const std::string& nnadapter_subgraph_partition_config_buffer) {
lite_nnadapter_subgraph_partition_config_buffer =
paddle_lite_option.nnadapter_subgraph_partition_config_buffer =
nnadapter_subgraph_partition_config_buffer;
}

void RuntimeOption::SetLiteDeviceNames(
const std::vector<std::string>& nnadapter_device_names) {
lite_nnadapter_device_names = nnadapter_device_names;
paddle_lite_option.nnadapter_device_names = nnadapter_device_names;
}

void RuntimeOption::SetLiteContextProperties(
const std::string& nnadapter_context_properties) {
lite_nnadapter_context_properties = nnadapter_context_properties;
paddle_lite_option.nnadapter_context_properties =
nnadapter_context_properties;
}

void RuntimeOption::SetLiteModelCacheDir(
const std::string& nnadapter_model_cache_dir) {
lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
paddle_lite_option.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
}

void RuntimeOption::SetLiteDynamicShapeInfo(
const std::map<std::string, std::vector<std::vector<int64_t>>>&
nnadapter_dynamic_shape_info) {
lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
paddle_lite_option.nnadapter_dynamic_shape_info =
nnadapter_dynamic_shape_info;
}

void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
const std::string& nnadapter_mixed_precision_quantization_config_path) {
lite_nnadapter_mixed_precision_quantization_config_path =
paddle_lite_option.nnadapter_mixed_precision_quantization_config_path =
nnadapter_mixed_precision_quantization_config_path;
}

Expand Down
39 changes: 5 additions & 34 deletions fastdeploy/runtime/runtime_option.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
bool enable_half_partial = false);

Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess

// for cpu inference
// by default, each backend chooses its own default value
int cpu_thread_num = -1;
int device_id = 0;
Expand Down Expand Up @@ -388,31 +389,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
float ipu_available_memory_proportion = 1.0;
bool ipu_enable_half_partial = false;

// ======Only for Paddle Lite Backend=====
// 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
// enable int8 or not
bool lite_enable_int8 = false;
// enable fp16 or not
bool lite_enable_fp16 = false;
// optimized model dir for CxxConfig
std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = "";
// and other nnadapter settings for CxxConfig
std::string lite_nnadapter_subgraph_partition_config_buffer = "";
std::string lite_nnadapter_context_properties = "";
std::string lite_nnadapter_model_cache_dir = "";
std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> lite_nnadapter_device_names = {};

bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;

// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
Expand Down Expand Up @@ -444,14 +420,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;

// ======Only for KunlunXin XPU Backend=======
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;

/// Option to configure Paddle Lite backend
LiteBackendOption paddle_lite_option;

std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
Expand Down

0 comments on commit d00df3d

Please sign in to comment.