diff --git a/.gitignore b/.gitignore
index a3cb6dcb9e..d61f64c9c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,8 @@ FastDeploy.cmake
build-debug.sh
*dist
fastdeploy.egg-info
+fastdeploy_python.egg-info
+fastdeploy_gpu_python.egg-info
.setuptools-cmake-build
fastdeploy/version.py
fastdeploy/core/config.h
@@ -15,7 +17,5 @@ fastdeploy/LICENSE*
fastdeploy/ThirdPartyNotices*
*.so*
fastdeploy/libs/third_libs
-csrcs/fastdeploy/core/config.h
-csrcs/fastdeploy/pybind/main.cc
-fastdeploy_gpu_python.egg-info
-
+csrc/fastdeploy/core/config.h
+csrc/fastdeploy/pybind/main.cc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb02982d75..ea44dcb864 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -42,6 +42,7 @@ option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernc
option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
+option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(CUDA_DIRECTORY "If build tensorrt backend, need to define path of cuda library.")
option(TRT_DIRECTORY "If build tensorrt backend, need to define path of tensorrt library.")
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
@@ -55,6 +56,9 @@ option(ENABLE_FDTENSOR_FUNC "Whether to compile with function of FDTensor." OFF)
option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF)
+# Whether to build fastdeploy with vision/text/... examples, only for testing.
+option(BUILD_EXAMPLES "Whether to build fastdeploy with vision examples" OFF)
+
# config GIT_URL with github mirrors to speed up dependent repos clone
option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
if(NOT GIT_URL)
@@ -98,16 +102,27 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
include_directories(${HEAD_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
+if(BUILD_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
+ if(ENABLE_VISION)
+ # ENABLE_VISION_VISUALIZE must be ON if enable vision examples.
+    message(STATUS "BUILD_EXAMPLES and ENABLE_VISION are both ON, so ENABLE_VISION_VISUALIZE is forced ON")
+ set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE)
+ else()
+    message(WARNING "BUILD_EXAMPLES is ON but ENABLE_VISION is OFF, vision examples will be skipped.")
+ endif()
+endif()
+
add_definitions(-DFASTDEPLOY_LIB)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
+file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
-list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${FDTENSOR_FUNC_SRCS})
+list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${FDTENSOR_FUNC_SRCS})
set(DEPEND_LIBS "")
@@ -144,6 +159,13 @@ if(ENABLE_PADDLE_BACKEND)
endif()
endif()
+if(ENABLE_OPENVINO_BACKEND)
+ add_definitions(-DENABLE_OPENVINO_BACKEND)
+ list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS})
+ include(external/openvino.cmake)
+ list(APPEND DEPEND_LIBS external_openvino)
+endif()
+
if(WITH_GPU)
if(APPLE)
message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
@@ -273,6 +295,15 @@ if(MSVC)
endif()
target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS})
+# Add examples after the include paths of the third-party libraries have been prepared
+if(BUILD_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples)
+ add_definitions(-DBUILD_EXAMPLES)
+ if(NOT EXECUTABLE_OUTPUT_PATH STREQUAL ${CMAKE_CURRENT_BINARY_DIR}/bin)
+ set(EXECUTABLE_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/bin)
+ endif()
+ add_subdirectory(examples)
+endif()
+
if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests)
add_definitions(-DWITH_TESTING)
include(external/gtest.cmake)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index 6ee4f490ee..816bd7f0cf 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -3,7 +3,9 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
+set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@)
+set(OPENVINO_VERSION @OPENVINO_VERSION@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
@@ -45,6 +47,11 @@ if(ENABLE_PADDLE_BACKEND)
endif()
endif()
+if(ENABLE_OPENVINO_BACKEND)
+ find_library(OPENVINO_LIB openvino ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/openvino/lib/ NO_DEFAULT_PATH)
+ list(APPEND FASTDEPLOY_LIBS ${OPENVINO_LIB})
+endif()
+
if(WITH_GPU)
if (NOT CUDA_DIRECTORY)
set(CUDA_DIRECTORY "/usr/local/cuda")
@@ -101,6 +108,8 @@ if (ENABLE_TEXT)
find_library(FASTER_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${FASTER_TOKENIZER_LIB})
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include)
+ # TODO (zhoushunjie): Will remove it later.
+ list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include/faster_tokenizer)
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/third_party/include)
endif()
@@ -124,6 +133,10 @@ message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
+message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
+if(ENABLE_OPENVINO_BACKEND)
+ message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
+endif()
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_VISION : ${ENABLE_VISION}")
message(STATUS " ENABLE_TEXT : ${ENABLE_TEXT}")
diff --git a/README.md b/README.md
index 8d414580ac..71ce08a0d7 100644
--- a/README.md
+++ b/README.md
@@ -14,68 +14,63 @@
-**⚡️FastDeploy** is an **easy-to-use** inference deployment toolbox. It covers mainstream **high-quality pre-trained models** in the industry and provides an **out-of-the-box** development experience, covering tasks such as image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection and OCR, and meets developers' needs for fast deployment across **multiple scenarios**, **multiple kinds of hardware** and **multiple platforms**.
+**⚡️FastDeploy** is an **easy-to-use and efficient** inference deployment development kit. It covers mainstream **high-quality pre-trained models** in the industry and provides an **out-of-the-box** deployment experience, covering tasks such as image classification, object detection, image segmentation, face detection, face recognition, human keypoint detection and OCR, and meets developers' needs for convenient, efficient, industrial-grade deployment across **multiple scenarios**, **multiple kinds of hardware** and **multiple platforms**.
## Recent Updates
- 🔥 **2022.8.18: FastDeploy [release/v0.2.0](https://github.com/PaddlePaddle/FastDeploy/releases/tag/release%2F0.2.0) released**
-    - **Brand-new server-side upgrade: one SDK covering all models**
-    - Released an easy-to-use, high-performance inference engine SDK for x86 CPU and NVIDIA GPU, with greatly improved inference speed
-    - Supports the ONNXRuntime, Paddle Inference and TensorRT inference engines
-    - Supports top object detection models such as YOLOv7, YOLOv6, YOLOv5 and PP-YOLOE, with [demo examples](examples/vision/detection/)
+    - **Brand-new server-side deployment upgrade: faster inference performance, more vision models supported**
+    - Released a high-performance inference engine SDK for x86 CPU and NVIDIA GPU, with greatly improved inference speed
+    - Integrates inference engines such as Paddle Inference, ONNX Runtime and TensorRT, providing a unified deployment experience
+    - Supports the full range of object detection models such as YOLOv7, YOLOv6, YOLOv5 and PP-YOLOE, with [end-to-end deployment examples](examples/vision/detection/)
    - Supports 40+ key models for face detection, face recognition, real-time portrait matting, image segmentation and more, with [demo examples](examples/vision/)
-    - Supports Python and C++ APIs
-    - Cuts the amount of AI model deployment code by ~60%
-  - **On the edge side, beyond ARM CPU, extends to NPUs from Rockchip, Amlogic, NXP and more**
+    - Supports deployment in both Python and C++
+  - **Edge-side deployment adds support for NPUs from Rockchip, Amlogic, NXP and more**
    - Released the lightweight object detection [PicoDet NPU deployment demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/linux/picodet_detection), providing low-barrier full INT8 quantization
-## Table of Contents
-* **Server**
-    * [Server quick start](#fastdeploy-quick-start)
-        * [Quick installation](#fastdeploy-quick-start)
-        * [Python inference example](#fastdeploy-quick-start-python)
-        * [C++ inference example](#fastdeploy-quick-start-cpp)
-    * [Server model list](#fastdeploy-server-models)
-* **Edge**
-    * [Edge-side documentation](#fastdeploy-edge-doc)
-        * [ARM CPU deployment](#fastdeploy-edge-sdk-arm-linux)
-        * [ARM CPU mobile deployment](#fastdeploy-edge-sdk-ios-android)
-        * [ARM CPU custom models](#fastdeploy-edge-sdk-custom)
-        * [NPU deployment](#fastdeploy-edge-sdk-npu)
-    * [Edge model list](#fastdeploy-edge-sdk)
+## Contents
+* **Server-side deployment**
+    * [FastDeploy Python SDK quick start](#fastdeploy-quick-start-python)
+    * [FastDeploy C++ SDK quick start](#fastdeploy-quick-start-cpp)
+    * [Server-side supported model list](#fastdeploy-server-models)
+* **Edge-side deployment**
+    * [EasyEdge edge deployment](#fastdeploy-edge-sdk-arm-linux)
+    * [EasyEdge mobile deployment](#fastdeploy-edge-sdk-ios-android)
+    * [EasyEdge custom model deployment](#fastdeploy-edge-sdk-custom)
+    * [Paddle Lite NPU deployment](#fastdeploy-edge-sdk-npu)
+    * [Edge-side supported model list](#fastdeploy-edge-sdk)
* [Community](#fastdeploy-community)
* [Acknowledge](#fastdeploy-acknowledge)
* [License](#fastdeploy-license)
-## 1. Server Quick Start
+## Server-side Deployment
-
+### FastDeploy Python SDK Quick Start
+
-### 1.1 Quick Installation of the FastDeploy Python/C++ Libraries
+#### Quick Installation
-#### Environment Dependencies
+##### Prerequisites
+- CUDA >= 11.2
+- cuDNN >= 8.0
+- python >= 3.8
+- OS: Linux x86_64/macOS/Windows 10
-- Linux x64/aarch64
-- Windows 10
-- Mac OSX x86/arm64
-- cuda >= 11.2
-- cudnn >= 8.0
-- python 3.6\~3.9(Windows 10 3.8\~3.9)
+##### Install the GPU Version
-#### Install the CPU Python Version
-```bash
-pip install numpy opencv-python fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
-```
-#### Install the GPU Python Version
```bash
pip install numpy opencv-python fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
```
-#### Install the C++ Version
-- See the [C++ prebuilt libraries download](docs/quick_start/CPP_prebuilt_libraries.md) documentation
+##### Install the CPU Version
+```bash
+pip install numpy opencv-python fastdeploy-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
+```
+
+#### Python Inference Example
-#### Prepare the Object Detection Model and Test Image
+* Prepare the model and image
```bash
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
@@ -83,11 +78,7 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
```
-
-### 1.2 Python Inference Example
-
-
-
+* Run inference and check the result
```python
# For GPU/TensorRT deployment, see examples/vision/detection/paddledetection/python
import cv2
@@ -95,7 +86,7 @@ import fastdeploy.vision as vision
model = vision.detection.PPYOLOE("ppyoloe_crn_l_300e_coco/model.pdmodel",
"ppyoloe_crn_l_300e_coco/model.pdiparams",
- "ppyoloe_crn_l_300e_coco/nfer_cfg.yml")
+ "ppyoloe_crn_l_300e_coco/infer_cfg.yml")
im = cv2.imread("000000014439.jpg")
result = model.predict(im.copy())
print(result)
@@ -104,10 +95,25 @@ vis_im = vision.vis_detection(im, result, score_threshold=0.5)
cv2.imwrite("vis_image.jpg", vis_im)
```
-### 1.3 C++ Inference Example
-
+### FastDeploy C++ SDK Quick Start
+#### Installation
+
+- See the [C++ prebuilt libraries download](docs/quick_start/CPP_prebuilt_libraries.md) documentation
+
+#### C++ Inference Example
+
+* Prepare the model and image
+
+```bash
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+```
+
+* Run inference and check the result
+
```C++
// For GPU/TensorRT deployment, see examples/vision/detection/paddledetection/cpp
#include "fastdeploy/vision.h"
@@ -129,7 +135,7 @@ int main(int argc, char* argv[]) {
For more deployment examples, see the [vision model deployment examples](examples/vision).
-## 2. Server Model List 🔥🔥🔥
+### Server-side Supported Model List 🔥🔥🔥
@@ -140,8 +146,8 @@ int main(int argc, char* argv[]) {
| :--------: | :--------: | :--------: | :--------: | :--------: | :--------: | :--------: | :--------: | :--------: | :--------: |:--------: |
| --- | --- | --- | X86 CPU | NVIDIA GPU | Intel CPU | NVIDIA GPU | Intel CPU | Arm CPU | AArch64 CPU | NVIDIA Jetson |
| Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Classification | [PaddleClas/PPLCNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Classification | [PaddleClas/PPLCNetv2](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/GhostNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/MobileNetV1](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
@@ -150,14 +156,14 @@ int main(int argc, char* argv[]) {
| Classification | [PaddleClas/ShuffleNetV2](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/SqueezeNetV1.1](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Classification | [PaddleClas/PPHGNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Classification | [PaddleClas/SwinTransformer](./examples/vision/classification/paddleclas) | [Python](./examples/vision/classification/paddleclas/python)/[C++](./examples/vision/classification/paddleclas/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Detection | [PaddleDetection/PPYOLOE](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Detection | [PaddleDetection/PP-YOLOE](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [PaddleDetection/YOLOX](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [PaddleDetection/YOLOv3](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Detection | [PaddleDetection/PPYOLO](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❔ |
-| Detection | [PaddleDetection/PPYOLOv2](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❔ |
+| Detection | [PaddleDetection/PP-YOLO](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❔ |
+| Detection | [PaddleDetection/PP-YOLOv2](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❔ |
| Detection | [PaddleDetection/FasterRCNN](./examples/vision/detection/paddledetection) | [Python](./examples/vision/detection/paddledetection/python)/[C++](./examples/vision/detection/paddledetection/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❔ |
| Detection | [Megvii-BaseDetection/YOLOX](./examples/vision/detection/yolox) | [Python](./examples/vision/detection/yolox/python)/[C++](./examples/vision/detection/yolox/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [WongKinYiu/YOLOv7](./examples/vision/detection/yolov7) | [Python](./examples/vision/detection/yolov7/python)/[C++](./examples/vision/detection/yolov7/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
@@ -167,10 +173,10 @@ int main(int argc, char* argv[]) {
| Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | [Python](./examples/vision/detection/scaledyolov4/python)/[C++](./examples/vision/detection/scaledyolov4/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | [Python](./examples/vision/detection/yolov5lite/python)/[C++](./examples/vision/detection/yolov5lite/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Detection | [RangiLyu/NanoDetPlus](./examples/vision/detection/nanodet_plus) | [Python](./examples/vision/detection/nanodet_plus/python)/[C++](./examples/vision/detection/nanodet_plus/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Segmentation | [PaddleSeg/PPLiteSeg](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Segmentation | [PaddleSeg/PPHumanSegLite](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Segmentation | [PaddleSeg/HRNet](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-| Segmentation | [PaddleSeg/PPHumanSegServer](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
+| Segmentation | [PaddleSeg/PP-HumanSegServer](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Segmentation | [PaddleSeg/Unet](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| Segmentation | [PaddleSeg/Deeplabv3](./examples/vision/segmentation/paddleseg) | [Python](./examples/vision/segmentation/paddleseg/python)/[C++](./examples/vision/segmentation/paddleseg/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
| FaceDetection | [biubug6/RetinaFace](./examples/vision/facedet/retinaface) | [Python](./examples/vision/facedet/retinaface/python)/[C++](./examples/vision/facedet/retinaface/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
@@ -184,11 +190,11 @@ int main(int argc, char* argv[]) {
| Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | [Python](./examples/vision/matting/modnet/python)/[C++](./examples/vision/matting/modnet/cpp) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ |
-## 3. Edge-side Documentation
+## Edge-side Deployment
-### 3.1 Edge-side Deployment
+### EasyEdge Edge Deployment
@@ -198,26 +204,26 @@ int main(int argc, char* argv[]) {
- [Python Inference deployment](./docs/ARM-CPU/ARM-Linux-Python-SDK-Inference.md)
- [Python serving deployment](./docs/ARM-CPU/ARM-Linux-Python-SDK-Serving.md)
-### 3.2 Mobile Deployment
+### EasyEdge Mobile Deployment
- [iOS deployment](./docs/ARM-CPU/iOS-SDK.md)
- [Android deployment](./docs/ARM-CPU/Android-SDK.md)
-### 3.3 Custom Model Deployment
+### EasyEdge Custom Model Deployment
- [Quickly replace the model with your own](./docs/ARM-CPU/Replace-Model-With-Anther-One.md)
-### 3.4 NPU Deployment
+### Paddle Lite NPU Deployment
- [Rockchip NPU / Amlogic NPU / NXP NPU](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/linux/picodet_detection)
-## 4. Edge Model List
+### Edge-side Supported Model List
@@ -259,7 +265,7 @@ int main(int argc, char* argv[]) {
| OCR | PP-OCRv3 | 2.4+10.6 | ✅ | ✅ | ✅ |❔ | ❔ | ❔ |❔|
| OCR | PP-OCRv3-tiny | 2.4+10.7 | ✅ | ✅ | ✅ |-- | -- | -- |--|
-## 5. Community
+## Community
@@ -269,13 +275,13 @@ int main(int argc, char* argv[]) {
-## 6. Acknowledge
+## Acknowledge
-The SDK generation and download in this project use the free open capabilities of [EasyEdge](https://ai.baidu.com/easyedge/app/openSource), for which we thank them again.
+The SDK generation and download in this project use the free open capabilities of [EasyEdge](https://ai.baidu.com/easyedge/app/openSource), for which we hereby express our thanks.
-## 7. License
+## License
diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt
index fa9df0fbab..ec873fbc31 100644
--- a/ThirdPartyNotices.txt
+++ b/ThirdPartyNotices.txt
@@ -941,3 +941,208 @@ THE SOFTWARE.
terms, and open source software license terms. These separate license terms
govern your use of the third party programs as set forth in the
"THIRD-PARTY-PROGRAMS" file.
+
+--------
+8. https://github.com/openvinotoolkit/openvino
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/VERSION_NUMBER b/VERSION_NUMBER
index 0ea3a944b3..0c62199f16 100644
--- a/VERSION_NUMBER
+++ b/VERSION_NUMBER
@@ -1 +1 @@
-0.2.0
+0.2.1
diff --git a/csrc/fastdeploy/backends/backend.h b/csrc/fastdeploy/backends/backend.h
index de7b5a575f..6d2e11f662 100644
--- a/csrc/fastdeploy/backends/backend.h
+++ b/csrc/fastdeploy/backends/backend.h
@@ -14,12 +14,12 @@
#pragma once
+#include "fastdeploy/backends/common/multiclass_nms.h"
+#include "fastdeploy/core/fd_tensor.h"
#include <map>
#include <memory>
#include <string>
#include <vector>
-#include "fastdeploy/backends/common/multiclass_nms.h"
-#include "fastdeploy/core/fd_tensor.h"
namespace fastdeploy {
@@ -27,6 +27,20 @@ struct TensorInfo {
std::string name;
std::vector<int> shape;
FDDataType dtype;
+
+ friend std::ostream& operator<<(std::ostream& output,
+ const TensorInfo& info) {
+ output << "TensorInfo(name: " << info.name << ", shape: [";
+ for (size_t i = 0; i < info.shape.size(); ++i) {
+ if (i == info.shape.size() - 1) {
+ output << info.shape[i];
+ } else {
+ output << info.shape[i] << ", ";
+ }
+ }
+ output << "], dtype: " << Str(info.dtype) << ")";
+ return output;
+ }
};
class BaseBackend {
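The new stream operator on `TensorInfo` makes it easy to log a model's input metadata while debugging. A minimal sketch of how it might be used; the `Runtime` object and its `NumInputs`/`GetInputInfo` accessors are taken from `fastdeploy_runtime.h` later in this patch, and the exact dtype string depends on `Str(info.dtype)`:

```C++
// Sketch: dumping input metadata with the new TensorInfo operator<<.
#include <iostream>
#include "fastdeploy/fastdeploy_runtime.h"

void DumpInputInfo(fastdeploy::Runtime& runtime) {
  for (int i = 0; i < runtime.NumInputs(); ++i) {
    // Prints one "TensorInfo(name: ..., shape: [...], dtype: ...)" line per input.
    std::cout << runtime.GetInputInfo(i) << std::endl;
  }
}
```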
diff --git a/csrc/fastdeploy/backends/openvino/ov_backend.cc b/csrc/fastdeploy/backends/openvino/ov_backend.cc
new file mode 100644
index 0000000000..406dfa1138
--- /dev/null
+++ b/csrc/fastdeploy/backends/openvino/ov_backend.cc
@@ -0,0 +1,199 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/openvino/ov_backend.h"
+
+namespace fastdeploy {
+
+std::vector<int64_t> PartialShapeToVec(const ov::PartialShape& shape) {
+  std::vector<int64_t> res;
+ for (int i = 0; i < shape.size(); ++i) {
+ auto dim = shape[i];
+ if (dim.is_dynamic()) {
+ res.push_back(-1);
+ } else {
+ res.push_back(dim.get_length());
+ }
+ }
+ return res;
+}
+
+FDDataType OpenVINODataTypeToFD(const ov::element::Type& type) {
+ if (type == ov::element::f32) {
+ return FDDataType::FP32;
+ } else if (type == ov::element::f64) {
+ return FDDataType::FP64;
+ } else if (type == ov::element::i8) {
+ return FDDataType::INT8;
+ } else if (type == ov::element::i32) {
+ return FDDataType::INT32;
+ } else if (type == ov::element::i64) {
+ return FDDataType::INT64;
+ } else {
+ FDASSERT(false, "Only support float/double/int8/int32/int64 now.");
+ }
+ return FDDataType::FP32;
+}
+
+ov::element::Type FDDataTypeToOV(const FDDataType& type) {
+ if (type == FDDataType::FP32) {
+ return ov::element::f32;
+ } else if (type == FDDataType::FP64) {
+ return ov::element::f64;
+ } else if (type == FDDataType::INT8) {
+ return ov::element::i8;
+ } else if (type == FDDataType::INT32) {
+ return ov::element::i32;
+ } else if (type == FDDataType::INT64) {
+ return ov::element::i64;
+ }
+ FDASSERT(false, "Only support float/double/int8/int32/int64 now.");
+ return ov::element::f32;
+}
+
+bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
+ const std::string& params_file,
+ const OpenVINOBackendOption& option) {
+ if (initialized_) {
+    FDERROR << "OpenVINOBackend is already initialized, cannot initialize again."
+ << std::endl;
+ return false;
+ }
+ option_ = option;
+ ov::AnyMap properties;
+ if (option_.cpu_thread_num > 0) {
+ properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+ }
+
+  std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);
+
+ // Get inputs/outputs information from loaded model
+  const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ TensorInfo info;
+ auto partial_shape = PartialShapeToVec(inputs[i].get_partial_shape());
+ info.shape.assign(partial_shape.begin(), partial_shape.end());
+ info.name = inputs[i].get_any_name();
+ info.dtype = OpenVINODataTypeToFD(inputs[i].get_element_type());
+ input_infos_.emplace_back(info);
+ }
+  const std::vector<ov::Output<ov::Node>> outputs = model->outputs();
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ TensorInfo info;
+ auto partial_shape = PartialShapeToVec(outputs[i].get_partial_shape());
+ info.shape.assign(partial_shape.begin(), partial_shape.end());
+ info.name = outputs[i].get_any_name();
+ info.dtype = OpenVINODataTypeToFD(outputs[i].get_element_type());
+ output_infos_.emplace_back(info);
+ }
+
+ compiled_model_ = core_.compile_model(model, "CPU", properties);
+ request_ = compiled_model_.create_infer_request();
+ initialized_ = true;
+ return true;
+}
+
+TensorInfo OpenVINOBackend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index: %d should be less than the number of inputs: %d.", index,
+           NumInputs());
+ return input_infos_[index];
+}
+
+TensorInfo OpenVINOBackend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index: %d should be less than the number of outputs: %d.", index,
+           NumOutputs());
+ return output_infos_[index];
+}
+
+bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
+ const OpenVINOBackendOption& option) {
+ if (initialized_) {
+    FDERROR << "OpenVINOBackend is already initialized, cannot initialize again."
+ << std::endl;
+ return false;
+ }
+ option_ = option;
+ ov::AnyMap properties;
+ if (option_.cpu_thread_num > 0) {
+ properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+ }
+
+  std::shared_ptr<ov::Model> model = core_.read_model(model_file);
+
+ // Get inputs/outputs information from loaded model
+  const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ TensorInfo info;
+ auto partial_shape = PartialShapeToVec(inputs[i].get_partial_shape());
+ info.shape.assign(partial_shape.begin(), partial_shape.end());
+ info.name = inputs[i].get_any_name();
+ info.dtype = OpenVINODataTypeToFD(inputs[i].get_element_type());
+ input_infos_.emplace_back(info);
+ }
+  const std::vector<ov::Output<ov::Node>> outputs = model->outputs();
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ TensorInfo info;
+ auto partial_shape = PartialShapeToVec(outputs[i].get_partial_shape());
+ info.shape.assign(partial_shape.begin(), partial_shape.end());
+ info.name = outputs[i].get_any_name();
+ info.dtype = OpenVINODataTypeToFD(outputs[i].get_element_type());
+ output_infos_.emplace_back(info);
+ }
+
+ compiled_model_ = core_.compile_model(model, "CPU", properties);
+ request_ = compiled_model_.create_infer_request();
+ initialized_ = true;
+ return true;
+}
+
+int OpenVINOBackend::NumInputs() const { return input_infos_.size(); }
+
+int OpenVINOBackend::NumOutputs() const { return output_infos_.size(); }
+
+bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
+                            std::vector<FDTensor>* outputs) {
+ if (inputs.size() != input_infos_.size()) {
+ FDERROR << "[OpenVINOBackend] Size of the inputs(" << inputs.size()
+ << ") should keep same with the inputs of this model("
+ << input_infos_.size() << ")." << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < inputs.size(); ++i) {
+ ov::Shape shape(inputs[i].shape.begin(), inputs[i].shape.end());
+ ov::Tensor ov_tensor(FDDataTypeToOV(inputs[i].dtype), shape,
+ inputs[i].Data());
+ request_.set_tensor(inputs[i].name, ov_tensor);
+ }
+
+ request_.infer();
+
+ outputs->resize(output_infos_.size());
+ for (size_t i = 0; i < output_infos_.size(); ++i) {
+ auto out_tensor = request_.get_output_tensor(i);
+ auto out_tensor_shape = out_tensor.get_shape();
+    std::vector<int64_t> shape(out_tensor_shape.begin(),
+                               out_tensor_shape.end());
+ (*outputs)[i].Allocate(shape,
+ OpenVINODataTypeToFD(out_tensor.get_element_type()),
+ output_infos_[i].name);
+ memcpy((*outputs)[i].MutableData(), out_tensor.data(),
+ (*outputs)[i].Nbytes());
+ }
+ return true;
+}
+
+} // namespace fastdeploy
diff --git a/csrc/fastdeploy/backends/openvino/ov_backend.h b/csrc/fastdeploy/backends/openvino/ov_backend.h
new file mode 100644
index 0000000000..69c6104f70
--- /dev/null
+++ b/csrc/fastdeploy/backends/openvino/ov_backend.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/backends/backend.h"
+#include "openvino/openvino.hpp"
+
+namespace fastdeploy {
+
+struct OpenVINOBackendOption {
+ int cpu_thread_num = 8;
+  std::map<std::string, std::vector<int64_t>> shape_infos;
+};
+
+class OpenVINOBackend : public BaseBackend {
+ public:
+ OpenVINOBackend() {}
+ virtual ~OpenVINOBackend() = default;
+
+ bool
+ InitFromPaddle(const std::string& model_file, const std::string& params_file,
+ const OpenVINOBackendOption& option = OpenVINOBackendOption());
+
+ bool
+ InitFromOnnx(const std::string& model_file,
+ const OpenVINOBackendOption& option = OpenVINOBackendOption());
+
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
+
+ int NumInputs() const;
+
+ int NumOutputs() const;
+
+ TensorInfo GetInputInfo(int index);
+ TensorInfo GetOutputInfo(int index);
+
+ private:
+ ov::Core core_;
+ ov::CompiledModel compiled_model_;
+ ov::InferRequest request_;
+ OpenVINOBackendOption option_;
+  std::vector<TensorInfo> input_infos_;
+  std::vector<TensorInfo> output_infos_;
+};
+} // namespace fastdeploy
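For reference, a hedged sketch of driving `OpenVINOBackend` directly with an ONNX model. It uses only members visible in this patch (`InitFromOnnx`, `GetInputInfo`, `Infer`, and the `FDTensor` members `Allocate`/`MutableData`/`name` used in `ov_backend.cc`); the input shape is an assumed example, and typical applications should go through `Runtime`/`RuntimeOption` instead:

```C++
// Sketch only: exercising OpenVINOBackend directly (normally Runtime does this).
#include <string>
#include <vector>

#include "fastdeploy/backends/openvino/ov_backend.h"

bool RunOnce(const std::string& onnx_file) {
  fastdeploy::OpenVINOBackend backend;
  fastdeploy::OpenVINOBackendOption option;
  option.cpu_thread_num = 4;  // <= 0 keeps OpenVINO's own default
  if (!backend.InitFromOnnx(onnx_file, option)) return false;

  fastdeploy::TensorInfo info = backend.GetInputInfo(0);
  std::vector<fastdeploy::FDTensor> inputs(1), outputs;
  std::vector<int64_t> shape = {1, 3, 224, 224};  // assumed static input shape
  inputs[0].Allocate(shape, info.dtype, info.name);
  // ... fill inputs[0].MutableData() with preprocessed data ...
  return backend.Infer(inputs, &outputs);
}
```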
diff --git a/csrc/fastdeploy/backends/ort/ort_backend.cc b/csrc/fastdeploy/backends/ort/ort_backend.cc
index a296c6b2b8..1b49928a99 100644
--- a/csrc/fastdeploy/backends/ort/ort_backend.cc
+++ b/csrc/fastdeploy/backends/ort/ort_backend.cc
@@ -32,10 +32,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
session_options_.SetGraphOptimizationLevel(
GraphOptimizationLevel(option.graph_optimization_level));
}
- if (option.intra_op_num_threads >= 0) {
+ if (option.intra_op_num_threads > 0) {
session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
}
- if (option.inter_op_num_threads >= 0) {
+ if (option.inter_op_num_threads > 0) {
session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
}
if (option.execution_mode >= 0) {
diff --git a/csrc/fastdeploy/backends/paddle/paddle_backend.cc b/csrc/fastdeploy/backends/paddle/paddle_backend.cc
index 1b08f90c4c..e5a9d8e789 100644
--- a/csrc/fastdeploy/backends/paddle/paddle_backend.cc
+++ b/csrc/fastdeploy/backends/paddle/paddle_backend.cc
@@ -29,7 +29,11 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
if (!option.enable_log_info) {
config_.DisableGlogInfo();
}
- config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
+ if (option.cpu_thread_num <= 0) {
+ config_.SetCpuMathLibraryNumThreads(8);
+ } else {
+ config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num);
+ }
}
bool PaddleBackend::InitFromPaddle(const std::string& model_file,
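Both CPU backends now treat a non-positive `cpu_thread_num` as "use the backend's own default": ONNX Runtime simply skips `SetIntraOpNumThreads`/`SetInterOpNumThreads`, while Paddle Inference falls back to 8 math-library threads. A one-line sketch of the resulting contract (the helper name is hypothetical):

```C++
// Hypothetical helper illustrating the thread-count contract after this change:
// a non-positive request means "let the backend decide".
int ResolveCpuThreads(int requested, int backend_default) {
  return requested > 0 ? requested : backend_default;  // Paddle's fallback is 8
}
```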
diff --git a/csrc/fastdeploy/core/config.h.in b/csrc/fastdeploy/core/config.h.in
index b29113f1fd..573439f600 100644
--- a/csrc/fastdeploy/core/config.h.in
+++ b/csrc/fastdeploy/core/config.h.in
@@ -33,6 +33,10 @@
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
+#ifndef ENABLE_OPENVINO_BACKEND
+#cmakedefine ENABLE_OPENVINO_BACKEND
+#endif
+
#ifndef WITH_GPU
#cmakedefine WITH_GPU
#endif
diff --git a/csrc/fastdeploy/fastdeploy_model.cc b/csrc/fastdeploy/fastdeploy_model.cc
index d1d296c6cd..41f1315c1c 100644
--- a/csrc/fastdeploy/fastdeploy_model.cc
+++ b/csrc/fastdeploy/fastdeploy_model.cc
@@ -26,31 +26,8 @@ bool FastDeployModel::InitRuntime() {
return false;
}
if (runtime_option.backend != Backend::UNKNOWN) {
- if (runtime_option.backend == Backend::ORT) {
- if (!IsBackendAvailable(Backend::ORT)) {
- FDERROR
- << "Backend::ORT is not complied with current FastDeploy library."
- << std::endl;
- return false;
- }
- } else if (runtime_option.backend == Backend::TRT) {
- if (!IsBackendAvailable(Backend::TRT)) {
- FDERROR
- << "Backend::TRT is not complied with current FastDeploy library."
- << std::endl;
- return false;
- }
- } else if (runtime_option.backend == Backend::PDINFER) {
- if (!IsBackendAvailable(Backend::PDINFER)) {
- FDERROR << "Backend::PDINFER is not compiled with current FastDeploy "
- "library."
- << std::endl;
- return false;
- }
- } else {
- FDERROR
- << "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now."
- << std::endl;
+ if (!IsBackendAvailable(runtime_option.backend)) {
+ FDERROR << Str(runtime_option.backend) << " is not compiled with current FastDeploy library." << std::endl;
return false;
}
diff --git a/csrc/fastdeploy/fastdeploy_runtime.cc b/csrc/fastdeploy/fastdeploy_runtime.cc
index c2a16b9032..2e73af38c6 100644
--- a/csrc/fastdeploy/fastdeploy_runtime.cc
+++ b/csrc/fastdeploy/fastdeploy_runtime.cc
@@ -28,6 +28,10 @@
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
+#ifdef ENABLE_OPENVINO_BACKEND
+#include "fastdeploy/backends/openvino/ov_backend.h"
+#endif
+
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
@@ -40,6 +44,9 @@ std::vector GetAvailableBackends() {
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
+#endif
+#ifdef ENABLE_OPENVINO_BACKEND
+ backends.push_back(Backend::OPENVINO);
#endif
return backends;
}
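With the new `ENABLE_OPENVINO_BACKEND` compile definition, `Backend::OPENVINO` appears in the list returned by `GetAvailableBackends()`. A small sketch of checking this at runtime, assuming `GetAvailableBackends` and `Str` are declared in `fastdeploy_runtime.h`:

```C++
// Sketch: listing the backends compiled into the current FastDeploy build.
#include <iostream>
#include "fastdeploy/fastdeploy_runtime.h"

void PrintAvailableBackends() {
  for (const auto& b : fastdeploy::GetAvailableBackends()) {
    // With ENABLE_OPENVINO_BACKEND=ON this also prints "Backend::OPENVINO".
    std::cout << fastdeploy::Str(b) << std::endl;
  }
}
```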
@@ -61,6 +68,8 @@ std::string Str(const Backend& b) {
return "Backend::TRT";
} else if (b == Backend::PDINFER) {
return "Backend::PDINFER";
+ } else if (b == Backend::OPENVINO) {
+ return "Backend::OPENVINO";
}
return "UNKNOWN-Backend";
}
@@ -177,6 +186,13 @@ void RuntimeOption::UseTrtBackend() {
#endif
}
+void RuntimeOption::UseOpenVINOBackend() {
+#ifdef ENABLE_OPENVINO_BACKEND
+ backend = Backend::OPENVINO;
+#else
+  FDASSERT(false, "The FastDeploy library is not compiled with the OpenVINO backend.");
+#endif
+}
void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; }
void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; }
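From the user's side, the new backend is selected through `RuntimeOption::UseOpenVINOBackend()`. A hedged sketch using only members visible in this diff (`model_file`, `params_file`, `model_format`, `cpu_thread_num`); the option API may also offer convenience setters not shown here:

```C++
// Sketch: initializing a Runtime with the OpenVINO backend on CPU.
#include "fastdeploy/fastdeploy_runtime.h"

bool InitOpenVINORuntime(fastdeploy::Runtime* runtime) {
  fastdeploy::RuntimeOption option;
  option.model_file = "ppyoloe_crn_l_300e_coco/model.pdmodel";
  option.params_file = "ppyoloe_crn_l_300e_coco/model.pdiparams";
  option.model_format = fastdeploy::Frontend::PADDLE;
  option.UseOpenVINOBackend();  // asserts if ENABLE_OPENVINO_BACKEND was OFF
  option.cpu_thread_num = 8;    // optional; -1 keeps the backend default
  // Prints "Runtime initialized with Backend::OPENVINO." on success.
  return runtime->Init(option);
}
```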
@@ -228,21 +244,26 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.backend = Backend::ORT;
} else if (IsBackendAvailable(Backend::PDINFER)) {
option.backend = Backend::PDINFER;
- } else {
+ } else if (IsBackendAvailable(Backend::OPENVINO)) {
+ option.backend = Backend::OPENVINO;
+  } else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
<< std::endl;
return false;
}
}
+
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
- "Backend::TRT only supports Device::CPU/Device::GPU.");
+ "Backend::ORT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
+ FDINFO << "Runtime initialized with Backend::ORT." << std::endl;
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
+ FDINFO << "Runtime initialized with Backend::TRT." << std::endl;
} else if (option.backend == Backend::PDINFER) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::TRT only supports Device::CPU/Device::GPU.");
@@ -250,6 +271,11 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.model_format == Frontend::PADDLE,
"Backend::PDINFER only supports model format of Frontend::PADDLE.");
CreatePaddleBackend();
+ FDINFO << "Runtime initialized with Backend::PDINFER." << std::endl;
+ } else if (option.backend == Backend::OPENVINO) {
+ FDASSERT(option.device == Device::CPU, "Backend::OPENVINO only supports Device::CPU");
+ CreateOpenVINOBackend();
+ FDINFO << "Runtime initialized with Backend::OPENVINO." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
@@ -295,6 +321,32 @@ void Runtime::CreatePaddleBackend() {
#endif
}
+void Runtime::CreateOpenVINOBackend() {
+#ifdef ENABLE_OPENVINO_BACKEND
+ auto ov_option = OpenVINOBackendOption();
+ ov_option.cpu_thread_num = option.cpu_thread_num;
+ FDASSERT(option.model_format == Frontend::PADDLE ||
+ option.model_format == Frontend::ONNX,
+ "OpenVINOBackend only support model format of Frontend::PADDLE / "
+ "Frontend::ONNX.");
+  backend_ = utils::make_unique<OpenVINOBackend>();
+  auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());
+
+ if (option.model_format == Frontend::ONNX) {
+    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option),
+             "Load model from ONNX failed while initializing OpenVINOBackend.");
+ } else {
+    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
+                                            option.params_file, ov_option),
+             "Load model from Paddle failed while initializing OpenVINOBackend.");
+ }
+#else
+ FDASSERT(false,
+ "OpenVINOBackend is not available, please compiled with "
+ "ENABLE_OPENVINO_BACKEND=ON.");
+#endif
+}
+
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
diff --git a/csrc/fastdeploy/fastdeploy_runtime.h b/csrc/fastdeploy/fastdeploy_runtime.h
index ab6b4a188a..4abb1a02f4 100644
--- a/csrc/fastdeploy/fastdeploy_runtime.h
+++ b/csrc/fastdeploy/fastdeploy_runtime.h
@@ -21,7 +21,7 @@
namespace fastdeploy {
-enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER };
+enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER, OPENVINO };
// AUTOREC will decide which Frontend to use
// according to the name of the model file
enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX };
@@ -63,6 +63,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
// use tensorrt backend
void UseTrtBackend();
+ // use openvino backend
+ void UseOpenVINOBackend();
+
// enable mkldnn while use paddle inference in CPU
void EnablePaddleMKLDNN();
// disable mkldnn while use paddle inference in CPU
@@ -97,7 +100,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
- int cpu_thread_num = 8;
+  // the default value -1 lets each backend choose its own default
+ int cpu_thread_num = -1;
int device_id = 0;
Device device = Device::CPU;
@@ -152,6 +156,8 @@ struct FASTDEPLOY_DECL Runtime {
void CreateTrtBackend();
+ void CreateOpenVINOBackend();
+
int NumInputs() { return backend_->NumInputs(); }
int NumOutputs() { return backend_->NumOutputs(); }
TensorInfo GetInputInfo(int index);
diff --git a/csrc/fastdeploy/function/reduce.cc b/csrc/fastdeploy/function/reduce.cc
index 6ff35fe280..305fbff53a 100644
--- a/csrc/fastdeploy/function/reduce.cc
+++ b/csrc/fastdeploy/function/reduce.cc
@@ -329,11 +329,11 @@ void ArgMinMax(const FDTensor& x, FDTensor* out, int64_t axis,
} else {
all_element_num = x_dims[axis];
}
-    FDASSERT(all_element_num <= std::numeric_limits<int>::max(),
+    FDASSERT(all_element_num <= (std::numeric_limits<int>::max)(),
"The element num of the argmin/argmax input at axis is "
"%lld, is larger than int32 maximum value:%d, you must "
"set the dtype of argmin/argmax to 'int64'.",
-             all_element_num, std::numeric_limits<int>::max());
+             all_element_num, (std::numeric_limits<int>::max)());
}
std::vector<int64_t> vec;
if (flatten) {
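The extra parentheses are a portability fix: on Windows, `<windows.h>` defines a function-like `max` macro (unless `NOMINMAX` is set), which can garble `std::numeric_limits<int>::max()` during preprocessing. Wrapping the name in parentheses suppresses the macro expansion:

```C++
// Minimal illustration of the (std::numeric_limits<T>::max)() pattern.
#include <limits>

// Safe even if a function-like `max` macro is in scope (e.g. from <windows.h>).
constexpr int kInt32Max = (std::numeric_limits<int>::max)();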
diff --git a/csrc/fastdeploy/pybind/fastdeploy_runtime.cc b/csrc/fastdeploy/pybind/fastdeploy_runtime.cc
index 86e5b69c75..92f14bc6c5 100644
--- a/csrc/fastdeploy/pybind/fastdeploy_runtime.cc
+++ b/csrc/fastdeploy/pybind/fastdeploy_runtime.cc
@@ -26,6 +26,7 @@ void BindRuntime(pybind11::module& m) {
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
+ .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
.def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN)
.def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN)
.def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
diff --git a/csrc/fastdeploy/vision/classification/ppcls/model.cc b/csrc/fastdeploy/vision/classification/ppcls/model.cc
index 2ea6d846f6..60d2f6dc31 100644
--- a/csrc/fastdeploy/vision/classification/ppcls/model.cc
+++ b/csrc/fastdeploy/vision/classification/ppcls/model.cc
@@ -26,7 +26,7 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
config_file_ = config_file;
- valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
+ valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
runtime_option = custom_option;
runtime_option.model_format = model_format;
diff --git a/csrc/fastdeploy/vision/detection/ppdet/ppyoloe.cc b/csrc/fastdeploy/vision/detection/ppdet/ppyoloe.cc
index 0b675ebc6a..e2d501b76d 100644
--- a/csrc/fastdeploy/vision/detection/ppdet/ppyoloe.cc
+++ b/csrc/fastdeploy/vision/detection/ppdet/ppyoloe.cc
@@ -14,7 +14,7 @@ PPYOLOE::PPYOLOE(const std::string& model_file, const std::string& params_file,
const RuntimeOption& custom_option,
const Frontend& model_format) {
config_file_ = config_file;
- valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
+ valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
runtime_option = custom_option;
runtime_option.model_format = model_format;
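Adding `Backend::OPENVINO` to `valid_cpu_backends` lets CPU deployments of these models fall back to OpenVINO automatically, or request it explicitly. A hedged sketch for PP-YOLOE: the constructor's `config_file` argument is implied by `config_file_ = config_file` above, and `Initialized()` is assumed to be inherited from `FastDeployModel`:

```C++
// Sketch: explicitly requesting the OpenVINO backend for PP-YOLOE on CPU.
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseOpenVINOBackend();
  auto model = fastdeploy::vision::detection::PPYOLOE(
      "ppyoloe_crn_l_300e_coco/model.pdmodel",
      "ppyoloe_crn_l_300e_coco/model.pdiparams",
      "ppyoloe_crn_l_300e_coco/infer_cfg.yml", option);
  return model.Initialized() ? 0 : 1;
}
```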
diff --git a/docs/FAQ.md b/docs/FAQ.md
new file mode 100644
index 0000000000..61cdaf9c3d
--- /dev/null
+++ b/docs/FAQ.md
@@ -0,0 +1,36 @@
+# FastDeploy FAQ
+
+## 1. Configuring the CUDA v11.2 Environment Variables on Windows 10
+The GPU build of FastDeploy for Windows 10 x64 depends on CUDA 11.2. After installing CUDA v11.2, set **any one** of the `CUDA_DIRECTORY`, `CUDA_HOME`, `CUDA_PATH` and `CUDA_ROOT` environment variables so that FastDeploy can link against the CUDA libraries. There are three ways to set the variable: in code, on the terminal command line, or in the system environment variables.
+- Option 1: set it in code **(recommended)**. This is the simplest way: set the environment variable via the os module before importing FastDeploy. During initialization FastDeploy first searches the `CUDA_DIRECTORY`, `CUDA_HOME`, `CUDA_PATH` and `CUDA_ROOT` environment variables, and initialization succeeds as soon as a valid CUDA library is found through any of them.
+ ```python
+ import os
+ os.environ["CUDA_PATH"]=r"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2"
+  # import fastdeploy after setting the environment variable
+ import fastdeploy
+ ```
+  If CUDA is found successfully, the following message is printed:
+ ```shell
+ [FastDeploy][CUDA]: Found valid cuda directroy and added it: -> C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2\bin
+ ```
+
+- Option 2: set it on the terminal command line. This only takes effect in the current terminal. Open the `x64 Native Tools Command Prompt for VS 2019` tool from the Windows menu, assuming you want to run a command like `python infer_ppyoloe.py` in that terminal.
+ ```bat
+  % Setting any one of the following environment variables is enough %
+ set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+ set CUDA_HOME=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+ set CUDA_ROOT=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+ set CUDA_DIRECTORY=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+ ```
+
+- Option 3: set it in the system environment variables. This modifies the system environment variables. The steps are:
+  - (1) Open "Settings -> System -> About"
+  - (2) Find "Advanced system settings" and open it
+  - (3) Click "Environment Variables" at the bottom right
+  - (4) Note: click "New" at the bottom right of the "System variables" section; if the environment variable already exists, just confirm that the path is correct
+  - (5) Set **any one** of the `CUDA_DIRECTORY`, `CUDA_HOME`, `CUDA_PATH` and `CUDA_ROOT` environment variables
+  - (6) Set the environment variable according to the hint below and click OK
+ ```text
+ Variable name (N): any one of CUDA_DIRECTORY, CUDA_HOME, CUDA_PATH, or CUDA_ROOT
+ Variable value (V): e.g. C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+ ```
diff --git a/docs/compile/how_to_build_windows.md b/docs/compile/how_to_build_windows.md
index c0d60a7e95..2cf5371333 100644
--- a/docs/compile/how_to_build_windows.md
+++ b/docs/compile/how_to_build_windows.md
@@ -8,39 +8,86 @@
- cudnn >= 11.2 (when WITH_GPU=ON)
- TensorRT >= 8.4 (when ENABLE_TRT_BACKEND=ON)
-## Get the code
+## Build the CPU C++ SDK
+
+Open the `x64 Native Tools Command Prompt for VS 2019` from the Windows menu. `CMAKE_INSTALL_PREFIX` specifies the install path of the generated SDK
+
```bat
git clone https://github.com/PaddlePaddle/FastDeploy.git
-cd FastDeploy
-git checkout develop
+cd FastDeploy && git checkout develop
+mkdir build && cd build
+
+cmake .. -G "Visual Studio 16 2019" -A x64 -DCMAKE_INSTALL_PREFIX=D:\Paddle\FastDeploy\build\fastdeploy-win-x64-0.2.0 -DENABLE_ORT_BACKEND=ON -DENABLE_VISION=ON -DENABLE_VISION_VISUALIZE=ON
+msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=x64
+msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=x64
```
+After the build, the FastDeploy CPU C++ SDK is located in `D:\Paddle\FastDeploy\build\fastdeploy-win-x64-0.2.0`
-## Build the C++ SDK
+## Build the GPU C++ SDK
Open the `x64 Native Tools Command Prompt for VS 2019` from the Windows menu. `CMAKE_INSTALL_PREFIX` specifies the install path of the generated SDK
```bat
-mkdir build
-cd build
-cmake -G "Visual Studio 16 2019" -A x64 -DCMAKE_INSTALL_PREFIX=D:\Paddle\FastDeploy\build\fastdeploy-win-x64-0.2.0 -DENABLE_ORT_BACKEND=ON -DENABLE_VISION=ON ..
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy && git checkout develop
+mkdir build && cd build
+
+cmake .. -G "Visual Studio 16 2019" -A x64 -DCMAKE_INSTALL_PREFIX=D:\Paddle\FastDeploy\build\fastdeploy-win-x64-gpu-0.2.0 -DWITH_GPU=ON -DENABLE_ORT_BACKEND=ON -DENABLE_VISION=ON -DENABLE_VISION_VISUALIZE=ON -DENABLE_PADDLE_FRONTEND=ON "-DCUDA_DIRECTORY=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2"
msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=x64
-msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=x64
+msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=x64
+
+% Additional notes: %
+% (1) -DCUDA_DIRECTORY specifies the CUDA install directory %
+% (2) to build the Paddle backend, set -DENABLE_PADDLE_BACKEND=ON %
+% (3) to build the TensorRT backend, set -DENABLE_TRT_BACKEND=ON and specify TRT_DIRECTORY %
+% (4) e.g. -DTRT_DIRECTORY=D:\x64\third_party\TensorRT-8.4.1.5 %
```
-After the build, the C++ SDK is located in `D:\Paddle\FastDeploy\build\fastdeploy-win-x64-0.2.0`
+After the build, the FastDeploy GPU C++ SDK is located in `D:\Paddle\FastDeploy\build\fastdeploy-win-x64-gpu-0.2.0`
-## Build the Python wheel
+## Build the CPU Python wheel
-The Python build reads its options from environment variables
+Open the x64 Native Tools Command Prompt for VS 2019 from the Windows menu. The Python build reads its options from environment variables; run the following commands in the terminal
```bat
git clone https://github.com/PaddlePaddle/FastDeploy.git
-cd FastDeploy
-git checkout develop
+cd FastDeploy && git checkout develop
set ENABLE_ORT_BACKEND=ON
set ENABLE_VISION=ON
-python setup.py build
-python setup.py bdist_wheel
+% specify your own Python interpreter here, Python 3.8 is used as an example %
+C:\Python38\python.exe setup.py build
+C:\Python38\python.exe setup.py bdist_wheel
```
+The built wheel is placed in the dist directory; install it with pip as follows
+```bat
+C:\Python38\python.exe -m pip install dist\fastdeploy_python-0.2.0-cp38-cp38-win_amd64.whl
+```
+
+## Build the GPU Python wheel
+Open the x64 Native Tools Command Prompt for VS 2019 from the Windows menu. The Python build reads its options from environment variables; run the following commands in the terminal
+```bat
+% note: CUDA_DIRECTORY is your own CUDA directory, the value below is an example %
+set CUDA_DIRECTORY=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2
+% note: TRT_DIRECTORY is the directory of the downloaded TensorRT library, the value below is an example; it can be left unset if the TensorRT backend is not built %
+set TRT_DIRECTORY=D:\x64\third_party\TensorRT-8.4.1.5
+set WITH_GPU=ON
+set ENABLE_ORT_BACKEND=ON
+% note: set this to OFF if the TensorRT backend is not built %
+set ENABLE_TRT_BACKEND=ON
+set ENABLE_PADDLE_BACKEND=ON
+set ENABLE_PADDLE_FRONTEND=ON
+set ENABLE_VISION=ON
+set ENABLE_VISION_VISUALIZE=ON
+
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy && git checkout develop
+% note: specify your own Python interpreter here, Python 3.8 is used as an example %
+C:\Python38\python.exe setup.py build
+C:\Python38\python.exe setup.py bdist_wheel
+```
+The built wheel is placed in the dist directory; install it with pip as follows
+```bat
+C:\Python38\python.exe -m pip install dist\fastdeploy_gpu_python-0.2.0-cp38-cp38-win_amd64.whl
+```
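+After installation, a quick import check can confirm the wheel is usable. A minimal sketch, assuming `RuntimeOption` is exposed at the top level of the `fastdeploy` package:
+```python
+import fastdeploy
+
+option = fastdeploy.RuntimeOption()
+option.use_ort_backend()  # backend enabled via ENABLE_ORT_BACKEND above
+```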
For more build options, see the [build guide](./README.md)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000000..160cb57519
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
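+# The helpers below derive example target names from the source layout
+# examples/<FIELD>/<namespace>/<class>/cpp/xxx.cc, producing an executable
+# named <FIELD>_<namespace>_<class>_xxx.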
+function(get_fastdeploy_example_names NAME_SPACE CLASS_NAME CC_FILE)
+ get_filename_component(CPP_DIR ${CC_FILE} DIRECTORY)
+ get_filename_component(CLASS_DIR ${CPP_DIR} DIRECTORY)
+ get_filename_component(NAME_SPACE_DIR ${CLASS_DIR} DIRECTORY)
+ get_filename_component(_CLASS_NAME ${CLASS_DIR} NAME)
+ get_filename_component(_NAME_SPACE ${NAME_SPACE_DIR} NAME)
+ set(${NAME_SPACE} ${_NAME_SPACE} PARENT_SCOPE)
+ set(${CLASS_NAME} ${_CLASS_NAME} PARENT_SCOPE)
+endfunction()
+
+set(EXAMPLES_NUM 0)
+function(add_fastdeploy_executable FIELD CC_FILE)
+ # temp target name/file var in function scope
+ set(TEMP_TARGET_FILE ${CC_FILE})
+ get_filename_component(FILE_NAME ${CC_FILE} NAME)
+ string(REGEX REPLACE ".cc" "" FILE_NAME ${FILE_NAME})
+ get_fastdeploy_example_names(NAME_SPACE CLASS_NAME ${CC_FILE})
+ set(TEMP_TARGET_NAME ${FIELD}_${NAME_SPACE}_${CLASS_NAME}_${FILE_NAME})
+ if(EXISTS ${TEMP_TARGET_FILE} AND TARGET fastdeploy)
+ add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
+ target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
+ math(EXPR _EXAMPLES_NUM "${EXAMPLES_NUM} + 1")
+ set(EXAMPLES_NUM ${_EXAMPLES_NUM} PARENT_SCOPE)
+ string(LENGTH ${EXAMPLES_NUM} len)
+ set(MESSAGE_HEAD "[${EXAMPLES_NUM}]")
+ if(${len} EQUAL 1)
+ set(MESSAGE_HEAD "[00${EXAMPLES_NUM}]")
+ elseif(${len} EQUAL 2)
+ set(MESSAGE_HEAD "[0${EXAMPLES_NUM}]")
+ endif()
+ message(STATUS " ${MESSAGE_HEAD} Added FastDeploy Executable : ${TEMP_TARGET_NAME}")
+ endif()
+ unset(TEMP_TARGET_FILE)
+ unset(TEMP_TARGET_NAME)
+endfunction()
+
+# vision examples
+if(BUILD_EXAMPLES AND ENABLE_VISION)
+ if(NOT ENABLE_VISION_VISUALIZE)
+ message(FATAL_ERROR "ENABLE_VISION_VISUALIZE must be ON while BUILD_EXAMPLES and ENABLE_VISION both ON.")
+ endif()
+ if(EXISTS ${PROJECT_SOURCE_DIR}/examples/vision)
+ message(STATUS "")
+ message(STATUS "*************FastDeploy Vision Examples Summary**********")
+ file(GLOB_RECURSE ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/vision/*.cc)
+ foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS})
+ add_fastdeploy_executable(vision ${_CC_FILE})
+ endforeach()
+ message(STATUS " [FastDeploy Executable Path] : ${EXECUTABLE_OUTPUT_PATH}")
+ endif()
+endif()
+
+# text examples
+if(BUILD_EXAMPLES AND ENABLE_TEXT)
+ if(EXISTS ${PROJECT_SOURCE_DIR}/examples/text)
+ message(STATUS "")
+ message(STATUS "*************FastDeploy Text Examples Summary**********")
+ file(GLOB_RECURSE ALL_TEXT_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/text/*.cc)
+ foreach(_CC_FILE ${ALL_TEXT_EXAMPLE_SRCS})
+ add_fastdeploy_executable(text ${_CC_FILE})
+ endforeach()
+ message(STATUS " [FastDeploy Executable Path] : ${EXECUTABLE_OUTPUT_PATH}")
+ endif()
+endif()
+
+# other examples ...
diff --git a/examples/text/information_extraction/ernie/cpp/infer.cc b/examples/text/information_extraction/ernie/cpp/infer.cc
deleted file mode 100644
index 7f3b931866..0000000000
--- a/examples/text/information_extraction/ernie/cpp/infer.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include
-#include
-
-#include "fastdeploy/function/reduce.h"
-#include "fastdeploy/function/softmax.h"
-#include "fastdeploy/text.h"
-#include "tokenizers/ernie_faster_tokenizer.h"
-
-using namespace paddlenlp;
-
-void LoadTransitionFromFile(const std::string& file,
-                            std::vector<float>* transitions, int* num_tags) {
- std::ifstream fin(file);
- std::string curr_transition;
- float transition;
- int i = 0;
- while (fin) {
- std::getline(fin, curr_transition);
- std::istringstream iss(curr_transition);
- while (iss) {
- iss >> transition;
- transitions->push_back(transition);
- }
- if (curr_transition != "") {
- ++i;
- }
- }
- *num_tags = i;
-}
-
-template <typename T>
-void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
- const fastdeploy::FDTensor& trans,
- fastdeploy::FDTensor* best_path) {
- int batch_size = slot_logits.shape[0];
- int seq_len = slot_logits.shape[1];
- int num_tags = slot_logits.shape[2];
- best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
-
-  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
-  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
-  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
-  std::vector<T> scores(num_tags);
-  std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin());
-  std::vector<std::vector<T>> M(num_tags, std::vector<T>(num_tags));
-  for (int b = 0; b < batch_size; ++b) {
-    std::vector<std::vector<int>> paths;
- const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
- int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
- for (int t = 1; t < seq_len; t++) {
- for (size_t i = 0; i < num_tags; i++) {
- for (size_t j = 0; j < num_tags; j++) {
- auto trans_idx = i * num_tags * num_tags + j * num_tags;
- auto slot_logit_idx = t * num_tags + j;
- M[i][j] = scores[i] + trans_ptr[trans_idx] +
- curr_slot_logits_ptr[slot_logit_idx];
- }
- }
-      std::vector<int> idxs;
- for (size_t i = 0; i < num_tags; i++) {
- T max = 0.0f;
- int idx = 0;
- for (size_t j = 0; j < num_tags; j++) {
- if (M[j][i] > max) {
- max = M[j][i];
- idx = j;
- }
- }
- scores[i] = max;
- idxs.push_back(idx);
- }
- paths.push_back(idxs);
- }
- int scores_max_index = 0;
- float scores_max = 0.0f;
- for (size_t i = 0; i < scores.size(); i++) {
- if (scores[i] > scores_max) {
- scores_max = scores[i];
- scores_max_index = i;
- }
- }
- curr_best_path_ptr[seq_len - 1] = scores_max_index;
- for (int i = seq_len - 2; i >= 0; i--) {
- int index = curr_best_path_ptr[i + 1];
- curr_best_path_ptr[i] = paths[i][index];
- }
- }
-}
-
-int main() {
- // 1. Define a ernie faster tokenizer
- faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
- "ernie_vocab.txt");
-  std::vector<std::string> strings_list = {
-      "导航去科技园二号楼", "屏幕亮度为我减小一点吧"};
-  std::vector<faster_tokenizer::core::Encoding> encodings;
- tokenizer.EncodeBatchStrings(strings_list, &encodings);
- size_t batch_size = strings_list.size();
- size_t seq_len = encodings[0].GetLen();
- for (auto&& encoding : encodings) {
- std::cout << encoding.DebugString() << std::endl;
- }
- // 2. Initialize runtime
- fastdeploy::RuntimeOption runtime_option;
- runtime_option.SetModelPath("nano_static/model.pdmodel",
- "nano_static/model.pdiparams");
- fastdeploy::Runtime runtime;
- runtime.Init(runtime_option);
-
- // 3. Construct input vector
- // 3.1 Convert encodings to input_ids, token_type_ids
-  std::vector<int64_t> input_ids, token_type_ids;
- for (int i = 0; i < encodings.size(); ++i) {
- auto&& curr_input_ids = encodings[i].GetIds();
- auto&& curr_type_ids = encodings[i].GetTypeIds();
- input_ids.insert(input_ids.end(), curr_input_ids.begin(),
- curr_input_ids.end());
- token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
- curr_type_ids.end());
- }
- // 3.2 Set data to input vector
-  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
- void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
- for (int i = 0; i < runtime.NumInputs(); ++i) {
- inputs[i].SetExternalData({batch_size, seq_len},
- fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
- inputs[i].name = runtime.GetInputInfo(i).name;
- }
-
- // 4. Infer
-  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
- runtime.Infer(inputs, &outputs);
-
- // 5. Postprocess
- fastdeploy::FDTensor domain_probs, intent_probs;
- fastdeploy::Softmax(outputs[0], &domain_probs);
- fastdeploy::Softmax(outputs[1], &intent_probs);
-
- fastdeploy::FDTensor domain_max_probs, intent_max_probs;
- fastdeploy::Max(domain_probs, &domain_max_probs, {-1}, true);
- fastdeploy::Max(intent_probs, &intent_max_probs, {-1}, true);
-
-  std::vector<float> transition;
- int num_tags;
- LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
- fastdeploy::FDTensor trans;
- trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
- transition.data());
-
- fastdeploy::FDTensor best_path;
-  ViterbiDecode<float>(outputs[2], trans, &best_path);
- // 6. Print result
- domain_max_probs.PrintInfo();
- intent_max_probs.PrintInfo();
-
- batch_size = best_path.shape[0];
- seq_len = best_path.shape[1];
-  const int64_t* best_path_ptr =
-      reinterpret_cast<const int64_t*>(best_path.Data());
- for (int i = 0; i < batch_size; ++i) {
- std::cout << "best_path[" << i << "] = ";
- for (int j = 0; j < seq_len; ++j) {
- std::cout << best_path_ptr[i * seq_len + j] << ", ";
- }
- std::cout << std::endl;
- }
- best_path.PrintInfo();
- return 0;
-}
diff --git a/examples/text/information_extraction/ernie/cpp/CMakeLists.txt b/examples/text/uie/cpp/CMakeLists.txt
similarity index 85%
rename from examples/text/information_extraction/ernie/cpp/CMakeLists.txt
rename to examples/text/uie/cpp/CMakeLists.txt
index 1189820cb7..80731eda45 100644
--- a/examples/text/information_extraction/ernie/cpp/CMakeLists.txt
+++ b/examples/text/uie/cpp/CMakeLists.txt
@@ -21,5 +21,5 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
-add_executable(infer_ernie_demo ${PROJECT_SOURCE_DIR}/infer.cc)
-target_link_libraries(infer_ernie_demo ${FASTDEPLOY_LIBS})
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc ${PROJECT_SOURCE_DIR}/uie.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/examples/text/uie/cpp/README.md b/examples/text/uie/cpp/README.md
new file mode 100644
index 0000000000..c943b5042b
--- /dev/null
+++ b/examples/text/uie/cpp/README.md
@@ -0,0 +1,47 @@
+# Universal Information Extraction (UIE) C++ Deployment Example
+
+This directory provides `infer.cc`, a quick example of deploying the [UIE model](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie) on CPU/GPU.
+
+Before deployment, confirm the following two steps
+
+- 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/quick_start/requirements.md)
+- 2. Download the prebuilt deployment library and sample code for your development environment; see [FastDeploy prebuilt libraries](../../../../docs/compile/prebuilt_libraries.md)
+
+Taking uie-base inference on Linux as an example, run the following commands in this directory to build and test.
+
+```
+# UIE has not been released yet, so for now build FastDeploy yourself; the script below produces the deployment library fastdeploy-linux-x64-gpu-dev
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+cmake .. -DENABLE_ORT_BACKEND=ON \
+ -DENABLE_VISION=ON \
+ -DENABLE_PADDLE_BACKEND=ON \
+ -DENABLE_TEXT=ON \
+ -DWITH_GPU=ON \
+ -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-linux-x64-gpu-dev
+
+make -j8
+make install
+
+# Build the example code for this model (the SDK also ships the examples)
+cd ../examples/text/uie/cpp
+mkdir build
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../build/fastdeploy-linux-x64-gpu-dev
+make -j
+
+# Download the uie-base model and vocabulary
+wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz
+tar -xzvf uie-base.tgz
+
+
+# CPU inference
+./infer_demo uie-base 0
+
+# GPU inference
+./infer_demo uie-base 1
+```
+
+## Obtaining the model
+An introduction to the UIE model is available at https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie . After training, the trained model needs to be exported as an inference model; the export steps are described at https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie#%E6%A8%A1%E5%9E%8B%E9%83%A8%E7%BD%B2 .
diff --git a/examples/text/uie/cpp/infer.cc b/examples/text/uie/cpp/infer.cc
new file mode 100644
index 0000000000..70d6f3518d
--- /dev/null
+++ b/examples/text/uie/cpp/infer.cc
@@ -0,0 +1,115 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/text.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+#include "uie.h"
+
+using namespace paddlenlp;
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "Usage: infer_demo path/to/model run_option, "
+ "e.g ./infer_demo uie-base 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu."
+ << std::endl;
+ return -1;
+ }
+ auto option = fastdeploy::RuntimeOption();
+ if (std::atoi(argv[2]) == 0) {
+ option.UseCpu();
+ } else {
+ option.UseGpu();
+ }
+ std::string model_dir(argv[1]);
+ std::string model_path = model_dir + sep + "inference.pdmodel";
+ std::string param_path = model_dir + sep + "inference.pdiparams";
+ std::string vocab_path = model_dir + sep + "vocab.txt";
+
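+  // Build the UIE predictor: 0.5 is the position probability threshold used to
+  // filter start/end candidates, 128 is the maximum sequence length, and the
+  // initial schema lists the entity types to extract.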
+ auto predictor = UIEModel(model_path, param_path, vocab_path, 0.5, 128,
+ {"时间", "选手", "赛事名称"}, option);
+ fastdeploy::FDINFO << "After init predictor" << std::endl;
+  std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
+ // Named Entity Recognition
+ predictor.Predict({"2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷"
+ "爱凌以188.25分获得金牌!"},
+ &results);
+ std::cout << results << std::endl;
+ results.clear();
+
+ // Relation Extraction
+ predictor.SetSchema({{"竞赛名称",
+ {SchemaNode("主办方"), SchemaNode("承办方"),
+ SchemaNode("已举办次数")}}});
+ predictor.Predict(
+ {"2022语言与智能技术竞赛由中国中文信息学会和中国计算机学会联合主办,百度"
+ "公司、中国中文信息学会评测工作委员会和中国计算机学会自然语言处理专委会"
+ "承办,已连续举办4届,成为全球最热门的中文NLP赛事之一。"},
+ &results);
+ std::cout << results << std::endl;
+ results.clear();
+
+ // Event Extraction
+ predictor.SetSchema({{"地震触发词",
+ {SchemaNode("地震强度"), SchemaNode("时间"),
+ SchemaNode("震中位置"), SchemaNode("震源深度")}}});
+ predictor.Predict(
+ {"中国地震台网正式测定:5月16日06时08分在云南临沧市凤庆县(北纬24."
+ "34度,东经99.98度)发生3.5级地震,震源深度10千米。"},
+ &results);
+ std::cout << results << std::endl;
+ results.clear();
+
+ // Opinion Extraction
+ predictor.SetSchema(
+ {{"评价维度",
+ {SchemaNode("观点词"), SchemaNode("情感倾向[正向,负向]")}}});
+ predictor.Predict(
+ {"店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"},
+ &results);
+ std::cout << results << std::endl;
+ results.clear();
+
+ // Sequence classification
+ predictor.SetSchema({"情感倾向[正向,负向]"});
+ predictor.Predict({"这个产品用起来真的很流畅,我非常喜欢"}, &results);
+ std::cout << results << std::endl;
+ results.clear();
+
+ // Cross task extraction
+
+ predictor.SetSchema({{"法院", {}},
+ {"原告", {SchemaNode("委托代理人")}},
+ {"被告", {SchemaNode("委托代理人")}}});
+ predictor.Predict({"北京市海淀区人民法院\n民事判决书\n(199x)"
+ "建初字第xxx号\n原告:张三。\n委托代理人李四,北京市 "
+ "A律师事务所律师。\n被告:B公司,法定代表人王五,开发公司"
+ "总经理。\n委托代理人赵六,北京市 C律师事务所律师。"},
+ &results);
+ std::cout << results << std::endl;
+ results.clear();
+ return 0;
+}
diff --git a/examples/text/uie/cpp/uie.cc b/examples/text/uie/cpp/uie.cc
new file mode 100644
index 0000000000..e9f124ba52
--- /dev/null
+++ b/examples/text/uie/cpp/uie.cc
@@ -0,0 +1,646 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "uie.h"
+#include <algorithm>
+#include <numeric>
+#include <ostream>
+#include <queue>
+#include <utility>
+
+#include "faster_tokenizer/pretokenizers/pretokenizer.h"
+#include "faster_tokenizer/utils/utf8.h"
+
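+// Convert full-width (DBC) characters in prompts/texts to their half-width
+// (SBC) ASCII equivalents, e.g. a full-width space (0x3000) becomes a normal
+// space, so that downstream tokenization sees normalized input.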
+static std::string DBC2SBC(const std::string& content) {
+ std::string result;
+ size_t content_utf8_len = 0;
+ while (content_utf8_len < content.length()) {
+ uint32_t content_char;
+ auto content_char_width = faster_tokenizer::utils::UTF8ToUInt32(
+ content.data() + content_utf8_len, &content_char);
+ content_char = faster_tokenizer::utils::UTF8ToUnicode(content_char);
+ if (content_char == 0x3000) {
+ content_char = 0x0020;
+ } else {
+ content_char -= 0xfee0;
+ }
+ if (!(content_char >= 0x0021 && content_char <= 0x7e)) {
+ result.append(content.data() + content_utf8_len, content_char_width);
+ } else {
+ char dst_char[5] = {0};
+ uint32_t utf8_uint32 =
+ faster_tokenizer::utils::UnicodeToUTF8(content_char);
+ uint32_t utf8_char_count =
+ faster_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char);
+ result.append(dst_char, utf8_char_count);
+ }
+ content_utf8_len += content_char_width;
+ }
+ return result;
+}
+
+static std::ostream& PrintResult(std::ostream& os, const UIEResult& result,
+ int tab_size) {
+ constexpr int TAB_OFFSET = 4;
+ // Print text
+ for (int i = 0; i < tab_size; ++i) {
+ os << " ";
+ }
+ os << "text: " << result.text_ << "\n";
+
+ // Print probability
+ for (int i = 0; i < tab_size; ++i) {
+ os << " ";
+ }
+ os << "probability: " << result.probability_ << "\n";
+
+ if (result.start_ != 0 || result.end_ != 0) {
+ // Print start
+ for (int i = 0; i < tab_size; ++i) {
+ os << " ";
+ }
+ os << "start: " << result.start_ << "\n";
+
+ // Print end
+ for (int i = 0; i < tab_size; ++i) {
+ os << " ";
+ }
+ os << "end: " << result.end_ << "\n";
+ }
+
+ // Print relation
+ if (result.relation_.size() > 0) {
+ for (int i = 0; i < tab_size; ++i) {
+ os << " ";
+ }
+ os << "relation:\n";
+ for (auto&& curr_relation : result.relation_) {
+ for (int i = 0; i < tab_size + TAB_OFFSET; ++i) {
+ os << " ";
+ }
+ os << curr_relation.first << ":\n";
+ for (int i = 0; i < curr_relation.second.size(); ++i) {
+ PrintResult(os, curr_relation.second[i],
+ tab_size + TAB_OFFSET + TAB_OFFSET);
+ }
+ }
+ }
+ os << "\n";
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const UIEResult& result) {
+ return PrintResult(os, result, 0);
+}
+
+std::ostream& operator<<(
+ std::ostream& os,
+    const std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>&
+ results) {
+ os << "The result:\n";
+ for (int i = 0; i < results.size(); ++i) {
+ for (auto&& curr_result : results[i]) {
+ os << curr_result.first << ": \n";
+ for (auto&& uie_result : curr_result.second) {
+ PrintResult(os, uie_result, 4);
+ }
+ }
+ os << std::endl;
+ }
+ return os;
+}
+
+void Schema::CreateRoot(const std::string& name) {
+  root_ = fastdeploy::utils::make_unique<SchemaNode>(name);
+}
+
+Schema::Schema(const std::string& schema, const std::string& name) {
+ CreateRoot(name);
+ root_->AddChild(schema);
+}
+
+Schema::Schema(const std::vector<std::string>& schema_list,
+ const std::string& name) {
+ CreateRoot(name);
+ for (const auto& schema : schema_list) {
+ root_->AddChild(schema);
+ }
+}
+
+Schema::Schema(
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema_map,
+ const std::string& name) {
+ CreateRoot(name);
+ for (auto& schema_item : schema_map) {
+ root_->AddChild(schema_item.first, schema_item.second);
+ }
+}
+
+UIEModel::UIEModel(const std::string& model_file,
+ const std::string& params_file,
+ const std::string& vocab_file, float position_prob,
+                   size_t max_length, const std::vector<std::string>& schema,
+ const fastdeploy::RuntimeOption& custom_option,
+ const fastdeploy::Frontend& model_format)
+ : max_length_(max_length),
+ position_prob_(position_prob),
+ tokenizer_(vocab_file) {
+ runtime_option_ = custom_option;
+ runtime_option_.model_format = model_format;
+ runtime_option_.SetModelPath(model_file, params_file);
+ runtime_.Init(runtime_option_);
+ SetSchema(schema);
+ tokenizer_.EnableTruncMethod(
+ max_length, 0, faster_tokenizer::core::Direction::RIGHT,
+ faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
+}
+
+UIEModel::UIEModel(
+ const std::string& model_file, const std::string& params_file,
+ const std::string& vocab_file, float position_prob, size_t max_length,
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema,
+ const fastdeploy::RuntimeOption& custom_option,
+ const fastdeploy::Frontend& model_format)
+ : max_length_(max_length),
+ position_prob_(position_prob),
+ tokenizer_(vocab_file) {
+ runtime_option_ = custom_option;
+ runtime_option_.model_format = model_format;
+ runtime_option_.SetModelPath(model_file, params_file);
+ runtime_.Init(runtime_option_);
+ SetSchema(schema);
+ tokenizer_.EnableTruncMethod(
+ max_length, 0, faster_tokenizer::core::Direction::RIGHT,
+ faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
+}
+
+void UIEModel::SetSchema(const std::vector<std::string>& schema) {
+  schema_ = fastdeploy::utils::make_unique<Schema>(schema);
+}
+
+void UIEModel::SetSchema(
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema) {
+  schema_ = fastdeploy::utils::make_unique<Schema>(schema);
+}
+
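+// Split every text longer than max_length (counted in unicode characters) into
+// chunks of at most max_length, and record in input_mapping which chunk
+// indices belong to each original text.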
+void UIEModel::AutoSplitter(
+    const std::vector<std::string>& texts, size_t max_length,
+    std::vector<std::string>* short_texts,
+    std::unordered_map<size_t, std::vector<size_t>>* input_mapping) {
+ size_t cnt_org = 0;
+ size_t cnt_short = 0;
+ for (auto& text : texts) {
+ auto text_len = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ text.c_str(), text.length());
+ if (text_len <= max_length) {
+ short_texts->push_back(text);
+ if (input_mapping->count(cnt_org) == 0) {
+ (*input_mapping)[cnt_org] = {cnt_short};
+ } else {
+ (*input_mapping)[cnt_org].push_back(cnt_short);
+ }
+ cnt_short += 1;
+ } else {
+ faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
+ text);
+ for (size_t start = 0; start < text_len; start += max_length) {
+ size_t end = start + max_length;
+ if (end > text_len) {
+ end = text_len;
+ }
+ faster_tokenizer::core::Offset byte_offset;
+ converter.convert({start, end}, &byte_offset);
+ short_texts->emplace_back(text.data() + byte_offset.first,
+ byte_offset.second - byte_offset.first);
+ }
+ auto short_idx = cnt_short;
+ cnt_short += text_len / max_length;
+ if (text_len % max_length != 0) {
+ ++cnt_short;
+ }
+      std::vector<size_t> temp_text_id(cnt_short - short_idx);
+ std::iota(temp_text_id.begin(), temp_text_id.end(), short_idx);
+ if (input_mapping->count(cnt_org) == 0) {
+ (*input_mapping)[cnt_org] = std::move(temp_text_id);
+ } else {
+ (*input_mapping)[cnt_org].insert((*input_mapping)[cnt_org].end(),
+ temp_text_id.begin(),
+ temp_text_id.end());
+ }
+ }
+ cnt_org += 1;
+ }
+}
+
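+// For each batch item, collect the token positions whose probability exceeds
+// the threshold, together with that probability.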
+void UIEModel::GetCandidateIdx(
+ const float* probs, int64_t batch_size, int64_t seq_len,
+    std::vector<std::vector<std::pair<int64_t, float>>>* candidate_idx_prob,
+ float threshold) const {
+ for (int i = 0; i < batch_size; ++i) {
+ candidate_idx_prob->push_back({});
+ for (int j = 0; j < seq_len; ++j) {
+ if (probs[i * seq_len + j] > threshold) {
+ candidate_idx_prob->back().push_back({j, probs[i * seq_len + j]});
+ }
+ }
+ }
+}
+
+bool UIEModel::IdxProbCmp::operator()(
+    const std::pair<std::pair<int64_t, float>, std::pair<int64_t, float>>& lhs,
+    const std::pair<std::pair<int64_t, float>, std::pair<int64_t, float>>& rhs) const {
+ if (lhs.first.first == rhs.first.first) {
+ return lhs.second.first < rhs.second.first;
+ }
+ return lhs.first.first < rhs.first.first;
+}
+
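+// Pair each start candidate with the nearest end candidate at or after it to
+// form candidate spans; the pairs are kept in a set ordered by IdxProbCmp.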
+void UIEModel::GetSpan(const std::vector<std::pair<int64_t, float>>& start_idx_prob,
+                       const std::vector<std::pair<int64_t, float>>& end_idx_prob,
+ SPAN_SET* span_set) const {
+ size_t start_pointer = 0;
+ size_t end_pointer = 0;
+ size_t len_start = start_idx_prob.size();
+ size_t len_end = end_idx_prob.size();
+ while (start_pointer < len_start && end_pointer < len_end) {
+ if (start_idx_prob[start_pointer].first ==
+ end_idx_prob[end_pointer].first) {
+ span_set->insert(std::make_pair(start_idx_prob[start_pointer],
+ end_idx_prob[end_pointer]));
+ ++start_pointer;
+ ++end_pointer;
+ } else if (start_idx_prob[start_pointer].first <
+ end_idx_prob[end_pointer].first) {
+ span_set->insert(std::make_pair(start_idx_prob[start_pointer],
+ end_idx_prob[end_pointer]));
+ ++start_pointer;
+ } else {
+ ++end_pointer;
+ }
+ }
+}
+void UIEModel::GetSpanIdxAndProbs(
+ const SPAN_SET& span_set,
+    const std::vector<faster_tokenizer::core::Offset>& offset_mapping,
+ std::vector* span_idxs, std::vector* probs) const {
+ auto first_sep_idx =
+ std::find_if(offset_mapping.begin() + 1, offset_mapping.end(),
+ [](const faster_tokenizer::core::Offset& offset) {
+ return offset == faster_tokenizer::core::Offset(0, 0);
+ });
+ auto prompt_end_token_id =
+ std::distance(offset_mapping.begin(), first_sep_idx) - 1;
+ for (auto&& span_item : span_set) {
+ probs->push_back(span_item.first.second * span_item.second.second);
+ auto start_id = offset_mapping[span_item.first.first].first;
+ auto end_id = offset_mapping[span_item.second.first].second;
+ bool is_prompt = span_item.second.first <= prompt_end_token_id &&
+ span_item.second.first > 0;
+ span_idxs->push_back({{start_id, end_id}, is_prompt});
+ }
+}
+
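+// Map token-level span indices back to character offsets in the original text
+// (or in the prompt for classification-style results, where start/end are
+// reset to 0) and build the corresponding UIEResult entries.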
+void UIEModel::ConvertSpanToUIEResult(
+    const std::vector<std::string>& texts,
+    const std::vector<std::string>& prompts,
+    const std::vector>& span_idxs,
+    const std::vector<std::vector<float>>& probs,
+    std::vector<std::vector<UIEResult>>* results) const {
+  auto batch_size = texts.size();
+  for (int i = 0; i < batch_size; ++i) {
+    std::vector<UIEResult> result_list;
+ if (span_idxs[i].size() == 0) {
+ results->push_back({});
+ continue;
+ }
+ auto&& text = texts[i];
+ auto&& prompt = prompts[i];
+ for (int j = 0; j < span_idxs[i].size(); ++j) {
+ auto start = span_idxs[i][j].offset_.first;
+ auto end = span_idxs[i][j].offset_.second;
+ std::string span_text;
+ std::vector offset_mapping;
+ if (span_idxs[i][j].is_prompt_) {
+ faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
+ prompt);
+ faster_tokenizer::core::Offset byte_offset;
+ converter.convert({start, end}, &byte_offset);
+ span_text = prompt.substr(byte_offset.first,
+ byte_offset.second - byte_offset.first);
+ // Indicate cls task
+ start = 0;
+ end = 0;
+ } else {
+ faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
+ text);
+ faster_tokenizer::core::Offset byte_offset;
+ converter.convert({start, end}, &byte_offset);
+ span_text = text.substr(byte_offset.first,
+ byte_offset.second - byte_offset.first);
+ }
+ result_list.emplace_back(start, end, probs[i][j], span_text);
+ }
+ results->push_back(result_list);
+ }
+}
+
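+// Merge per-chunk results back onto the original texts: classification results
+// (start == end == 0) are aggregated by accumulated probability, while span
+// results have their offsets shifted by the unicode length of preceding chunks.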
+void UIEModel::AutoJoiner(
+    const std::vector<std::string>& short_texts,
+    const std::unordered_map<size_t, std::vector<size_t>>& input_mapping,
+    std::vector<std::vector<UIEResult>>* results) {
+ bool is_cls_task = false;
+ // 1. Detect if it's a cls task
+ for (auto&& short_result : *results) {
+ if (short_result.size() == 0) {
+ continue;
+ } else if (short_result[0].start_ == 0 && short_result[0].end_ == 0) {
+ is_cls_task = true;
+ break;
+ } else {
+ break;
+ }
+ }
+ // 2. Get the final result
+  std::vector<std::vector<UIEResult>> final_result;
+  if (is_cls_task) {
+    for (auto&& input_mapping_item : input_mapping) {
+      auto curr_mapping = input_mapping_item.second;
+      std::unordered_map<std::string, std::pair<int, float>> cls_options;
+ for (auto&& result_idx : curr_mapping) {
+ if ((*results)[result_idx].size() == 0) {
+ continue;
+ }
+ auto&& text = (*results)[result_idx].front().text_;
+ auto&& probability = (*results)[result_idx].front().probability_;
+ if (cls_options.count(text) == 0) {
+ cls_options[text] = std::make_pair(1, probability);
+ } else {
+ cls_options[text].first += 1;
+ cls_options[text].second += probability;
+ }
+ }
+      std::vector<UIEResult> result_list;
+      if (cls_options.size() > 0) {
+        auto max_iter = std::max_element(
+            cls_options.begin(), cls_options.end(),
+            [](const std::pair<std::string, std::pair<int, float>>& lhs,
+               const std::pair<std::string, std::pair<int, float>>& rhs) {
+ return lhs.second.second < rhs.second.second;
+ });
+ result_list.emplace_back(
+ 0, 0, max_iter->second.second / max_iter->second.first,
+ max_iter->first);
+ }
+ final_result.push_back(result_list);
+ }
+ } else {
+ for (auto&& input_mapping_item : input_mapping) {
+ auto curr_mapping = input_mapping_item.second;
+ size_t offset = 0;
+      std::vector<UIEResult> result_list;
+ for (auto&& result_idx : curr_mapping) {
+ if (result_idx == 0) {
+ result_list = std::move((*results)[result_idx]);
+ offset += faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ short_texts[result_idx].c_str(), short_texts[result_idx].size());
+ } else {
+ for (auto&& curr_result : (*results)[result_idx]) {
+ curr_result.start_ += offset;
+ curr_result.end_ += offset;
+ }
+ offset += faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ short_texts[result_idx].c_str(), short_texts[result_idx].size());
+ result_list.insert(result_list.end(), (*results)[result_idx].begin(),
+ (*results)[result_idx].end());
+ }
+ }
+ final_result.push_back(result_list);
+ }
+ }
+ *results = std::move(final_result);
+}
+
+void UIEModel::PredictUIEInput(const std::vector<std::string>& input_texts,
+                               const std::vector<std::string>& prompts,
+                               std::vector<std::vector<UIEResult>>* results) {
+ // 1. Shortten the input texts and prompts
+ auto max_prompt_iter = std::max_element(
+ prompts.begin(), prompts.end(),
+ [](const std::string& lhs, const std::string& rhs) {
+ auto lhs_ulen = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ lhs.c_str(), lhs.length());
+ auto rhs_ulen = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ rhs.c_str(), rhs.length());
+ return lhs_ulen < rhs_ulen;
+ });
+
+ auto max_prompt_len = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+ max_prompt_iter->c_str(), max_prompt_iter->length());
+ auto max_predict_len = max_length_ - 3 - max_prompt_len;
+
+  std::vector<std::string> short_texts;
+  std::unordered_map<size_t, std::vector<size_t>> input_mapping;
+  AutoSplitter(input_texts, max_predict_len, &short_texts, &input_mapping);
+
+  std::vector<std::string> short_texts_prompts;
+  for (auto& item : input_mapping) {
+    short_texts_prompts.insert(short_texts_prompts.end(), item.second.size(),
+                               prompts[item.first]);
+  }
+  std::vector text_pair_input;
+  for (int i = 0; i < short_texts.size(); ++i) {
+    text_pair_input.emplace_back(std::pair<std::string, std::string>(
+        short_texts_prompts[i], short_texts[i]));
+ }
+
+ // 2. Tokenize the short texts and short prompts
+  std::vector<faster_tokenizer::core::Encoding> encodings;
+  tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
+  // 3. Construct the input vector tensor
+  // 3.1 Convert encodings to input_ids, token_type_ids, position_ids, attn_mask
+  std::vector<int64_t> input_ids, token_type_ids, position_ids, attn_mask;
+  std::vector<std::vector<faster_tokenizer::core::Offset>> offset_mapping;
+ for (int i = 0; i < encodings.size(); ++i) {
+ auto&& curr_input_ids = encodings[i].GetIds();
+ auto&& curr_type_ids = encodings[i].GetTypeIds();
+ auto&& curr_attn_mask = encodings[i].GetAttentionMask();
+ auto&& curr_offsets = encodings[i].GetOffsets();
+ input_ids.insert(input_ids.end(), curr_input_ids.begin(),
+ curr_input_ids.end());
+ token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
+ curr_type_ids.end());
+ attn_mask.insert(attn_mask.end(), curr_attn_mask.begin(),
+ curr_attn_mask.end());
+ offset_mapping.push_back(curr_offsets);
+    std::vector<int64_t> curr_position_ids(curr_input_ids.size());
+ std::iota(curr_position_ids.begin(), curr_position_ids.end(), 0);
+ position_ids.insert(position_ids.end(), curr_position_ids.begin(),
+ curr_position_ids.end());
+ }
+
+ // 3.2 Set data to input vector
+ int64_t batch_size = short_texts.size();
+ int64_t seq_len = input_ids.size() / batch_size;
+  std::vector<fastdeploy::FDTensor> inputs(runtime_.NumInputs());
+ int64_t* inputs_ptrs[] = {input_ids.data(), token_type_ids.data(),
+ position_ids.data(), attn_mask.data()};
+ for (int i = 0; i < runtime_.NumInputs(); ++i) {
+ inputs[i].SetExternalData({batch_size, seq_len},
+ fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
+ inputs[i].name = runtime_.GetInputInfo(i).name;
+ }
+
+  std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
+  // 4. Infer
+  runtime_.Infer(inputs, &outputs);
+  auto* start_prob = reinterpret_cast<float*>(outputs[0].Data());
+  auto* end_prob = reinterpret_cast<float*>(outputs[1].Data());
+
+  // 5. Postprocess
+  std::vector<std::vector<std::pair<int64_t, float>>> start_candidate_idx_prob,
+ end_candidate_idx_prob;
+ GetCandidateIdx(start_prob, outputs[0].shape[0], outputs[0].shape[1],
+ &start_candidate_idx_prob, position_prob_);
+ GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1],
+ &end_candidate_idx_prob, position_prob_);
+ SPAN_SET span_set;
+  std::vector<std::vector<float>> probs(batch_size);
+ std::vector> span_idxs(batch_size);
+ for (int i = 0; i < batch_size; ++i) {
+ GetSpan(start_candidate_idx_prob[i], end_candidate_idx_prob[i], &span_set);
+ GetSpanIdxAndProbs(span_set, offset_mapping[i], &span_idxs[i], &probs[i]);
+ span_set.clear();
+ }
+ ConvertSpanToUIEResult(short_texts, short_texts_prompts, span_idxs, probs,
+ results);
+ AutoJoiner(short_texts, input_mapping, results);
+}
+
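+// Traverse the schema tree breadth-first with a queue: for each node, build one
+// prompt per input text (for nested nodes, one prompt per parent result,
+// prefixed with that result text) and run PredictUIEInput on the batch.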
+void UIEModel::Predict(
+    const std::vector<std::string>& texts,
+    std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+        results) {
+  std::queue<SchemaNode> nodes;
+ for (auto& node : schema_->root_->children_) {
+ nodes.push(node);
+ }
+ results->resize(texts.size());
+ while (!nodes.empty()) {
+ auto node = nodes.front();
+ nodes.pop();
+    std::vector<std::vector<size_t>> input_mapping;
+    size_t idx = 0;
+    std::vector<std::string> input_texts;
+    std::vector<std::string> prompts;
+ // 1. Construct input data from raw text
+ if (node.prefix_.empty()) {
+ for (int i = 0; i < texts.size(); ++i) {
+ input_texts.push_back(texts[i]);
+ prompts.push_back(DBC2SBC(node.name_));
+ input_mapping.push_back({idx});
+ idx += 1;
+ }
+ } else {
+ for (int i = 0; i < texts.size(); ++i) {
+ if (node.prefix_[i].size() == 0) {
+ input_mapping.push_back({});
+ } else {
+ for (auto&& pre : node.prefix_[i]) {
+ input_texts.push_back(texts[i]);
+ prompts.push_back(DBC2SBC(pre + node.name_));
+ }
+ auto prefix_len = node.prefix_[i].size();
+ input_mapping.push_back({});
+ input_mapping.back().resize(prefix_len);
+ std::iota(input_mapping.back().begin(), input_mapping.back().end(),
+ idx);
+ idx += prefix_len;
+ }
+ }
+ }
+
+ // 2. Predict from UIEInput
+ std::vector