From 1035d21f5002361c82190dc36aca210d41e1d69e Mon Sep 17 00:00:00 2001 From: huzhiqiang <912790387@qq.com> Date: Thu, 17 Feb 2022 01:03:24 -0600 Subject: [PATCH 1/3] refine data loader api in infrt (#39580) * update generate_pd_op_dialect_from_paddle_op_maker.py * update mlir tensor load interface * refine * fix bug * fix * refine * fix * 3 * fix * codestyle Co-authored-by: weishengying <1343838695@qq.com> --- paddle/infrt/dialect/dense_tensor.td | 3 +- paddle/infrt/host_context/paddle_mlir.cc | 13 +- paddle/infrt/kernel/tensor_kernels.cc | 9 +- .../tests/dialect/tensor/tensor_map.mlir.in | 3 +- paddle/scripts/infrt_build.sh | 2 +- ...rate_pd_op_dialect_from_paddle_op_maker.py | 147 ++++++++++++------ 6 files changed, 115 insertions(+), 62 deletions(-) diff --git a/paddle/infrt/dialect/dense_tensor.td b/paddle/infrt/dialect/dense_tensor.td index 7156e22951225..75c8a0d88e4c1 100644 --- a/paddle/infrt/dialect/dense_tensor.td +++ b/paddle/infrt/dialect/dense_tensor.td @@ -112,6 +112,7 @@ def LoadParamsOp : DT_Op<"load_params", [NoSideEffect]> { let verifier = ?; } + def TensorMapGetTensorOp : DT_Op<"tensor_map_get_tensor", [NoSideEffect]> { let summary = "dt.tensor_map_get_tensor operation"; @@ -122,7 +123,7 @@ def TensorMapGetTensorOp : DT_Op<"tensor_map_get_tensor", [NoSideEffect]> { // input path of model params. let arguments = (ins TensorMapType:$map, - StringType:$name + StrAttr:$name ); let results = (outs TensorType:$output); let assemblyFormat = "`(` operands `)` attr-dict `->` type($output)"; diff --git a/paddle/infrt/host_context/paddle_mlir.cc b/paddle/infrt/host_context/paddle_mlir.cc index 475e1e8816820..1c36b04f366bf 100644 --- a/paddle/infrt/host_context/paddle_mlir.cc +++ b/paddle/infrt/host_context/paddle_mlir.cc @@ -171,7 +171,7 @@ void MLIRModelGenImpl::UpdateModelParams( builder_, &precision_); mlir::Type type_ = mlir::RankedTensorType::get(dims, precision_); - auto op = builder_.create( + auto op = builder_.create( mlir::UnknownLoc::get(context_), type_, map, name); params_map_.insert(std::pair( var_desc.name(), op.getOperation()->getResult(0))); @@ -224,15 +224,14 @@ llvm::SmallVector MLIRModelGenImpl::GetOpInputValue( const infrt::paddle::framework_proto::OpDesc &op_) { llvm::SmallVector operands; - std::vector inputs_info = {}; + std::unordered_map inputs_info = {}; if (pd_dialect_inputs_info_map_.count(op_.type())) inputs_info = pd_dialect_inputs_info_map_.at(op_.type()); for (int var_idx = 0; var_idx < op_.inputs_size(); ++var_idx) { auto &var = op_.inputs(var_idx); if (!var.arguments().empty()) { - if (!std::count(inputs_info.begin(), inputs_info.end(), var.parameter())) - continue; + if (!inputs_info.count(var.parameter())) continue; operands.push_back((params_map_[var.arguments()[0]])); } } @@ -243,7 +242,7 @@ llvm::SmallVector MLIRModelGenImpl::GetOpOutputType( const infrt::paddle::framework_proto::OpDesc &op_) { llvm::SmallVector resultTypes; - std::vector pd_dialect_outputs_info = {}; + std::unordered_map pd_dialect_outputs_info = {}; if (pd_dialect_outputs_info_map_.count(op_.type())) pd_dialect_outputs_info = pd_dialect_outputs_info_map_.at(op_.type()); @@ -251,9 +250,7 @@ llvm::SmallVector MLIRModelGenImpl::GetOpOutputType( for (int var_idx = 0; var_idx < op_.outputs_size(); ++var_idx) { auto &var_name = op_.outputs(var_idx).arguments()[0]; - if (!std::count(pd_dialect_outputs_info.begin(), - pd_dialect_outputs_info.end(), - op_.outputs(var_idx).parameter())) + if (!pd_dialect_outputs_info.count(op_.outputs(var_idx).parameter())) continue; // update 
persistable tensors
diff --git a/paddle/infrt/kernel/tensor_kernels.cc b/paddle/infrt/kernel/tensor_kernels.cc
index c6e28c4c79d29..1e55bcd07ae80 100644
--- a/paddle/infrt/kernel/tensor_kernels.cc
+++ b/paddle/infrt/kernel/tensor_kernels.cc
@@ -54,10 +54,11 @@ TensorMap LoadParams(const std::string &path) {
 }
 
 void TensorMapGetTensor(TensorMap map,
-                        const std::string &name,
-                        DenseHostTensor *out) {
-  auto it = map.find(name);
-  CHECK(it != map.end()) << "No tensor called " << name << " in the TensorMap";
+                        DenseHostTensor *out,
+                        Attribute<std::string> name) {
+  auto it = map.find(name.get());
+  CHECK(it != map.end()) << "No tensor called " << name.get()
+                         << " in the TensorMap";
   *out = *it->second;
 }
 
diff --git a/paddle/infrt/tests/dialect/tensor/tensor_map.mlir.in b/paddle/infrt/tests/dialect/tensor/tensor_map.mlir.in
index 3baa6bcd42050..4edb918b5a28f 100644
--- a/paddle/infrt/tests/dialect/tensor/tensor_map.mlir.in
+++ b/paddle/infrt/tests/dialect/tensor/tensor_map.mlir.in
@@ -6,8 +6,7 @@ func @load_tensor_map() {
   %size = dt.tensor_map_get_size(%map) -> i32
   infrt.print.i32 %size
 
-  %tensor_name = infrt.get_string("fc_bias")
-  %a = dt.tensor_map_get_tensor(%map, %tensor_name) -> !infrt.tensor<X86, NCHW, F32>
+  %a = dt.tensor_map_get_tensor(%map) {name="fc_bias"} -> !infrt.tensor<X86, NCHW, F32>
 
   // CHECK: tensor: shape=shape[2], values=[0, 0]
   dt.print_tensor (%a : !infrt.tensor<X86, NCHW, F32>)
diff --git a/paddle/scripts/infrt_build.sh b/paddle/scripts/infrt_build.sh
index f76fa497d6a03..ff86e7f52d535 100755
--- a/paddle/scripts/infrt_build.sh
+++ b/paddle/scripts/infrt_build.sh
@@ -90,7 +90,7 @@ function infrt_gen_and_build() {
         exit 7;
     fi
 
-    make -j ${parallel_number} infrt infrtopt infrtexec test_infrt_exec trt-exec infrt_lib_dist;build_error=$?
+    make -j ${parallel_number} infrt infrtopt infrtexec test_infrt_exec trt-exec infrt_lib_dist paddle-mlir-convert;build_error=$?
     if [ "$build_error" != 0 ];then
         exit 7;
     fi
diff --git a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
index f77ef86cc6c43..027dfe4328a55 100644
--- a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
+++ b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
@@ -110,10 +110,92 @@ def get_all_ops_desc():
     return all_op_protos_dict
 
 
+def generate_all_ops_inputs_outputs_map(op_descs):
+    # 1. Collect input and output name information of each Op
+    original_ops_ = get_original_ops()
+    ops_inputs_map = {}
+    ops_outputs_map = {}
+    for op_type, op_proto in op_descs.items():
+        if op_type not in original_ops_:
+            continue
+        inputs = list()
+        outputs = list()
+        for input_ in op_proto[INPUTS]:
+            if op_proto[INPUTS][input_][EXTRA] != True and op_proto[INPUTS][
+                    input_][INTERMEDIATE] != True:
+                inputs.append(input_)
+        for output_ in op_proto[OUTPUTS]:
+            if op_proto[OUTPUTS][output_][EXTRA] != True and op_proto[OUTPUTS][
+                    output_][INTERMEDIATE] != True:
+                outputs.append(output_)
+        ops_inputs_map[op_type] = inputs
+        ops_outputs_map[op_type] = outputs
+
+    # 2. Generate C++ style map string
+    cpp_style_ops_inputs_map_str = ""
+    start_ = "#include <string>\n#include <unordered_map>\n#include <vector>\n" + \
+        "const std::unordered_map<std::string, std::unordered_map<std::string, uint8_t>> pd_dialect_inputs_info_map_ = {\n"
+    ops_inputs_str = ""
+    for ele in ops_inputs_map.items():
+        op_name = ele[0]
+        op_inputs = ele[1]
+        op_inputs_str = "{"
+        input_idx = 0
+        for op_input in op_inputs:
+            op_input_str = '{left_brace}"{op_input}", {input_idx}{right_brace}, '.format(
+                left_brace="{",
+                op_input=op_input,
+                input_idx=input_idx,
+                right_brace="}")
+            input_idx = input_idx + 1
+            op_inputs_str = op_inputs_str + op_input_str
+        op_inputs_str = op_inputs_str[:-2] + "}"
+        pair = '{left_brace}"{op_name}", {op_inputs}{right_brace},\n'.format(
+            left_brace="{",
+            op_name=op_name,
+            op_inputs=op_inputs_str,
+            right_brace="}")
+        ops_inputs_str = ops_inputs_str + " " + pair
+    ops_inputs_str = ops_inputs_str[:-2]
+    cpp_style_ops_inputs_map_str = start_ + ops_inputs_str + "\n};"
+
+    cpp_style_ops_outputs_map_str = ""
+    start_ = "const std::unordered_map<std::string, std::unordered_map<std::string, uint8_t>> pd_dialect_outputs_info_map_ = {\n"
+    ops_outputs_str = ""
+    for ele in ops_outputs_map.items():
+        op_name = ele[0]
+        op_outputs = ele[1]
+        op_outputs_str = "{"
+        output_idx = 0
+        for op_output in op_outputs:
+            op_output_str = '{left_brace}"{op_output}", {output_idx}{right_brace}, '.format(
+                left_brace="{",
+                op_output=op_output,
+                output_idx=output_idx,
+                right_brace="}")
+            output_idx = output_idx + 1
+            op_outputs_str = op_outputs_str + op_output_str
+        op_outputs_str = op_outputs_str[:-2] + "}"
+        pair = '{left_brace}"{op_name}", {op_outputs}{right_brace},\n'.format(
+            left_brace="{",
+            op_name=op_name,
+            op_outputs=op_outputs_str,
+            right_brace="}")
+        ops_outputs_str = ops_outputs_str + " " + pair
+    ops_outputs_str = ops_outputs_str[:-2]
+    cpp_style_ops_outputs_map_str = start_ + ops_outputs_str + "\n};"
+
+    # 3. Write to header file
+    dst_head_file = "../../paddle/infrt/dialect/pd_ops_info.h"
+    with open(dst_head_file, 'w') as ops_inputs_outputs_head_file:
+        ops_inputs_outputs_head_file.write(cpp_style_ops_inputs_map_str)
+        ops_inputs_outputs_head_file.write("\n\n")
+        ops_inputs_outputs_head_file.write(cpp_style_ops_outputs_map_str)
+
+
 # function to generate paddle op dialect file
 def convert_op_proto_into_mlir(op_descs):
     dst_dialect_file = "../../paddle/infrt/dialect/pd_ops.td"
-    dialect_info_file = "../../paddle/infrt/dialect/pd_ops_info.h"
     custom_dialect_file = "custom_pdop.td"
 
     # 1.
Head files @@ -153,41 +235,38 @@ def convert_op_proto_into_mlir(op_descs): original_ops_ = get_original_ops() automatically_generated_op_dialect = [] - ops_inputs_map_ = {} - ops_outputs_map_ = {} - for op_type, op_proto in op_descs.items(): if (op_type in skipped_op_list) or (op_type not in original_ops_): continue automatically_generated_op_dialect.append(op_type) # 2.1 OpDef - HEAD = "def PD_" + op_type.capitalize( - ) + "Op : PD_Op<\"" + op_type + "\", [NoSideEffect]> {\n" - SUMMARY = " let summary = \"" + op_type + " op\";\n" + HEAD = 'def PD_{op_type_capitalize}Op : PD_Op<"{op_type}", [NoSideEffect]> {left_brace}\n'.format( + op_type_capitalize=op_type.capitalize(), + op_type=op_type, + left_brace="{") + SUMMARY = ' let summary = "{} op";\n'.format(op_type) CANONICALIZATION = "let hasCanonicalizer = 1;" if op_type in ops_having_canonicalization else "" # 2.2 Description - DESCRIPTION = " let description = [{\n" - contents = (op_proto[COMMENT]).split("\n") - for line_ in contents: - DESCRIPTION = DESCRIPTION + " " + line_ + "\n" - DESCRIPTION += " }];\n" + contents = "" + origin_contents = (op_proto[COMMENT]).split("\n") + for line_ in origin_contents: + contents = contents + " {}\n".format(line_) + DESCRIPTION = " let description = [{left_brace}\n{description} {right_brace}];\n".format( + left_brace="{", description=contents, right_brace="}") # 2.3 arguments info ARGUMENTS = "" if (len(op_proto[INPUTS]) > 0 or len(op_proto[ATTRS]) > 0): ARGUMENTS = " let arguments = (ins " # 2.3.1 inputs - ins_cache_list_ = [] for input_ in op_proto[INPUTS]: if op_proto[INPUTS][input_][EXTRA] != True and op_proto[INPUTS][ input_][INTERMEDIATE] != True: - ins_cache_list_.append(input_) if op_proto[INPUTS][input_][DUPLICABLE] != "true": ARGUMENTS = ARGUMENTS + " PD_Tensor:$" + input_ + "," else: ARGUMENTS = ARGUMENTS + " PD_Tensor_Array:$" + input_ + "," - ops_inputs_map_[op_type] = ins_cache_list_ # unsupported: BLOCK = 8; BLOCKS = 10; attr_mlir_converter = { 0: 'SI32Attr', @@ -252,19 +331,17 @@ def convert_op_proto_into_mlir(op_descs): # 2.4 results info RESULTS = "" if (len(op_proto[OUTPUTS]) > 0): - RESULTS = "\n let results = (outs " - outs_cache_list_ = [] + outputs = "" for output_ in op_proto[OUTPUTS]: if op_proto[OUTPUTS][output_][EXTRA] != True and op_proto[ OUTPUTS][output_][INTERMEDIATE] != True: - outs_cache_list_.append(output_) if op_proto[OUTPUTS][output_][DUPLICABLE] != "true": - RESULTS = RESULTS + "PD_Tensor:$" + output_ + "," + outputs = outputs + "PD_Tensor:${},".format(output_) else: - RESULTS = RESULTS + "PD_Tensor_Array:$" + output_ + "," - print(HEAD + " PD_Tensor_Array:$" + output_ + ",") - ops_outputs_map_[op_type] = outs_cache_list_ - RESULTS = RESULTS[:-1] + ");\n" + outputs = outputs + "PD_Tensor_Array:${},".format( + output_) + RESULTS = "\n let results = (outs {});\n".format(outputs[:-1]) + with open(dst_dialect_file, 'a') as ops_mlir_file: ops_mlir_file.write(HEAD) ops_mlir_file.write(SUMMARY) @@ -278,29 +355,6 @@ def convert_op_proto_into_mlir(op_descs): print("Automatically generated op dialects num: " + str( len(automatically_generated_op_dialect))) - with open(dialect_info_file, 'w') as pd_ops_info_file: - pd_ops_info_file.write( - "#include\n#include\n#include\n") - pd_ops_info_file.write( - "const std::map> pd_dialect_inputs_info_map_ = {\n" - ) - for data_ in ops_inputs_map_: - pd_ops_info_file.write(" {\"" + data_ + "\", {") - for var_ in ops_inputs_map_[data_]: - pd_ops_info_file.write("\"" + var_ + "\",") - pd_ops_info_file.write("}},\n") - 
pd_ops_info_file.write("};\n") - - pd_ops_info_file.write( - "const std::map> pd_dialect_outputs_info_map_ = {\n" - ) - for data_ in ops_outputs_map_: - pd_ops_info_file.write(" {\"" + data_ + "\", {") - for var_ in ops_outputs_map_[data_]: - pd_ops_info_file.write("\"" + var_ + "\",") - pd_ops_info_file.write("}},\n") - pd_ops_info_file.write("};\n") - # 3. custom op dialect and end of file with open(dst_dialect_file, 'a') as ops_mlir_file: with open(custom_dialect_file, 'r') as custom_ops_file: @@ -313,4 +367,5 @@ def convert_op_proto_into_mlir(op_descs): if __name__ == "__main__": all_op_protos_dict = get_all_ops_desc() + generate_all_ops_inputs_outputs_map(all_op_protos_dict) convert_op_proto_into_mlir(all_op_protos_dict) From 8f2d14adaff7155de76e572eb6019890825b507b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 17 Feb 2022 15:09:52 +0800 Subject: [PATCH 2/3] change classes to pten, test=develop (#39643) --- paddle/infrt/CMakeLists.txt | 6 +- paddle/infrt/backends/host/pten_allocator.h | 33 ++++++++ paddle/infrt/backends/host/pten_context.h | 26 ++++++ paddle/infrt/dialect/pten/CMakeLists.txt | 1 + .../infrt/dialect/pten/infrt_pten_kernel.td | 26 ++++++ paddle/infrt/dialect/pten/infrt_pten_tensor.h | 1 + .../infrt/dialect/pten/infrt_pten_tensor.td | 82 ++++--------------- paddle/infrt/dialect/pten/pten_base.cc | 28 +++++-- paddle/infrt/host_context/kernel_registry.h | 3 +- .../host_context/kernel_registry_test.cc | 2 +- paddle/infrt/host_context/mlir_exec.cc | 6 ++ paddle/infrt/host_context/value.cc | 34 ++++---- paddle/infrt/host_context/value.h | 37 +++++++-- paddle/infrt/kernel/CMakeLists.txt | 2 + paddle/infrt/kernel/pten/CMakeLists.txt | 19 +++++ paddle/infrt/kernel/pten/allocator_kernels.cc | 25 ++++++ .../pten/allocator_kernels.h} | 19 ++--- paddle/infrt/kernel/pten/context_kernels.cc | 25 ++++++ .../pten/context_kernels.h} | 24 ++---- .../infrt/kernel/pten/dense_tensor_kernels.cc | 38 +++++++++ .../dense_tensor_kernels.h} | 29 +++---- .../pten}/infershaped/elementwise_add.h | 36 ++++---- .../infershaped/infershape_launchers_test.cc | 36 ++++---- .../infershaped_kernel_launcher.cc | 19 +++-- .../infershaped/infershaped_kernel_launcher.h | 8 +- .../infershaped_kernel_launchers.cc | 36 ++++++++ .../infershaped_kernel_launchers.h | 10 +-- .../pten}/infershaped/infershaped_utils.h | 6 +- paddle/infrt/kernel/pten/registry.cc | 61 ++++++++++++++ .../{pten_kernels.h => pten/registry.h} | 0 paddle/infrt/naive/CMakeLists.txt | 8 -- .../naive/infershaped/infershaped_registry.cc | 55 ------------- .../naive/infershaped/infershaped_registry.h | 56 ------------- paddle/infrt/naive/meta_tensor.h | 47 ----------- .../tests/dialect/pten/dense_tensor.mlir | 9 +- paddle/pten/backends/cpu/cpu_context.cc | 4 + paddle/pten/backends/cpu/cpu_context.h | 2 + paddle/pten/core/device_context.cc | 2 + paddle/pten/core/device_context.h | 5 ++ 39 files changed, 499 insertions(+), 367 deletions(-) create mode 100644 paddle/infrt/backends/host/pten_allocator.h create mode 100644 paddle/infrt/backends/host/pten_context.h create mode 100644 paddle/infrt/dialect/pten/infrt_pten_kernel.td create mode 100644 paddle/infrt/kernel/pten/CMakeLists.txt create mode 100644 paddle/infrt/kernel/pten/allocator_kernels.cc rename paddle/infrt/{naive/meta_tensor.cc => kernel/pten/allocator_kernels.h} (64%) create mode 100644 paddle/infrt/kernel/pten/context_kernels.cc rename paddle/infrt/{naive/infershaped/infershaped_kernel_launchers.cc => 
kernel/pten/context_kernels.h} (50%) create mode 100644 paddle/infrt/kernel/pten/dense_tensor_kernels.cc rename paddle/infrt/kernel/{pten_kernels.cc => pten/dense_tensor_kernels.h} (55%) rename paddle/infrt/{naive => kernel/pten}/infershaped/elementwise_add.h (67%) rename paddle/infrt/{naive => kernel/pten}/infershaped/infershape_launchers_test.cc (56%) rename paddle/infrt/{naive => kernel/pten}/infershaped/infershaped_kernel_launcher.cc (74%) rename paddle/infrt/{naive => kernel/pten}/infershaped/infershaped_kernel_launcher.h (90%) create mode 100644 paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc rename paddle/infrt/{naive => kernel/pten}/infershaped/infershaped_kernel_launchers.h (79%) rename paddle/infrt/{naive => kernel/pten}/infershaped/infershaped_utils.h (95%) create mode 100644 paddle/infrt/kernel/pten/registry.cc rename paddle/infrt/kernel/{pten_kernels.h => pten/registry.h} (100%) delete mode 100644 paddle/infrt/naive/CMakeLists.txt delete mode 100644 paddle/infrt/naive/infershaped/infershaped_registry.cc delete mode 100644 paddle/infrt/naive/infershaped/infershaped_registry.h delete mode 100644 paddle/infrt/naive/meta_tensor.h diff --git a/paddle/infrt/CMakeLists.txt b/paddle/infrt/CMakeLists.txt index c8253effe8488..2486c54d5addc 100644 --- a/paddle/infrt/CMakeLists.txt +++ b/paddle/infrt/CMakeLists.txt @@ -82,7 +82,6 @@ add_subdirectory(tensor) add_subdirectory(support) add_subdirectory(external_kernels) add_subdirectory(paddle) -add_subdirectory(naive) add_subdirectory(tests) @@ -99,14 +98,15 @@ set(infrt_mlir_incs trt_ops_inc ) if (INFRT_WITH_PTEN) + set(pten_libs pten) set(infrt_mlir_incs ${infrt_mlir_incs} MLIRinfrt_pten_tensorIncGen MLIRinfrt_pten_baseIncGen ) endif() -cc_library(infrt SHARED SRCS ${infrt_src} DEPS glog boost ${mlir_libs} paddle_framework_proto infrt_naive) -cc_library(infrt_static SRCS ${infrt_src} DEPS glog boost ${mlir_libs} paddle_framework_proto) +cc_library(infrt SHARED SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${pten_libs} paddle_framework_proto infrt_naive) +cc_library(infrt_static SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${pten_libs} paddle_framework_proto) add_dependencies(infrt ${infrt_mlir_incs} mlir-headers) add_custom_target(test_infrt_exec DEPENDS ${INFRT_TEST_TARGETS}) diff --git a/paddle/infrt/backends/host/pten_allocator.h b/paddle/infrt/backends/host/pten_allocator.h new file mode 100644 index 0000000000000..172a808afbb5b --- /dev/null +++ b/paddle/infrt/backends/host/pten_allocator.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/pten/core/allocator.h" + +namespace infrt { +namespace backends { + +class CpuPtenAllocator : public pten::Allocator { + public: + static void deleter(pten::Allocation* ptr) { ::operator delete(ptr); } + + AllocationPtr Allocate(size_t bytes_size) { + return AllocationPtr( + new pten::Allocation(::operator new(bytes_size), + bytes_size, + pten::Place(pten::AllocationType::CPU)), + deleter); + } +}; + +} // namespace backends +} // namespace infrt diff --git a/paddle/infrt/backends/host/pten_context.h b/paddle/infrt/backends/host/pten_context.h new file mode 100644 index 0000000000000..1f5efeb272cef --- /dev/null +++ b/paddle/infrt/backends/host/pten_context.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/backends/cpu/cpu_context.h" + +namespace infrt { +namespace backends { + +class CpuPtenContext : public pten::CPUContext { + public: + using Base = pten::CPUContext; + using pten::CPUContext::SetEigenDevice; +}; + +} // namespace backends +} // namespace infrt diff --git a/paddle/infrt/dialect/pten/CMakeLists.txt b/paddle/infrt/dialect/pten/CMakeLists.txt index 0fb268952d54f..b4ed5cdc1d82f 100644 --- a/paddle/infrt/dialect/pten/CMakeLists.txt +++ b/paddle/infrt/dialect/pten/CMakeLists.txt @@ -5,6 +5,7 @@ endif() #mlir_tablegen_on(infrt_pten_base DIALECT pten) add_mlir_dialect(infrt_pten_base pten) add_mlir_dialect(infrt_pten_tensor pten_dt) +add_mlir_dialect(infrt_pten_kernel pten_kernel) #mlir_tablegen_on(infrt_pten_tensor) gather_srcs(infrt_src SRCS diff --git a/paddle/infrt/dialect/pten/infrt_pten_kernel.td b/paddle/infrt/dialect/pten/infrt_pten_kernel.td new file mode 100644 index 0000000000000..a3a1609d9918a --- /dev/null +++ b/paddle/infrt/dialect/pten/infrt_pten_kernel.td @@ -0,0 +1,26 @@ +#ifndef PTEN_KERNEL +#define PTEN_KERNEL + +include "paddle/infrt/dialect/pten/infrt_pten_tensor.td" + +def PTEN_KernelDialect : Dialect { + let name = "pten_kernel"; + + let description = [{ + The PTEN Kernel dialect. + }]; + + let cppNamespace = "::infrt::pten"; +} + +// PTEN Kernel related ops. 
+class PDT_Kernel traits = []> : Op { +} + +def FakeKernelOp : PDT_Kernel<"pten.matmul.host.fp32"> { + let arguments = (ins CPU_Context:$dev_ctx, TensorType:$x, TensorType:$y, BoolAttr:$transpose_x, BoolAttr:$transpose_y); + let results = (outs TensorType:$output); +} + +#endif + diff --git a/paddle/infrt/dialect/pten/infrt_pten_tensor.h b/paddle/infrt/dialect/pten/infrt_pten_tensor.h index 24ac2d851fe86..5fe259300d2ae 100644 --- a/paddle/infrt/dialect/pten/infrt_pten_tensor.h +++ b/paddle/infrt/dialect/pten/infrt_pten_tensor.h @@ -33,6 +33,7 @@ #include "paddle/infrt/dialect/pten/infrt_pten_tensorTypes.h.inc" #include "paddle/infrt/dialect/dense_tensor.h" +#include "paddle/infrt/dialect/pten/pten_base.h" // NOLINT #define GET_OP_CLASSES #include "paddle/infrt/dialect/pten/infrt_pten_tensor.h.inc" diff --git a/paddle/infrt/dialect/pten/infrt_pten_tensor.td b/paddle/infrt/dialect/pten/infrt_pten_tensor.td index 040c8ec3d3695..528f0f919680d 100644 --- a/paddle/infrt/dialect/pten/infrt_pten_tensor.td +++ b/paddle/infrt/dialect/pten/infrt_pten_tensor.td @@ -21,84 +21,36 @@ def PTEN_DenseTensorDialect : Dialect { class PDT_Op traits = []> : Op { } -class CreateUninitTensorOp - : PDT_Op<"create_uninit_tensor." # dtype, [NoSideEffect]> { - let summary = "pdt.create_uninit_tensor operation"; - - let description = [{ - An operation that creates an uninitialized tensor. - }]; - - let arguments = (ins I64ArrayAttr:$shape); - let results = (outs TensorType:$output); -} - -class CreateInitedTensorOp - : PDT_Op<"create_inited_tensor." #dtype, [NoSideEffect]> { - let summary = "pdt.create_inited_tensor operation"; - - let description = [{ - An operation that creates an tensor with shape and values assigned. - }]; - - let arguments = (ins I64ArrayAttr:$shape, array_attr:$values); +class CreateDenseTensorOp + : PDT_Op<"create_dense_tensor." # place # "." # dtype # "." # layout, [NoSideEffect]> { + let arguments = (ins CPU_Allocator:$allocator, I64ArrayAttr:$dims, I64ArrayAttr:$lod); let results = (outs TensorType:$output); } -def PrintTensorOp : PDT_Op<"print_tensor"> { - let summary = "pdt.print_tensor operation"; - - let description = [{ - An operation that prints a tensor. - }]; - - let arguments = (ins TensorType:$input); - let results = (outs); - let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict"; -} - -class FillTensor : - PDT_Op<"fill_tensor." # dtype> { - let summary = "dt.fill_tensor operation"; - - let description = [{ - An operation that fills an input tensor with a values. - }]; - +class FillDenseTensorOp : + PDT_Op<"fill_dense_tensor." # dtype> { let arguments = (ins TensorType:$input, attr_type:$value ); let results = (outs); - - let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict"; } -class FillTensorWithConstantOp : - PDT_Op<"fill_tensor_with_constant." # dtype> { - let summary = "dt.fill_tensor_with_constant operation"; - - let description = [{ - An operation that fills an input tensor with a single value. - }]; - - let arguments = (ins - TensorType:$input, - AnyAttr:$value - ); - let results = (outs); - - let assemblyFormat = "`(` $input `:` type($input) `)` attr-dict"; +class CreateCPUAllocatorOp + : PDT_Op<"create_allocator." 
# "cpu", [NoSideEffect]> { + let arguments = (ins); + let results = (outs CPU_Allocator:$output); } -foreach dtype = ["ui8", "ui16", "ui32", "ui64", "i32", "f32", "f64", "i64"] in { - def PDT_CreateUninitTensorOp_#dtype : CreateUninitTensorOp; - def PDT_FillTensorWithConstantOp_#dtype : FillTensorWithConstantOp; +class CreateCPUContextOp + : PDT_Op<"create_context." # "cpu", [NoSideEffect]> { + let arguments = (ins); + let results = (outs CPU_Context:$output); } -def PDT_FillTensor_f32: FillTensor<"f32", F32ArrayAttr>; -def PDT_FillTensor_i32: FillTensor<"i32", I32ArrayAttr>; -def PDT_CreateInitedTensorOp_f32 : CreateInitedTensorOp<"f32", F32ArrayAttr>; -def PDT_CreateInitedTensorOp_i32 : CreateInitedTensorOp<"i32", I32ArrayAttr>; +def PDT_CreateDenseTensorOp_cpu_f32_nchw : CreateDenseTensorOp<"cpu", "f32", "nchw">; +def PDT_FillDenseTensorOp_f32 : FillDenseTensorOp; +def PDT_CreateAllocatorOp_cpu : CreateCPUAllocatorOp; +def PDT_CreateContextOp_cpu : CreateCPUContextOp; #endif diff --git a/paddle/infrt/dialect/pten/pten_base.cc b/paddle/infrt/dialect/pten/pten_base.cc index ac23d44248982..ba87787dd7f7c 100644 --- a/paddle/infrt/dialect/pten/pten_base.cc +++ b/paddle/infrt/dialect/pten/pten_base.cc @@ -29,7 +29,23 @@ namespace pten { void PTENDialect::printType(::mlir::Type type, mlir::DialectAsmPrinter& os) const { - Dialect::printType(type, os); + if (type.isa()) { + os << "CPU_Allocator"; + return; + } + if (type.isa()) { + os << "GPU_Allocator"; + return; + } + if (type.isa()) { + os << "CPU_Context"; + return; + } + if (type.isa()) { + os << "GPU_Context"; + return; + } + llvm_unreachable("unexpected 'allocator/context' type kind"); } void PTENDialect::initialize() { @@ -46,14 +62,16 @@ void PTENDialect::initialize() { mlir::Type PTENDialect::parseType(mlir::DialectAsmParser& parser) const { llvm::StringRef keyword; if (parser.parseKeyword(&keyword)) return mlir::Type(); - if (keyword == "allocator_CPU") { + if (keyword == "CPU_allocator") { return CPUAllocatorType::get(parser.getContext()); - } else if (keyword == "allocator_GPU") { + } else if (keyword == "GPU_allocator") { return GPUAllocatorType::get(parser.getContext()); - } else if (keyword == "context_CPU") { + } else if (keyword == "CPU_context") { return CPUContextType::get(parser.getContext()); - } else if (keyword == "context_GPU") { + } else if (keyword == "GPU_context") { return GPUContextType::get(parser.getContext()); + } else { + llvm_unreachable("unexpected 'allocator/context' type kind"); } return mlir::Type(); diff --git a/paddle/infrt/host_context/kernel_registry.h b/paddle/infrt/host_context/kernel_registry.h index d65969999f6ed..a813f690efb0b 100644 --- a/paddle/infrt/host_context/kernel_registry.h +++ b/paddle/infrt/host_context/kernel_registry.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -23,7 +24,7 @@ namespace host_context { class KernelFrame; -using KernelImplementation = void (*)(KernelFrame *frame); +using KernelImplementation = std::function; /** * Hold the kernels registered in the system. 
diff --git a/paddle/infrt/host_context/kernel_registry_test.cc b/paddle/infrt/host_context/kernel_registry_test.cc index 7fca56343041c..fd2aecb3e6c1e 100644 --- a/paddle/infrt/host_context/kernel_registry_test.cc +++ b/paddle/infrt/host_context/kernel_registry_test.cc @@ -28,7 +28,7 @@ TEST(KernelRegistry, basic) { std::string key = "infrt.test.add.i32"; registry.AddKernel(key, INFRT_KERNEL(add_i32)); - auto* kernel_impl = registry.GetKernel(key); + const auto& kernel_impl = registry.GetKernel(key); ASSERT_TRUE(kernel_impl); ValueRef a(1); diff --git a/paddle/infrt/host_context/mlir_exec.cc b/paddle/infrt/host_context/mlir_exec.cc index b0d70af5ef9f2..62c907bc9159f 100644 --- a/paddle/infrt/host_context/mlir_exec.cc +++ b/paddle/infrt/host_context/mlir_exec.cc @@ -28,6 +28,9 @@ #include "paddle/infrt/kernel/tensor_kernels.h" #include "paddle/infrt/kernel/tensor_shape_kernels.h" #include "paddle/infrt/kernel/test_kernels.h" +#ifdef INFRT_WITH_PTEN +#include "paddle/infrt/kernel/pten/registry.h" +#endif static llvm::cl::list cl_shared_libs( // NOLINT "shared_libs", @@ -53,6 +56,9 @@ int main(int argc, char** argv) { kernel::RegisterTensorShapeKernels(®istry); kernel::RegisterTensorKernels(®istry); kernel::RegisterControlFlowKernels(®istry); +#ifdef INFRT_WITH_PTEN + kernel::RegisterPtenKernels(®istry); +#endif // load extra shared library for (const auto& lib_path : cl_shared_libs) { diff --git a/paddle/infrt/host_context/value.cc b/paddle/infrt/host_context/value.cc index 1c5a577092636..e8b904efb74a1 100644 --- a/paddle/infrt/host_context/value.cc +++ b/paddle/infrt/host_context/value.cc @@ -24,7 +24,13 @@ ValueRef::ValueRef(int64_t val) : Shared(new Value(val)) {} ValueRef::ValueRef(float val) : Shared(new Value(val)) {} ValueRef::ValueRef(double val) : Shared(new Value(val)) {} ValueRef::ValueRef(bool val) : Shared(new Value(val)) {} -ValueRef::ValueRef(naive::MetaTensor&& val) +ValueRef::ValueRef(backends::CpuPtenContext&& val) + : Shared(new Value(std::move(val))) {} +ValueRef::ValueRef(::pten::CPUContext&& val) + : Shared(new Value(std::move(val))) {} +ValueRef::ValueRef(::pten::DenseTensor&& val) + : Shared(new Value(std::move(val))) {} +ValueRef::ValueRef(::pten::MetaTensor&& val) : Shared(new Value(std::move(val))) {} const char* Value::type_info() const { return __type_info__; } @@ -36,31 +42,31 @@ void CopyTo(const Value& from, Value* to) { [&](auto&& arg) { using T = std::decay_t; if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else if (std::is_same>::value) - to->data = arg; + to->data = reinterpret_cast const&>(arg); else if (std::is_same>::value) - to->data = arg; + to->data = reinterpret_cast const&>(arg); else if (std::is_same::value) - to->data = arg; + to->data = reinterpret_cast(arg); else LOG(FATAL) 
<< "Not supported Value copy: " << typeid(T).name(); }, diff --git a/paddle/infrt/host_context/value.h b/paddle/infrt/host_context/value.h index 904e51f92838d..f623e141512ce 100644 --- a/paddle/infrt/host_context/value.h +++ b/paddle/infrt/host_context/value.h @@ -23,15 +23,19 @@ #include "paddle/infrt/common/object.h" #include "paddle/infrt/common/shared.h" #include "paddle/infrt/host_context/function.h" -#include "paddle/infrt/naive/meta_tensor.h" #include "paddle/infrt/support/variant.h" #include "paddle/infrt/tensor/dense_host_tensor.h" #include "paddle/infrt/tensor/dense_tensor_view.h" #include "paddle/infrt/tensor/tensor_map.h" #include "paddle/infrt/tensor/tensor_shape.h" -// Disabled temporarily for failed compile, will enable latter. -// #include "paddle/pten/backends/cpu/cpu_context.h" -// #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/meta_tensor.h" + +#ifdef INFRT_WITH_PTEN +#include "paddle/infrt/backends/host/pten_allocator.h" +#include "paddle/infrt/backends/host/pten_context.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/core/dense_tensor.h" +#endif namespace infrt { namespace host_context { @@ -44,14 +48,20 @@ using ValueVariantType = Variant, std::vector, std::vector, @@ -84,7 +94,13 @@ class Value : public common::Object { explicit Value(tensor::TensorShape&& x) : data(std::move(x)) {} explicit Value(tensor::DenseHostTensor&& x) : data(std::move(x)) {} explicit Value(MlirFunctionExecutable* x) : data(x) {} - explicit Value(naive::MetaTensor&& x) : data(std::move(x)) {} +#ifdef INFRT_WITH_PTEN + explicit Value(backends::CpuPtenContext&& x) : data(std::move(x)) {} + explicit Value(::pten::CPUContext&& x) : data(std::move(x)) {} + explicit Value(::pten::DenseTensor&& x) : data(std::move(x)) {} + explicit Value(::pten::MetaTensor&& x) : data(std::move(x)) {} + explicit Value(backends::CpuPtenAllocator&& x) : data(std::move(x)) {} +#endif template const T& get() const { @@ -142,7 +158,10 @@ class ValueRef : common::Shared { explicit ValueRef(float val); explicit ValueRef(double val); explicit ValueRef(bool val); - explicit ValueRef(naive::MetaTensor&& val); + explicit ValueRef(::pten::MetaTensor&& val); + explicit ValueRef(backends::CpuPtenContext&& x); + explicit ValueRef(::pten::CPUContext&& x); + explicit ValueRef(::pten::DenseTensor&& x); using common::Shared::get; using common::Shared::Reset; diff --git a/paddle/infrt/kernel/CMakeLists.txt b/paddle/infrt/kernel/CMakeLists.txt index b7ef5691e4760..402665119ac2d 100644 --- a/paddle/infrt/kernel/CMakeLists.txt +++ b/paddle/infrt/kernel/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(pten) + core_gather_headers() gather_srcs(infrt_src SRCS diff --git a/paddle/infrt/kernel/pten/CMakeLists.txt b/paddle/infrt/kernel/pten/CMakeLists.txt new file mode 100644 index 0000000000000..65c10b0b15f8d --- /dev/null +++ b/paddle/infrt/kernel/pten/CMakeLists.txt @@ -0,0 +1,19 @@ +if (NOT INFRT_WITH_PTEN) + return() +endif() + +core_gather_headers() + +gather_srcs(infrt_src SRCS + registry.cc + dense_tensor_kernels.cc + context_kernels.cc + allocator_kernels.cc +) + +cc_library(infrt_naive SRCS infershaped/infershaped_kernel_launcher.cc + infershaped/infershaped_kernel_launchers.cc + ) + +cc_test_tiny(test_infrt_infershape_launchers SRCS +infershaped/infershape_launchers_test.cc DEPS infrt) diff --git a/paddle/infrt/kernel/pten/allocator_kernels.cc b/paddle/infrt/kernel/pten/allocator_kernels.cc new file mode 100644 index 0000000000000..d3ecbed15da96 --- /dev/null +++ 
b/paddle/infrt/kernel/pten/allocator_kernels.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/infrt/kernel/pten/allocator_kernels.h" + +namespace infrt { +namespace kernel { +namespace pten { + +backends::CpuPtenAllocator CreateCpuAllocator() { return {}; } + +} // namespace pten +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/naive/meta_tensor.cc b/paddle/infrt/kernel/pten/allocator_kernels.h similarity index 64% rename from paddle/infrt/naive/meta_tensor.cc rename to paddle/infrt/kernel/pten/allocator_kernels.h index 2f7ee3a69e290..33127711193a2 100644 --- a/paddle/infrt/naive/meta_tensor.cc +++ b/paddle/infrt/kernel/pten/allocator_kernels.h @@ -12,20 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/infrt/naive/meta_tensor.h" +#pragma once -#include "paddle/infrt/tensor/dense_host_tensor.h" -#include "paddle/infrt/tensor/tensor_shape.h" +#include "paddle/infrt/backends/host/pten_allocator.h" +#include "paddle/pten/core/dense_tensor.h" namespace infrt { -namespace naive { +namespace kernel { +namespace pten { -const tensor::TensorShape& MetaTensor::shape() const { - return mutable_tensor_->shape(); -} -tensor::TensorShape* MetaTensor::mutable_shape() { - return mutable_tensor_->mutable_shape(); -} +backends::CpuPtenAllocator CreateCpuAllocator(); -} // namespace naive +} // namespace pten +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/kernel/pten/context_kernels.cc b/paddle/infrt/kernel/pten/context_kernels.cc new file mode 100644 index 0000000000000..0c5e53212113b --- /dev/null +++ b/paddle/infrt/kernel/pten/context_kernels.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/infrt/kernel/pten/context_kernels.h" + +namespace infrt { +namespace kernel { +namespace pten { + +backends::CpuPtenContext CreateCpuContext() { return {}; } + +} // namespace pten +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershaped_kernel_launchers.cc b/paddle/infrt/kernel/pten/context_kernels.h similarity index 50% rename from paddle/infrt/naive/infershaped/infershaped_kernel_launchers.cc rename to paddle/infrt/kernel/pten/context_kernels.h index e570b3521b795..14a151d9d1d8e 100644 --- a/paddle/infrt/naive/infershaped/infershaped_kernel_launchers.cc +++ b/paddle/infrt/kernel/pten/context_kernels.h @@ -12,23 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/infrt/naive/infershaped/infershaped_kernel_launchers.h" -#include "paddle/infrt/naive/infershaped/elementwise_add.h" -#include "paddle/infrt/naive/infershaped/infershaped_registry.h" +#pragma once -namespace infrt { -namespace naive { +#include "paddle/infrt/backends/host/pten_context.h" +#include "paddle/pten/core/dense_tensor.h" -using ElementwiseAddLauncher = - KernelLauncher; +namespace infrt { +namespace kernel { +namespace pten { -void RegisterInferShapeLaunchers(InferShapedKernelRegistry* registry) { - registry->AddKernel("elementwise_add", - INFERSHAPED_KERNEL_CREATOR(ElementwiseAddLauncher)); -} +backends::CpuPtenContext CreateCpuContext(); -} // namespace naive +} // namespace pten +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/kernel/pten/dense_tensor_kernels.cc b/paddle/infrt/kernel/pten/dense_tensor_kernels.cc new file mode 100644 index 0000000000000..2db5f4a3c1179 --- /dev/null +++ b/paddle/infrt/kernel/pten/dense_tensor_kernels.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/infrt/kernel/pten/dense_tensor_kernels.h" + +namespace infrt { +namespace kernel { +namespace pten { + +::pten::DenseTensor CreateDenseTensorCpuF32Nchw( + backends::CpuPtenAllocator* allocator, + host_context::Attribute> dims, + host_context::Attribute> lod) { + return ::pten::DenseTensor( + allocator, + ::pten::DenseTensorMeta(::pten::DataType::FLOAT32, + ::pten::framework::make_ddim(dims.get()), + ::pten::DataLayout::NCHW, + {})); +} + +void FillDenseTensorF32(::pten::DenseTensor* dense_tensor, + host_context::Attribute> values) {} + +} // namespace pten +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/pten_kernels.cc b/paddle/infrt/kernel/pten/dense_tensor_kernels.h similarity index 55% rename from paddle/infrt/kernel/pten_kernels.cc rename to paddle/infrt/kernel/pten/dense_tensor_kernels.h index 62e2db659ad42..f60525707cd77 100644 --- a/paddle/infrt/kernel/pten_kernels.cc +++ b/paddle/infrt/kernel/pten/dense_tensor_kernels.h @@ -12,29 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/infrt/kernel/pten_kernels.h" +#pragma once -#include -#include - -#include "paddle/infrt/host_context/kernel_registry.h" +#include "paddle/infrt/backends/host/pten_allocator.h" #include "paddle/infrt/host_context/kernel_utils.h" - -// Disable temporarily. -// #include "paddle/pten/backends/cpu/cpu_context.h" -// #include "paddle/pten/kernels/math_kernel.h" - -using infrt::host_context::Attribute; +#include "paddle/pten/core/dense_tensor.h" namespace infrt { namespace kernel { +namespace pten { + +::pten::DenseTensor CreateDenseTensorCpuF32Nchw( + backends::CpuPtenAllocator* allocator, + host_context::Attribute> dims, + host_context::Attribute> lod); -void RegisterPtenKernels(host_context::KernelRegistry* registry) { - registry->AddKernel("pd_cpu.add.float32", - INFRT_KERNEL(pten::AddKernel)); - registry->AddKernel("pd_cpu.add.int32", - INFRT_KERNEL(pten::AddKernel)); -} +void FillDenseTensorF32(::pten::DenseTensor* dense_tensor, + host_context::Attribute> values); +} // namespace pten } // namespace kernel } // namespace infrt diff --git a/paddle/infrt/naive/infershaped/elementwise_add.h b/paddle/infrt/kernel/pten/infershaped/elementwise_add.h similarity index 67% rename from paddle/infrt/naive/infershaped/elementwise_add.h rename to paddle/infrt/kernel/pten/infershaped/elementwise_add.h index ee044e38da03d..1d9d0106da539 100644 --- a/paddle/infrt/naive/infershaped/elementwise_add.h +++ b/paddle/infrt/kernel/pten/infershaped/elementwise_add.h @@ -16,27 +16,23 @@ #include #include "paddle/infrt/host_context/kernel_utils.h" -#include "paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h" -#include "paddle/infrt/naive/infershaped/infershaped_utils.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_utils.h" // This file contains a example of the infershape ElementwiseAdd kernel. // Some of the following code should be generated from PTEN by script. 
namespace infrt { -namespace naive { +namespace kernel { -static void ElementwiseAddInferShape(const MetaTensor& a, - const MetaTensor& b, - MetaTensor* c) { - CHECK(a.shape() == b.shape()) - << "ElementwiseAdd, but shapes of a b are not match"; - *c->mutable_shape() = a.shape(); -} +static void ElementwiseAddInferShape(const ::pten::MetaTensor& a, + const ::pten::MetaTensor& b, + ::pten::MetaTensor* c) {} -static void ElementwiseAdd(tensor::DenseHostTensor* /*Context*/, - const tensor::DenseHostTensor& a, - const tensor::DenseHostTensor& b, - tensor::DenseHostTensor* c) {} +static void ElementwiseAdd(const ::pten::CPUContext& /*Context*/, + const ::pten::DenseTensor& a, + const ::pten::DenseTensor& b, + ::pten::DenseTensor* c) {} template +void KernelLauncherFunc( + KernelLauncher launcher, + host_context::KernelFrame* frame) { + launcher.Invoke(frame); +} + +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershape_launchers_test.cc b/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc similarity index 56% rename from paddle/infrt/naive/infershaped/infershape_launchers_test.cc rename to paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc index ba6fdbdd5783f..64b99110d94c7 100644 --- a/paddle/infrt/naive/infershaped/infershape_launchers_test.cc +++ b/paddle/infrt/kernel/pten/infershaped/infershape_launchers_test.cc @@ -14,19 +14,17 @@ #include -#include "paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h" -#include "paddle/infrt/naive/infershaped/infershaped_kernel_launchers.h" -#include "paddle/infrt/naive/infershaped/infershaped_registry.h" -#include "paddle/infrt/naive/infershaped/infershaped_utils.h" -#include "paddle/infrt/tensor/dense_host_tensor.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_utils.h" namespace infrt { -namespace naive { +namespace kernel { namespace { -static void ElementwiseAddTest(const tensor::DenseHostTensor& a, - const tensor::DenseHostTensor& b, - tensor::DenseHostTensor* c); +static void ElementwiseAddTest(const ::pten::DenseTensor& a, + const ::pten::DenseTensor& b, + ::pten::DenseTensor* c); } TEST(utils, registry) { @@ -35,26 +33,24 @@ TEST(utils, registry) { CHECK_EQ(count, 2U); } -TEST(ElementwiseAdd, registry) { - InferShapedKernelRegistry registry; +TEST(ElementwiseAdd, launcher_registry) { + host_context::KernelRegistry registry; RegisterInferShapeLaunchers(®istry); ASSERT_EQ(registry.size(), 1UL); auto creator = registry.GetKernel("elementwise_add"); - auto infershape_launcher_handle = creator(); - // fake some tensors - tensor::DenseHostTensor a({2, 8}, GetDType()); - tensor::DenseHostTensor b({2, 8}, GetDType()); - tensor::DenseHostTensor c({2, 8}, GetDType()); + ::pten::CPUContext ctx{}; + ::pten::DenseTensor a{}; + ::pten::DenseTensor b{}; + ::pten::DenseTensor c{}; host_context::KernelFrameBuilder kernel_frame_builder; - kernel_frame_builder.AddArgument(new host_context::Value(0)); + kernel_frame_builder.AddArgument(new host_context::Value(std::move(ctx))); kernel_frame_builder.AddArgument(new host_context::Value(std::move(a))); kernel_frame_builder.AddArgument(new host_context::Value(std::move(b))); kernel_frame_builder.SetResults({new host_context::Value(std::move(c))}); - - infershape_launcher_handle->Invoke(&kernel_frame_builder); + creator(&kernel_frame_builder); } -} // namespace naive +} 
// namespace kernel } // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershaped_kernel_launcher.cc b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc similarity index 74% rename from paddle/infrt/naive/infershaped/infershaped_kernel_launcher.cc rename to paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc index 6a2c4a51ecdb2..80f8bae4018cb 100644 --- a/paddle/infrt/naive/infershaped/infershaped_kernel_launcher.cc +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.cc @@ -12,18 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h" +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h" namespace infrt { -namespace naive { +namespace kernel { void InferShapedKernelLauncher::CreateKernelFrameForInferShape( host_context::KernelFrame* frame) { for (host_context::Value* value : frame->GetValues(1, frame->GetNumElements() - 1)) { // TODO(Superjomn) To extend this. - if (value->is_type()) { - values.emplace_back(MetaTensor{&value->get()}); + if (value->is_type<::pten::DenseTensor>()) { + values.emplace_back( + ::pten::MetaTensor{&value->get<::pten::DenseTensor>()}); infershape_kernel_frame_builder.AddArgument(values.back().get()); } else { infershape_kernel_frame_builder.AddArgument(value); @@ -35,8 +36,9 @@ void InferShapedKernelLauncher::BuildInferShapeCache( const uint16_t num_inputs) { tensor_shape_cache.resize(num_inputs); for (uint16_t i = 0; i < num_inputs; i++) { - tensor_shape_cache[i] = - infershape_kernel_frame_builder.GetArgAt(i)->get().shape(); + tensor_shape_cache[i] = infershape_kernel_frame_builder.GetArgAt(i) + ->get<::pten::MetaTensor>() + .dims(); } } @@ -49,10 +51,11 @@ bool InferShapedKernelLauncher::IsShapeChanged( for (uint16_t i = 0; i < num_inputs && !changed; i++) { changed = changed || (tensor_shape_cache[i] != - infershape_kernel_frame_builder.GetArgAt(i).shape()); + infershape_kernel_frame_builder.GetArgAt<::pten::MetaTensor>(i) + .dims()); } return changed; } -} // namespace naive +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h similarity index 90% rename from paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h rename to paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h index 890a779ed2403..9348bf8d05008 100644 --- a/paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launcher.h @@ -17,11 +17,9 @@ #include "paddle/infrt/host_context/kernel_frame.h" #include "paddle/infrt/host_context/value.h" -#include "paddle/infrt/naive/meta_tensor.h" -#include "paddle/infrt/tensor/dense_host_tensor.h" namespace infrt { -namespace naive { +namespace kernel { struct InferShapedKernelLauncher { virtual void Invoke(host_context::KernelFrame* frame) = 0; @@ -46,9 +44,9 @@ struct InferShapedKernelLauncher { // values to hold the TensorMeta. 
llvm::SmallVector values; - llvm::SmallVector tensor_shape_cache; + llvm::SmallVector<::pten::DDim, 3> tensor_shape_cache; host_context::KernelFrameBuilder infershape_kernel_frame_builder; }; -} // namespace naive +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc new file mode 100644 index 0000000000000..23d4f919af057 --- /dev/null +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h" +#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h" + +namespace infrt { +namespace kernel { + +void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry) { + registry->AddKernel( + "elementwise_add", + std::bind(&KernelLauncherFunc, + KernelLauncher(), + std::placeholders::_1)); +} + +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershaped_kernel_launchers.h b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h similarity index 79% rename from paddle/infrt/naive/infershaped/infershaped_kernel_launchers.h rename to paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h index 3e83b690bb8df..ba25f06876cca 100644 --- a/paddle/infrt/naive/infershaped/infershaped_kernel_launchers.h +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_kernel_launchers.h @@ -14,12 +14,12 @@ #pragma once -namespace infrt { -namespace naive { +#include "paddle/infrt/host_context/kernel_registry.h" -struct InferShapedKernelRegistry; +namespace infrt { +namespace kernel { -void RegisterInferShapeLaunchers(InferShapedKernelRegistry* registry); +void RegisterInferShapeLaunchers(host_context::KernelRegistry* registry); -} // namespace naive +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/naive/infershaped/infershaped_utils.h b/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h similarity index 95% rename from paddle/infrt/naive/infershaped/infershaped_utils.h rename to paddle/infrt/kernel/pten/infershaped/infershaped_utils.h index 8155d87231a8f..aa5e900b8b26a 100644 --- a/paddle/infrt/naive/infershaped/infershaped_utils.h +++ b/paddle/infrt/kernel/pten/infershaped/infershaped_utils.h @@ -18,10 +18,10 @@ #include "paddle/infrt/tensor/dense_host_tensor.h" namespace infrt { -namespace naive { +namespace kernel { namespace infershaped { -using KeyType = const tensor::DenseHostTensor&; +using KeyType = const ::pten::DenseTensor&; using CountType = uint8_t; constexpr CountType value(std::true_type) { return 1; } @@ -73,5 +73,5 @@ struct InferShapeHelper { static constexpr int count = infershaped::count(); }; -} // namespace naive +} // namespace kernel } // namespace infrt diff --git a/paddle/infrt/kernel/pten/registry.cc 
b/paddle/infrt/kernel/pten/registry.cc new file mode 100644 index 0000000000000..888992c47d968 --- /dev/null +++ b/paddle/infrt/kernel/pten/registry.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/infrt/kernel/pten/registry.h" + +#include +#include + +#include "paddle/infrt/host_context/kernel_registry.h" +#include "paddle/infrt/host_context/kernel_utils.h" +#include "paddle/infrt/kernel/pten/allocator_kernels.h" +#include "paddle/infrt/kernel/pten/context_kernels.h" +#include "paddle/infrt/kernel/pten/dense_tensor_kernels.h" +#include "paddle/infrt/kernel/pten/infershaped/elementwise_add.h" +#include "paddle/pten/include/infermeta.h" +#include "paddle/pten/include/kernels.h" +#include "paddle/pten/kernels/matmul_kernel.h" + +using infrt::host_context::Attribute; + +namespace infrt { +namespace kernel { + +void RegisterPtenKernels(host_context::KernelRegistry* registry) { + registry->AddKernel("pten_dt.create_allocator.cpu", + INFRT_KERNEL(infrt::kernel::pten::CreateCpuAllocator)); + registry->AddKernel("pten_dt.create_context.cpu", + INFRT_KERNEL(infrt::kernel::pten::CreateCpuContext)); + registry->AddKernel( + "pten_dt.create_dense_tensor.cpu.f32.nchw", + INFRT_KERNEL(infrt::kernel::pten::CreateDenseTensorCpuF32Nchw)); + registry->AddKernel("pten_dt.fill_dense_tensor.f32", + INFRT_KERNEL(infrt::kernel::pten::FillDenseTensorF32)); + registry->AddKernel( + "pten.matmul.host.fp32", + std::bind(&kernel::KernelLauncherFunc< + decltype(&::pten::MatmulKernel), + &::pten::MatmulKernel, + decltype(&::pten::MatmulInferMeta), + &::pten::MatmulInferMeta>, + kernel::KernelLauncher< + decltype(&::pten::MatmulKernel), + &::pten::MatmulKernel, + decltype(&::pten::MatmulInferMeta), + &::pten::MatmulInferMeta>(), + std::placeholders::_1)); +} + +} // namespace kernel +} // namespace infrt diff --git a/paddle/infrt/kernel/pten_kernels.h b/paddle/infrt/kernel/pten/registry.h similarity index 100% rename from paddle/infrt/kernel/pten_kernels.h rename to paddle/infrt/kernel/pten/registry.h diff --git a/paddle/infrt/naive/CMakeLists.txt b/paddle/infrt/naive/CMakeLists.txt deleted file mode 100644 index c90c6e7ba7b88..0000000000000 --- a/paddle/infrt/naive/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -cc_library(infrt_naive SRCS meta_tensor.cc - infershaped/infershaped_kernel_launcher.cc - infershaped/infershaped_registry.cc - infershaped/infershaped_kernel_launchers.cc - ) - -cc_test_tiny(test_infrt_infershape_launchers SRCS -infershaped/infershape_launchers_test.cc DEPS infrt) diff --git a/paddle/infrt/naive/infershaped/infershaped_registry.cc b/paddle/infrt/naive/infershaped/infershaped_registry.cc deleted file mode 100644 index 94218a9a6f6a6..0000000000000 --- a/paddle/infrt/naive/infershaped/infershaped_registry.cc +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
diff --git a/paddle/infrt/naive/infershaped/infershaped_registry.cc b/paddle/infrt/naive/infershaped/infershaped_registry.cc
deleted file mode 100644
index 94218a9a6f6a6..0000000000000
--- a/paddle/infrt/naive/infershaped/infershaped_registry.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/infrt/naive/infershaped/infershaped_registry.h"
-
-#include <unordered_map>
-
-#include "paddle/infrt/naive/infershaped/infershaped_kernel_launcher.h"
-
-namespace infrt {
-namespace naive {
-
-struct InferShapedKernelRegistry::Impl {
-  std::unordered_map<std::string, InferShapeLauncherCreator> data;
-};
-
-InferShapedKernelRegistry::InferShapedKernelRegistry()
-    : impl_(std::make_unique<Impl>()) {}
-
-void InferShapedKernelRegistry::AddKernel(
-    const std::string& key,
-    InferShapedKernelRegistry::InferShapeLauncherCreator&& creator) {
-  CHECK(!impl_->data.count(key)) << "Item called " << key << " duplicates";
-  impl_->data.emplace(key, std::move(creator));
-}
-
-const InferShapedKernelRegistry::InferShapeLauncherCreator&
-InferShapedKernelRegistry::GetKernel(const std::string& key) const {
-  auto it = impl_->data.find(key);
-  CHECK(it != impl_->data.end()) << "No item called " << key << " exists";
-  return it->second;
-}
-
-size_t InferShapedKernelRegistry::size() const { return impl_->data.size(); }
-
-InferShapedKernelRegistry* GetInferShapeRegistry() {
-  static auto registry = std::make_unique<InferShapedKernelRegistry>();
-  return registry.get();
-}
-
-InferShapedKernelRegistry::~InferShapedKernelRegistry() {}
-
-}  // namespace naive
-}  // namespace infrt
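The GetInferShapeRegistry() deleted above is the usual function-local-static singleton: the registry is built lazily on first use and lives for the rest of the process, with C++11 guaranteeing thread-safe initialization. The pattern in isolation (Registry here is a placeholder type):

    #include <memory>

    class Registry { /* ... */ };

    Registry* GetRegistry() {
      // Constructed exactly once, on first call; initialization of a
      // function-local static is thread-safe since C++11.
      static auto registry = std::make_unique<Registry>();
      return registry.get();
    }

    int main() {
      Registry* r = GetRegistry();  // same pointer on every call
      (void)r;
    }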
diff --git a/paddle/infrt/naive/infershaped/infershaped_registry.h b/paddle/infrt/naive/infershaped/infershaped_registry.h
deleted file mode 100644
index e0e56a148fa3d..0000000000000
--- a/paddle/infrt/naive/infershaped/infershaped_registry.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <functional>
-#include <memory>
-#include <string>
-
-namespace infrt {
-namespace naive {
-
-struct InferShapedKernelLauncher;
-
-class InferShapedKernelRegistry {
- public:
-  using InferShapeLauncherHandle = std::unique_ptr<InferShapedKernelLauncher>;
-  using InferShapeLauncherCreator = std::function<InferShapeLauncherHandle()>;
-
-  InferShapedKernelRegistry();
-
-  void AddKernel(const std::string& key, InferShapeLauncherCreator&& creator);
-
-  const InferShapeLauncherCreator& GetKernel(const std::string& key) const;
-
-  size_t size() const;
-
-  ~InferShapedKernelRegistry();
-
- private:
-  struct Impl;
-
-  std::unique_ptr<Impl> impl_;
-};
-
-//! The global infershape registry.
-InferShapedKernelRegistry* GetInferShapeRegistry();
-
-}  // namespace naive
-}  // namespace infrt
-
-#define INFERSHAPED_KERNEL_CREATOR(infershape_launcher_class_)              \
-  []()                                                                      \
-      -> ::infrt::naive::InferShapedKernelRegistry::InferShapeLauncherHandle { \
-    return std::make_unique<infershape_launcher_class_>();                  \
-  }
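The INFERSHAPED_KERNEL_CREATOR macro removed above packaged "construct a launcher of class X" into a stateless lambda, so the registry stored factories rather than instances and could mint a fresh launcher per use. A standalone sketch of that creator idiom; Launcher, AddLauncher and SKETCH_CREATOR are illustrative names:

    #include <functional>
    #include <memory>

    struct Launcher { virtual ~Launcher() = default; };
    struct AddLauncher : Launcher {};

    using Creator = std::function<std::unique_ptr<Launcher>()>;

    // Mirrors INFERSHAPED_KERNEL_CREATOR: expands to a lambda that makes a
    // fresh launcher each time it is invoked.
    #define SKETCH_CREATOR(launcher_class) \
      []() -> std::unique_ptr<Launcher> { return std::make_unique<launcher_class>(); }

    int main() {
      Creator creator = SKETCH_CREATOR(AddLauncher);
      auto launcher = creator();  // a new AddLauncher instance per call
      (void)launcher;
    }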
diff --git a/paddle/infrt/naive/meta_tensor.h b/paddle/infrt/naive/meta_tensor.h
deleted file mode 100644
index 4b62f3021a3a6..0000000000000
--- a/paddle/infrt/naive/meta_tensor.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// A naive implementation of MetaTensor
-#pragma once
-#include "paddle/infrt/common/common.h"
-
-namespace infrt {
-namespace tensor {
-struct DenseHostTensor;
-struct TensorShape;
-}  // namespace tensor
-
-namespace naive {
-
-class MetaTensor {
- public:
-  MetaTensor() = default;
-  explicit MetaTensor(tensor::DenseHostTensor* tensor)
-      : mutable_tensor_(tensor) {}
-  explicit MetaTensor(const tensor::DenseHostTensor* tensor)
-      : mutable_tensor_(&Reference(tensor)) {}
-  explicit MetaTensor(MetaTensor&& other)
-      : mutable_tensor_(other.mutable_tensor_) {}
-  explicit MetaTensor(const MetaTensor& other)
-      : mutable_tensor_(other.mutable_tensor_) {}
-
-  const tensor::TensorShape& shape() const;
-  tensor::TensorShape* mutable_shape();
-
- private:
-  tensor::DenseHostTensor* mutable_tensor_{};
-};
-
-}  // namespace naive
-}  // namespace infrt
diff --git a/paddle/infrt/tests/dialect/pten/dense_tensor.mlir b/paddle/infrt/tests/dialect/pten/dense_tensor.mlir
index 109fa2d6fa741..88f5b289fd9f8 100644
--- a/paddle/infrt/tests/dialect/pten/dense_tensor.mlir
+++ b/paddle/infrt/tests/dialect/pten/dense_tensor.mlir
@@ -1,10 +1,11 @@
 // RUN: infrtopt %s | FileCheck %s
 
-// CHECK-LABEL: basic_tensor
+// CHECK-LABEL: @basic_tensor
 func @basic_tensor() {
-  %a = "pten_dt.create_uninit_tensor.f32" () { shape=[12:i64, 23:i64] } : () -> !infrt.tensor
-  %b = "pten_dt.create_inited_tensor.f32" () { shape=[2:i64, 2:i64], values=[0.1:f32, 0.2:f32, 0.3:f32, 0.4:f32] } : () -> !infrt.tensor
-  "pten_dt.fill_tensor_with_constant.f32" (%a) { value=0.1:f32 } : (!infrt.tensor) -> ()
+  %a = "pten_dt.create_allocator.cpu" (): () -> !pten.CPU_allocator
+  %b = "pten_dt.create_context.cpu" (): () -> !pten.CPU_context
+  %c = "pten_dt.create_dense_tensor.cpu.f32.nchw" (%a) {dims=[1:i64], lod=[1:i64]}: (!pten.CPU_allocator) -> (!infrt.tensor)
+  // "pten_dt.fill_dense_tensor.f32" (%c) {value=[1.0:f32]} : (!infrt.tensor) -> ()
 
   infrt.return
 }
diff --git a/paddle/pten/backends/cpu/cpu_context.cc b/paddle/pten/backends/cpu/cpu_context.cc
index 4029c286a5b28..5eb89c2dc658d 100644
--- a/paddle/pten/backends/cpu/cpu_context.cc
+++ b/paddle/pten/backends/cpu/cpu_context.cc
@@ -58,6 +58,10 @@ CPUContext::CPUContext(const Place& place)
 
 CPUContext::~CPUContext() = default;
 
+CPUContext::CPUContext(CPUContext&&) = default;
+
+CPUContext& CPUContext::operator=(CPUContext&&) = default;
+
 void CPUContext::Init() { impl_->Init(); }
 
 Eigen::DefaultDevice* CPUContext::eigen_device() const {
diff --git a/paddle/pten/backends/cpu/cpu_context.h b/paddle/pten/backends/cpu/cpu_context.h
index dca87a786b961..1e4109d3eeb7f 100644
--- a/paddle/pten/backends/cpu/cpu_context.h
+++ b/paddle/pten/backends/cpu/cpu_context.h
@@ -27,6 +27,8 @@ namespace pten {
 class CPUContext : public DeviceContext {
  public:
   CPUContext();
+  CPUContext(CPUContext&&);
+  CPUContext& operator=(CPUContext&&);
   explicit CPUContext(const Place&);
   virtual ~CPUContext();
   Eigen::DefaultDevice* eigen_device() const;
diff --git a/paddle/pten/core/device_context.cc b/paddle/pten/core/device_context.cc
index 70d71b5c767ea..bc9d7fc7d29b2 100644
--- a/paddle/pten/core/device_context.cc
+++ b/paddle/pten/core/device_context.cc
@@ -149,6 +149,8 @@ DeviceContext::DeviceContext(DeviceContext&& other) {
   impl_ = std::move(other.impl_);
 }
 
+DeviceContext& DeviceContext::operator=(DeviceContext&&) = default;
+
 DeviceContext::~DeviceContext() = default;
 
 void DeviceContext::SetAllocator(const Allocator* allocator) {
diff --git a/paddle/pten/core/device_context.h b/paddle/pten/core/device_context.h
index d627f19b55dbc..05753b531ff08 100644
--- a/paddle/pten/core/device_context.h
+++ b/paddle/pten/core/device_context.h
@@ -49,6 +49,11 @@ class DeviceContext {
    */
   DeviceContext(DeviceContext&&);
 
+  /**
+   * @brief Move assign operator.
+   */
+  DeviceContext& operator=(DeviceContext&&);
+
   /**
    * @brief Default destruct.
    */
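A note on why the move operations added to CPUContext and DeviceContext above are declared in the header but defined "= default" in the .cc file: both classes keep their state in an impl_ member behind the pimpl idiom, and the compiler-generated special members need the Impl type to be complete where they are instantiated, so they must be defaulted in the source file where Impl is defined. Minimal sketch of the pattern; Widget and Impl are illustrative names:

    // widget.h
    #include <memory>
    #include <utility>

    class Widget {
     public:
      Widget();
      Widget(Widget&&);              // declared here; Impl is still incomplete
      Widget& operator=(Widget&&);
      ~Widget();

     private:
      struct Impl;                   // forward declaration only
      std::unique_ptr<Impl> impl_;
    };

    // widget.cc -- Impl is complete here, so "= default" compiles.
    struct Widget::Impl { int state = 0; };

    Widget::Widget() : impl_(std::make_unique<Impl>()) {}
    Widget::Widget(Widget&&) = default;
    Widget& Widget::operator=(Widget&&) = default;
    Widget::~Widget() = default;

    int main() {
      Widget w1;
      Widget w2 = std::move(w1);     // uses the defaulted move constructor
    }

Defaulting them inline in the header would force std::unique_ptr to generate a deleter for the incomplete Impl and fail to compile.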
From 1f7f8561fcece3050166e07719228fa5dd8c2256 Mon Sep 17 00:00:00 2001
From: QingshuChen
Date: Thu, 17 Feb 2022 15:27:23 +0800
Subject: [PATCH 3/3] update kunlun label_smooth unitest (#39611)

* update kunlun label_smooth unitest
*test=kunlun

* minor
*test=kunlun
---
 .../unittests/xpu/test_label_smooth_op_xpu.py | 79 ++++++++++++-------
 tools/check_file_diff_approvals.sh            |  4 +-
 2 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
index 5a827c1beb291..afe1662ce5cfc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_label_smooth_op_xpu.py
@@ -20,45 +20,66 @@ import sys
 sys.path.append("..")
 from op_test_xpu import XPUOpTest
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
 
 paddle.enable_static()
 
 
-class TestLabelSmoothOp(XPUOpTest):
-    def config(self):
-        self.op_type = "label_smooth"
-        self.epsilon = 0.1
-        self.use_xpu = True
-        batch_size, self.label_dim = 10, 12
-        self.label = np.zeros((batch_size, self.label_dim)).astype("float32")
-        nonzero_index = np.random.randint(self.label_dim, size=(batch_size))
-        self.label[np.arange(batch_size), nonzero_index] = 1
+class XPUTestLabelSmoothOp(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'label_smooth'
+        self.use_dynamic_create_class = True
 
-    def setUp(self):
-        self.config()
-        smoothed_label = (1 - self.epsilon
-                          ) * self.label + self.epsilon / self.label_dim
-        self.inputs = {'X': self.label}
-        self.attrs = {'epsilon': self.epsilon}
-        self.outputs = {'Out': smoothed_label}
+    def dynamic_create_class(self):
+        base_class = self.TestLabelSmoothOp
+        classes = []
+        batch_sizes = [1, 5, 1024]
+        label_dims = [1, 7, 12]
+        for bs in batch_sizes:
+            for label_dim in label_dims:
+                class_name = 'XPUTestLabelSmooth_' + \
+                    str(bs) + "_" + str(label_dim)
+                attr_dict = {'batch_size': bs, 'label_dim': label_dim}
+                classes.append([class_name, attr_dict])
+
+        classes.append(['XPUTestLabelSmooth_3d', {'is_3d': True}])
+        return base_class, classes
 
-    def test_check_output(self):
-        if not paddle.is_compiled_with_xpu():
-            return
-        self.check_output_with_place(paddle.XPUPlace(0), atol=1e-6)
+    class TestLabelSmoothOp(XPUOpTest):
+        def setUp(self):
+            self.op_type = "label_smooth"
+            self.epsilon = 0.1
+            self.use_xpu = True
+            if not hasattr(self, 'batch_size'):
+                self.batch_size = 10
+                self.label_dim = 12
+            self.label = np.zeros(
+                (self.batch_size, self.label_dim)).astype("float32")
+            nonzero_index = np.random.randint(
+                self.label_dim, size=(self.batch_size))
+            self.label[np.arange(self.batch_size), nonzero_index] = 1
+            smoothed_label = (1 - self.epsilon
+                              ) * self.label + self.epsilon / self.label_dim
+            self.inputs = {'X': self.label}
+            self.attrs = {'epsilon': self.epsilon}
+            self.outputs = {'Out': smoothed_label}
+            if hasattr(self, 'is_3d') and self.is_3d:
+                self.inputs['X'] = self.inputs['X'].reshape(
+                    [2, -1, self.inputs['X'].shape[-1]])
+                self.outputs['Out'] = self.outputs['Out'].reshape(self.inputs[
+                    'X'].shape)
 
-    def test_check_grad(self):
-        return
+        def test_check_output(self):
+            if not paddle.is_compiled_with_xpu():
+                return
+            self.check_output_with_place(paddle.XPUPlace(0), atol=1e-6)
 
+        def test_check_grad(self):
+            return
 
-class TestLabelSmoothOp3D(TestLabelSmoothOp):
-    def setUp(self):
-        super(TestLabelSmoothOp3D, self).setUp()
-        self.inputs['X'] = self.inputs['X'].reshape(
-            [2, -1, self.inputs['X'].shape[-1]])
-        self.outputs['Out'] = self.outputs['Out'].reshape(self.inputs['X']
-                                                          .shape)
 
+support_types = get_xpu_op_support_types('label_smooth')
+for stype in support_types:
+    create_test_class(globals(), XPUTestLabelSmoothOp, stype)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh
index a36f173454f6a..f9826da20ce08 100644
--- a/tools/check_file_diff_approvals.sh
+++ b/tools/check_file_diff_approvals.sh
@@ -213,8 +213,8 @@ fi
 NO_NPU_FILE=`git diff --name-only upstream/$BRANCH | grep -v "_npu.py"`
 HAS_UNITTEST_SKIP=`git diff -U0 upstream/$BRANCH ${NO_NPU_FILE} | grep "^+[[:space:]]\{0,\}@unittest.skip" || true`
 if [ "${HAS_UNITTEST_SKIP}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-    echo_line="Unittest is not allowed to be disabled.\nYou must have one RD (kolinwei(Recommend), wanghuancoder, luotao1 or qili93) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n"
-    check_approval 1 22165420 6836917 46661762 26922892 16605440
+    echo_line="Unittest is not allowed to be disabled.\nYou must have one RD (kolinwei(Recommend), wanghuancoder, luotao1, QingshuChen or qili93) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n"
+    check_approval 1 22165420 6836917 46661762 26922892 16605440 2002279
 fi
 
 HAS_MODIFIED_DEMO_CMAKE=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/inference/api/demo_ci/CMakeLists.txt" || true`