Move the "GetExpectedKernelType" into "get_expected_kernel_func.cc" #51453

Merged
merged 9 commits on Mar 15, 2023
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_transform.cc
@@ -161,7 +161,7 @@ phi::GetKernelTypeForVarContext BuildGetKernelTypeForVarContext(
const AttributeMap &fluid_attrs,
phi::AttributeMap *phi_attrs,
bool has_infer_varkernel_fn) {
// According to "GetKernelTypeForVar" in some ops those have MKLDNN codes,
// According to "GetKernelTypeForVar" in some ops executed with oneDNN,
// the only "string" member, such as "data_layout" 、"data_format" of
// AttibuteMap is useful. In the future the other args maybe used. Because the
// "phi" module should not depend on the "fluid", transform
2 changes: 1 addition & 1 deletion paddle/fluid/operators/CMakeLists.txt
@@ -94,7 +94,7 @@ if(WITH_UNITY_BUILD)
include(unity_build_rule.cmake)
endif()

set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api)
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api get_expected_kernel_func)

register_operators(EXCLUDES py_func_op warpctc_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op eye_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
6 changes: 6 additions & 0 deletions paddle/fluid/operators/generator/CMakeLists.txt
@@ -243,3 +243,9 @@ file(APPEND ${op_utils_header}
register_op_utils(op_compat_infos DEPS op_utils)

copy_if_different(${op_utils_header} ${op_utils_header_final})

# add special GetExpectedKernelType
cc_library(
get_expected_kernel_func
SRCS get_expected_kernel_func.cc
DEPS operator)
90 changes: 61 additions & 29 deletions paddle/fluid/operators/generator/generate_op.py
@@ -101,18 +101,12 @@ def process_scalar(op_item, scalar_configs):
and scalar_config['support_tensor']
else False
)
if attr_item['is_support_tensor']:
attr_item['typename'] = (
scalar_config['data_type']
if 'data_type' in scalar_config
else scalar_map[attr_type]
)
else:
attr_item['data_type'] = (
scalar_config['data_type']
if 'data_type' in scalar_config
else scalar_map[attr_type]
)
attr_item['data_type'] = (
scalar_config['data_type']
if 'data_type' in scalar_config
else scalar_map[attr_type]
)
if attr_item['is_support_tensor'] is False:
attr_item['tensor_name'] = scalar_config['tensor_name']


@@ -136,19 +130,12 @@ def process_int_array(op_item, int_array_configs):
and int_array_config['support_tensor']
else False
)
if attr_item['is_support_tensor']:
attr_item['typename'] = (
'int[]'
if 'data_type' in int_array_config
and int_array_config['data_type'] == 'int'
else 'int64_t[]'
)
else:
attr_item['data_type'] = (
data_type_map[int_array_config['data_type']]
if 'data_type' in int_array_config
else 'std::vector<int64_t>'
)
attr_item['data_type'] = (
data_type_map[int_array_config['data_type']]
if 'data_type' in int_array_config
else 'std::vector<int64_t>'
)
if attr_item['is_support_tensor'] is False:
attr_item['manual_flag'] = True
if 'tensor_name' in int_array_config:
attr_item['tensor_name'] = int_array_config[
@@ -460,16 +447,16 @@ def process_invoke_op(forward_op_dict, backward_op_dict):


def parse_drop_empty_grad(op_fluid_list: list, bw_op_dict: dict):
for op_op in op_fluid_list:
if 'drop_empty_grad' in op_op:
for op_comp_map in op_fluid_list:
if 'drop_empty_grad' in op_comp_map:
bw_names = [
bw_name.split('(')[0].strip()
for bw_name in op_op['backward'].split(',')
for bw_name in op_comp_map['backward'].split(',')
]
for bw_name in bw_names:
# static_ops.yaml and ops.yaml use the common op_compat.yaml
if bw_name in bw_op_dict:
for out_grad in op_op['drop_empty_grad']:
for out_grad in op_comp_map['drop_empty_grad']:
assert (
out_grad in bw_op_dict[bw_name]['output_dict']
), f'''
@@ -479,6 +466,45 @@ def parse_drop_empty_grad(op_fluid_list: list, bw_op_dict: dict):
] = False


def parse_get_expected_kerneltype(
op_fluid_list: list, fw_op_dict: dict, bw_op_dict: dict
):
for op_comp_map in op_fluid_list:
if 'get_expected_kernel_type' in op_comp_map:
fw_name = op_comp_map['op'].split('(')[0].strip()
if fw_name in op_comp_map['get_expected_kernel_type']:
# static_ops.yaml and ops.yaml use the common op_compat.yaml
if fw_name in fw_op_dict:
fw_op_dict[fw_name][
"get_expected_kernel_type"
] = op_comp_map['get_expected_kernel_type'][fw_name]
bw_names = [
bw_name.split('(')[0].strip()
for bw_name in op_comp_map['backward'].split(',')
]
for bw_name in bw_names:
# static_ops.yaml and ops.yaml use the common op_compat.yaml
if (
bw_name in bw_op_dict
and bw_name in op_comp_map['get_expected_kernel_type']
):
bw_op_dict[bw_name][
"get_expected_kernel_type"
] = op_comp_map['get_expected_kernel_type'][bw_name]


def parse_keep_signature(
op_fluid_list: list, fw_op_dict: dict, bw_op_dict: dict
):
for op_comp_map in op_fluid_list:
if 'manual_signature' in op_comp_map:
for op_name in op_comp_map['manual_signature']:
if op_name in fw_op_dict:
fw_op_dict[op_name]["manual_signature"] = True
elif op_name in bw_op_dict:
bw_op_dict[op_name]["manual_signature"] = True


def split_ops_list(ops, backward_op_dict, split_num):
new_ops_list = []
new_bw_ops_list = []
@@ -547,6 +573,12 @@ def main(
# deal the drop_empty_grad of bw_op by op_compat.yaml
parse_drop_empty_grad(op_fluid_map_list, backward_op_dict)

parse_get_expected_kerneltype(
op_fluid_map_list, forward_op_dict, backward_op_dict
)

parse_keep_signature(op_fluid_map_list, forward_op_dict, backward_op_dict)

add_composite_info(ops, backward_ops, backward_op_dict)

add_compat_name(op_fluid_map_list, forward_op_dict, backward_op_dict)
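For context, a minimal sketch of how the two new hooks above consume a compat entry once op_compat.yaml has been loaded into Python dicts. Everything in it is illustrative: the "reduce_sum" entry, its "get_expected_kernel_type" values, and the "manual_signature" list are hypothetical placeholders, and the functions are assumed to be importable from generate_op.py.

```python
# Hypothetical compat entry standing in for a yaml.safe_load() result from
# op_compat.yaml; not the real file contents.
op_fluid_map_list = [
    {
        'op': 'reduce_sum',
        'backward': 'reduce_sum_grad',
        'get_expected_kernel_type': {
            'reduce_sum': 'GetReduceExpectedKernelType',
            'reduce_sum_grad': 'GetReduceGradExpectedKernelType',
        },
        'manual_signature': ['reduce_sum'],
    }
]
forward_op_dict = {'reduce_sum': {}}        # parsed from ops.yaml / static_ops.yaml
backward_op_dict = {'reduce_sum_grad': {}}  # parsed from the backward yaml

parse_get_expected_kerneltype(op_fluid_map_list, forward_op_dict, backward_op_dict)
parse_keep_signature(op_fluid_map_list, forward_op_dict, backward_op_dict)

# The hooks copy the yaml keys into the per-op dicts that the templates render:
assert forward_op_dict['reduce_sum']['get_expected_kernel_type'] == 'GetReduceExpectedKernelType'
assert backward_op_dict['reduce_sum_grad']['get_expected_kernel_type'] == 'GetReduceGradExpectedKernelType'
assert forward_op_dict['reduce_sum']['manual_signature'] is True
```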
106 changes: 106 additions & 0 deletions paddle/fluid/operators/generator/get_expected_kernel_func.cc
@@ -0,0 +1,106 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "paddle/fluid/operators/generator/get_expected_kernel_func.h"

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle {
namespace operators {

// oneDNN's reduction kernel is optimized only for reducing throughout the
// most outer dims, so in case of another type of reduction, it would be
// better to fallback to native implementation
static bool ReduceOpHasOptimizedOneDNNKernel(
const framework::ExecutionContext& ctx) {
// native reduce kernels don't support bf16
// so oneDNN kernel is enforced in that case
if (ctx.Input<phi::DenseTensor>("X")->dtype() ==
experimental::DataType::BFLOAT16)
return true;

if (!ctx.HasAttr("dim") || !ctx.HasAttr("reduce_all")) {
return false;
}

auto reduce_dims = ctx.Attr<std::vector<int>>("dim");
const bool reduce_all = ctx.Attr<bool>("reduce_all");
int ndims = ctx.Input<phi::DenseTensor>("X")->dims().size();

if (reduce_all) {
return true;
}

for (size_t i = 0; i < reduce_dims.size(); ++i) {
if (reduce_dims[i] < 0) reduce_dims[i] = ndims + reduce_dims[i];
}
sort(reduce_dims.begin(), reduce_dims.end());
for (size_t i = 0; i < reduce_dims.size(); ++i) {
if (reduce_dims[reduce_dims.size() - i - 1] !=
static_cast<int>(ndims - i - 1)) {
return false;
}
}

return true;
}

phi::KernelKey GetReduceExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr) {
// choose cudnn kernel if the runtime supported.
auto input_data_type = op_ptr->IndicateVarDataType(ctx, "X");

if (ctx.Input<phi::DenseTensor>("X")->dims().size() > 5 ||
!ReduceOpHasOptimizedOneDNNKernel(ctx)) {
op_ptr->SetDnnFallback(true);
}

if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()) ||
platform::is_npu_place(ctx.GetPlace()) ||
platform::is_mlu_place(ctx.GetPlace()) ||
platform::is_xpu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace()),
true,
platform::errors::InvalidArgument(
"float16 can only be used on GPU or NPU or MLU or XPU place"));
}
return phi::KernelKey(input_data_type, ctx.GetPlace());
}

phi::KernelKey GetReduceGradExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr) {
int out_dtype = ctx.Attr<int>("out_dtype");
auto input_data_type =
(out_dtype >= 0)
? static_cast<framework::proto::VarType::Type>(out_dtype)
: op_ptr->IndicateVarDataType(ctx, framework::GradVarName("Out"));
if (ctx.Input<phi::DenseTensor>("X")->dims().size() > 5) {
op_ptr->SetDnnFallback(true);
}

return phi::KernelKey(input_data_type, ctx.GetPlace());
}

} // namespace operators
} // namespace paddle
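As a side note, the axis check inside ReduceOpHasOptimizedOneDNNKernel can be restated in a short Python sketch (purely illustrative, not part of the PR): after negative axes are normalized, the op stays on the oneDNN path only when the reduced axes form a contiguous run ending at the last dimension. The helper also short-circuits to true for bfloat16 inputs and to false when the "dim" / "reduce_all" attributes are absent; the sketch covers only the axis logic.

```python
# Illustrative restatement of the axis check in ReduceOpHasOptimizedOneDNNKernel:
# the (normalized, sorted) reduce dims must be exactly the trailing dimension
# indices ndims-k .. ndims-1, otherwise the native (non-oneDNN) kernel is used.
def reduce_dims_are_trailing(reduce_dims, ndims, reduce_all=False):
    if reduce_all:
        return True
    dims = sorted(d + ndims if d < 0 else d for d in reduce_dims)
    return dims == list(range(ndims - len(dims), ndims))

assert reduce_dims_are_trailing([-1], ndims=4)        # reduces dim 3 only
assert reduce_dims_are_trailing([2, 3], ndims=4)      # trailing pair
assert not reduce_dims_are_trailing([0, 1], ndims=4)  # leading dims -> fallback
```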
32 changes: 32 additions & 0 deletions paddle/fluid/operators/generator/get_expected_kernel_func.h
@@ -0,0 +1,32 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/operator.h"
#include "paddle/phi/core/kernel_factory.h"

namespace paddle {
namespace operators {

phi::KernelKey GetReduceExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr);

phi::KernelKey GetReduceGradExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr);

} // namespace operators
} // namespace paddle
16 changes: 11 additions & 5 deletions paddle/fluid/operators/generator/templates/ks.c.j2
@@ -6,22 +6,28 @@
namespace phi {

{% for op in ops %}
{% if op is base_op %}
{% if "manual_signature" not in op %}
{% if op is base_op %}
{{name_map(op)}}
{% endif %}
{% endif %}
{% endfor %}
{% for op in backward_ops %}
{% if op is base_op %}
{% if "manual_signature" not in op %}
{% if op is base_op %}
{{name_map(op)}}
{% endif %}
{% endif %}
{% endfor %}
} // namespace phi

{% for op in ops + backward_ops %}
{% if op["name"] != op["op_name"] %}
{% if "manual_signature" not in op %}
{% if op["name"] != op["op_name"] %}
{{register_base_kernel_name(op)}}
{% endif %}
{% if op is base_op %}
{% endif %}
{% if op is base_op %}
{{register_name_map(op)}}
{% endif %}
{% endif %}
{% endfor %}
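The effect of the new guard in this template, sketched in Python with hypothetical op entries (illustration only): ops flagged with "manual_signature" in op_compat.yaml are skipped when the kernel-signature file is generated, presumably so that a hand-written signature defined elsewhere remains the one in effect.

```python
# Mirrors the Jinja guard `{% if "manual_signature" not in op %}`: only ops
# without the flag get generated signature-mapping entries.
ops = [
    {'name': 'reduce_sum', 'op_name': 'reduce_sum', 'manual_signature': True},  # hypothetical
    {'name': 'concat', 'op_name': 'concat'},                                    # hypothetical
]
generated = [op['name'] for op in ops if 'manual_signature' not in op]
assert generated == ['concat']
```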
1 change: 1 addition & 0 deletions paddle/fluid/operators/generator/templates/op.c.j2
@@ -8,6 +8,7 @@
#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h"
#include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"
#include "paddle/fluid/operators/generator/get_expected_kernel_func.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/binary.h"