[PIR] pir onednn multi_gru #61868

Merged
1 change: 1 addition & 0 deletions paddle/fluid/framework/new_executor/feed_fetch_utils.cc
@@ -115,6 +115,7 @@ void FetchTensors(const std::vector<std::string>& job_fetch_names,
&(PADDLE_GET(phi::DenseTensor, fetch_list->at(micro_batch_id)[col]));
if (src.IsInitialized()) {
TensorCopy(src, platform::CPUPlace(), dst);
dst->set_lod(src.lod());
} else {
VLOG(6) << "Found " << var_name
<< " is not initialized and skip TensorCopy.";
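Note on this change: the fetch path already copies the tensor data to CPU; the added line also carries over the source tensor's LoD (sequence-offset metadata), so LoD-bearing outputs such as multi_gru's hidden state keep their sequence boundaries after fetching. A minimal consumer-side sketch (function and variable names are illustrative, not part of the PR) of reading that propagated LoD:

#include <cstddef>
#include <vector>
#include "paddle/phi/core/dense_tensor.h"

// Recover per-sequence lengths from a fetched LoD tensor. An empty LoD means
// the output is dense and carries no sequence structure.
std::vector<size_t> SequenceLengths(const phi::DenseTensor& fetched) {
  std::vector<size_t> lengths;
  if (fetched.lod().empty()) return lengths;
  const auto& offsets = fetched.lod()[0];            // e.g. {0, 3, 7} -> two sequences
  for (size_t i = 1; i < offsets.size(); ++i) {
    lengths.push_back(offsets[i] - offsets[i - 1]);  // length of sequence i-1
  }
  return lengths;
}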
@@ -807,7 +807,14 @@ void BuildRuntimeContext(pir::Operation* op,
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));
auto var = inner_scope->FindVar(in_var_name);
runtime_ctx->inputs[legacy_attr_name].push_back(var);
if (var->IsType<VariableRefArray>()) {
for (auto single_var : var->Get<VariableRefArray>()) {
runtime_ctx->inputs[legacy_attr_name].push_back(
const_cast<framework::Variable*>(single_var));
}
} else {
runtime_ctx->inputs[legacy_attr_name].push_back(var);
}
}

auto& output_name_list = op_yaml_info.OutputNames();
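Context for this branch: under PIR, a Tensor[] input (e.g. multi_gru's WeightX/WeightH/Bias lists) lives in the scope as a single VariableRefArray, while the legacy RuntimeContext expects one Variable* per element, so the array is flattened before legacy kernels see it. The same pattern as a standalone helper (illustrative; the headers and namespace context of the surrounding file are assumed):

// Flatten a scope variable into the per-element list a legacy kernel expects:
// a VariableRefArray contributes each of its elements, anything else
// contributes itself.
void AppendFlattened(framework::Variable* var,
                     std::vector<framework::Variable*>* out) {
  if (var->IsType<VariableRefArray>()) {
    for (auto single_var : var->Get<VariableRefArray>()) {
      out->push_back(const_cast<framework::Variable*>(single_var));
    }
  } else {
    out->push_back(var);
  }
}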
10 changes: 4 additions & 6 deletions paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -681,7 +681,7 @@ class MultiGRUHandler {
const phi::Vector<size_t>& x_lod_;
};

template <typename T>
template <typename T, typename DeviceContext>
class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -720,8 +720,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(multi_gru,
MKLDNN,
phi::CPUPlace,
ops::MultiGRUMKLDNNKernel<float>,
ops::MultiGRUMKLDNNKernel<uint8_t>);

PD_REGISTER_STRUCT_KERNEL(
multi_gru, OneDNN, ONEDNN, ops::MultiGRUMKLDNNKernel, float, uint8_t) {}
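The kernel body is unchanged apart from the extra DeviceContext template parameter; the registration switches from the legacy REGISTER_OP_KERNEL macro to PD_REGISTER_STRUCT_KERNEL so the kernel is discoverable from the PIR/OneDNN path this PR enables. Reduced to a skeleton, a registration of this kind looks like the following (op name my_fused_op and class MyFusedKernel are placeholders; headers omitted):

namespace paddle {
namespace operators {

template <typename T, typename DeviceContext>
class MyFusedKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // read inputs with ctx.Input<...>() / ctx.MultiInput<...>(),
    // write results with ctx.Output<...>()
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
PD_REGISTER_STRUCT_KERNEL(
    my_fused_op, OneDNN, ONEDNN, ops::MyFusedKernel, float, uint8_t) {}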
5 changes: 4 additions & 1 deletion paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -2137,7 +2137,10 @@ def OpGenerator(
"data_format_tensors"
]
op["dynamic_fallback"] = onednn_item["dynamic_fallback"]
op["attrs"] = op["attrs"] + onednn_item["attrs"]
if onednn_item["attrs"] is not None:
op["attrs"] = op["attrs"] + onednn_item["attrs"]
else:
op["attrs"] = op["attrs"]
elif op['name'] in ops_onednn_extra_map:
onednn_item = ops_onednn_extra_map[op['name']]
op["is_onednn_only"] = onednn_item["is_onednn_only"]
1 change: 1 addition & 0 deletions paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -106,6 +106,7 @@
'share_data',
'onednn_to_paddle_layout',
'lrn',
'multi_gru',
]

NO_NEED_GEN_STATIC_ONLY_APIS = [
10 changes: 10 additions & 0 deletions paddle/fluid/pir/dialect/operator/ir/onednn.yaml
@@ -28,6 +28,16 @@
data_type : input
optional : bias, residual_param

- op: multi_gru
args: (Tensor x, Tensor[] weight_x, Tensor[] weight_h, Tensor[] bias, Tensor[] scale_weights, str activation="tanh", str gate_activation="sigmoid", int layers=1, bool origin_mode=false, str mkldnn_data_type="float32", float scale_data=1.0, float shift_data=1.0, bool force_fp32_output=false)
output: Tensor(hidden)
infer_meta:
func: MultiGruInferMeta
kernel:
func: multi_gru
data_type : x
optional: bias, scale_weights

- op : quantize
args : (Tensor input, bool is_negative_input=false, float scale=1.0, float shift=0.0, str output_format="NHWC", bool bfloat16=false)
output : Tensor(output)
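This yaml entry is what the op generator consumes: args and output define the generated MultiGruOp signature, infer_meta and kernel point at MultiGruInferMeta and the multi_gru kernel registered above, and the "optional: bias, scale_weights" line matches the paddle::optional parameters in the MultiGruInferMeta declaration added further down.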
2 changes: 1 addition & 1 deletion paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -170,7 +170,7 @@

- op : mish_grad

# - op : multi_gru
- op : multi_gru

- op : multiply

1 change: 1 addition & 0 deletions paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -79,6 +79,7 @@ const std::unordered_set<std::string> LegacyOpList = {
paddle::onednn::dialect::LrnGradOp::name(),
paddle::onednn::dialect::QuantizeOp::name(),
paddle::onednn::dialect::RequantizeOp::name(),
paddle::onednn::dialect::MultiGruOp::name(),
#endif
CReduceMinOp::name(),
PushSparseV2Op::name()};
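Listing MultiGruOp here marks it as a legacy op under PIR, i.e. it keeps running through the fluid OpKernel machinery via the BuildRuntimeContext path modified above (which now unpacks the op's VariableRefArray inputs) rather than through a native phi kernel.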
8 changes: 8 additions & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -2203,6 +2203,14 @@
out : Out
drop_empty_grad : [x_grad]

- op : multi_gru
inputs :
{x : X, weight_x : WeightX, weight_h : WeightH, bias : Bias, scale_weights : Scale_weights}
outputs :
hidden : Hidden
attrs :
{scale_data : Scale_data, shift_data : Shift_data}

- op : multiclass_nms3
inputs :
{bboxes : BBoxes, scores : Scores, rois_num : RoisNum}
117 changes: 117 additions & 0 deletions paddle/phi/infermeta/fusion.cc
@@ -3712,4 +3712,121 @@ void SinePosXPUInferMeta(const MetaTensor& x,
out->set_dtype(x.dtype());
}

void MultiGruInferMeta(
const MetaTensor& x,
const std::vector<const MetaTensor*>& weight_x,
const std::vector<const MetaTensor*>& weight_h,
const paddle::optional<std::vector<const MetaTensor*>>& bias,
const paddle::optional<std::vector<const MetaTensor*>>& scale_weights,
const std::string& activation,
const std::string& gate_activation,
int layers,
bool origin_mode,
const std::string& mkldnn_data_type,
float scale_data,
float shift_data,
bool force_fp32_output,
MetaTensor* hidden) {
auto x_dims = x.dims();
auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1)
? common::flatten_to_2d(x_dims, 1)
: x_dims;
PADDLE_ENFORCE_EQ(
x_mat_dims.size(),
2,
phi::errors::InvalidArgument("The size of input X dims should be 2, "
"or 3 with second dimension equal to "
"1, but now Input X dim is:[%s] ",
x_dims));

for (int i : {0, 1}) {
PADDLE_ENFORCE_EQ(
weight_x[i]->dims()[0],
x_mat_dims[1],
phi::errors::InvalidArgument(
"The first dimension of flattened WeightX #%d"
"should equal to last dimension of flattened input X, but "
"received fattened WeightX dimension is:%d, flattened X dimension "
"is:%d",
i,
weight_x[i]->dims()[0],
x_mat_dims[1]));
}

for (int i = 0; i < 2 * layers; ++i) {
PADDLE_ENFORCE_EQ(weight_x[i]->dims().size(),
2,
phi::errors::InvalidArgument(
"The rank of WeightX #%d should be 2, but received "
"WeightX dim size is:%d, WeightX dim is:[%s] ",
i,
weight_x[i]->dims().size(),
weight_x[i]->dims()));
PADDLE_ENFORCE_EQ(weight_h[i]->dims().size(),
2,
phi::errors::InvalidArgument(
"The rank of WeightH #%d should be 2, but received "
"WeightH dim size is:%d, WeightH dim is:[%s] ",
i,
weight_h[i]->dims().size(),
weight_h[i]->dims()));
int frame_size = static_cast<int>(weight_h[i]->dims()[0]);
PADDLE_ENFORCE_EQ(
weight_h[i]->dims()[1],
3 * frame_size,
phi::errors::InvalidArgument(
"The second dimension of WeightH #%d "
"should equal to 3 * frame_size, but received WeightH's "
"second dimension is: %d, frame size is:%d",
i,
weight_h[i]->dims()[1],
frame_size));
PADDLE_ENFORCE_EQ(
weight_x[i]->dims()[1],
3 * frame_size,
phi::errors::InvalidArgument(
"The second dimension of WeightX #%d "
"should equal to 3 * frame_size, but received WeightX's "
"second dimension is: %d, frame size is:%d",
i,
weight_x[i]->dims()[1],
frame_size));
}

if (bias) {
for (int i = 0; i < 2 * layers; ++i) {
int frame_size = static_cast<int>(weight_h[i]->dims()[0]);
PADDLE_ENFORCE_EQ(bias.get()[i]->dims().size(),
2,
phi::errors::InvalidArgument(
"The rank of Bias #%d should be 2, but received "
"Bias rank is:%d, Bias dim is:[%s]",
i,
bias.get()[i]->dims().size(),
bias.get()[i]->dims()));
PADDLE_ENFORCE_EQ(bias.get()[i]->dims()[0],
1,
phi::errors::InvalidArgument(
"The first dimension of Bias #%d should be 1, but "
"received Bias first dim is:%d, Bias dim is:[%s]",
i,
bias.get()[i]->dims()[0],
bias.get()[i]->dims()));
PADDLE_ENFORCE_EQ(
bias.get()[i]->dims()[1],
frame_size * 3,
phi::errors::InvalidArgument(
"The shape of Bias #%d must be [1, frame_size * 3], but "
"received bias dim is:[%s], frame size is:%d",
i,
bias.get()[i]->dims(),
frame_size));
}
}

int last_frame_size = static_cast<int>(weight_h.back()->dims()[0]);
phi::DDim out_dims({x_mat_dims[0], 2 * last_frame_size});
hidden->set_dims(out_dims);
hidden->share_lod(x);
}
} // namespace phi
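To make the dimension checks in MultiGruInferMeta concrete, a worked shape example with illustrative numbers (the GRU is bidirectional, so each layer contributes two entries to every weight list):

// x            : [batch=5, width=8]      (a [5, 1, 8] input is flattened to 2-D)
// layers = 2   -> weight_x / weight_h / bias each hold 2 * layers = 4 tensors
// frame_size   = weight_h[i].dims()[0] = 16
// weight_x[0,1]: [8, 48]   (first dim must match x's width, second is 3 * frame_size)
// weight_h[i]  : [16, 48]  (second dim is 3 * frame_size)
// bias[i]      : [1, 48]
// hidden       : [batch, 2 * last_frame_size] = [5, 32], sharing x's LoD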
15 changes: 15 additions & 0 deletions paddle/phi/infermeta/fusion.h
@@ -839,4 +839,19 @@ void SinePosXPUInferMeta(const MetaTensor& x,
const MetaTensor& y,
MetaTensor* out);

void MultiGruInferMeta(
const MetaTensor& x,
const std::vector<const MetaTensor*>& weight_x,
const std::vector<const MetaTensor*>& weight_h,
const paddle::optional<std::vector<const MetaTensor*>>& bias,
const paddle::optional<std::vector<const MetaTensor*>>& scale_weights,
const std::string& activation,
const std::string& gate_activation,
int layers,
bool origin_mode,
const std::string& mkldnn_data_type,
float scale_data,
float shift_data,
bool force_fp32_output,
MetaTensor* hidden);
} // namespace phi
4 changes: 3 additions & 1 deletion test/mkldnn/test_multi_gru_mkldnn_op.py
@@ -203,7 +203,9 @@ def setUp(self):
self.attrs['Shift_data'] = shift_data

def test_check_output(self):
self.check_output(check_dygraph=False, atol=self.error_margin)
self.check_output(
check_dygraph=False, atol=self.error_margin, check_pir_onednn=True
)


class TestMultiGruMkldnnOpNoBias(TestMultiGruMkldnnOp):
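With check_pir_onednn=True the existing oneDNN unit test additionally verifies the output under the PIR program using the multi_gru OneDNN kernel registered in this PR, on top of the old static-graph check (check_dygraph stays False).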