From 6d2425e2747a8fde63ae5a0e6fb9867c176bf536 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 20 Feb 2024 09:20:12 +0000
Subject: [PATCH 1/2] pir onednn multi_gru

---
 .../pir_adaptor/pir_adaptor_util.cc           |   9 +-
 .../fused/mkldnn/multi_gru_mkldnn_op.cc       |  10 +-
 .../fluid/pir/dialect/op_generator/op_gen.py  |   5 +-
 .../pir/dialect/op_generator/ops_api_gen.py   |   1 +
 .../fluid/pir/dialect/operator/ir/onednn.yaml |   9 ++
 .../dialect/operator/ir/ops_onednn_extra.yaml |   2 +-
 .../fluid/pir/dialect/operator/utils/utils.cc |   1 +
 paddle/phi/api/yaml/op_compat.yaml            |   8 ++
 paddle/phi/infermeta/fusion.cc                | 117 ++++++++++++++++++
 paddle/phi/infermeta/fusion.h                 |  15 +++
 test/mkldnn/test_multi_gru_mkldnn_op.py       |   4 +-
 11 files changed, 171 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc b/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc
index 32f67dc2d05a3..b8fbaa6b6e7bd 100644
--- a/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc
+++ b/paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.cc
@@ -807,7 +807,14 @@ void BuildRuntimeContext(pir::Operation* op,
         phi::errors::PreconditionNotMet(
             "can not find var[%s] in scope", in_var_name));
     auto var = inner_scope->FindVar(in_var_name);
-    runtime_ctx->inputs[legacy_attr_name].push_back(var);
+    if (var->IsType<VariableRefArray>()) {
+      for (auto single_var : var->Get<VariableRefArray>()) {
+        runtime_ctx->inputs[legacy_attr_name].push_back(
+            const_cast<Variable*>(single_var));
+      }
+    } else {
+      runtime_ctx->inputs[legacy_attr_name].push_back(var);
+    }
   }
 
   auto& output_name_list = op_yaml_info.OutputNames();
diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
index 4dd6a9a48a16d..8e11c91a117d1 100644
--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -681,7 +681,7 @@ class MultiGRUHandler {
   const phi::Vector<size_t>& x_lod_;
 };
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -720,8 +720,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_KERNEL(multi_gru,
-                   MKLDNN,
-                   phi::CPUPlace,
-                   ops::MultiGRUMKLDNNKernel<float>,
-                   ops::MultiGRUMKLDNNKernel<uint8_t>);
+
+PD_REGISTER_STRUCT_KERNEL(
+    multi_gru, OneDNN, ONEDNN, ops::MultiGRUMKLDNNKernel, float, uint8_t) {}
diff --git a/paddle/fluid/pir/dialect/op_generator/op_gen.py b/paddle/fluid/pir/dialect/op_generator/op_gen.py
index 92583472e1002..e8941fcf66673 100644
--- a/paddle/fluid/pir/dialect/op_generator/op_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/op_gen.py
@@ -2137,7 +2137,10 @@ def OpGenerator(
                     "data_format_tensors"
                 ]
                 op["dynamic_fallback"] = onednn_item["dynamic_fallback"]
-                op["attrs"] = op["attrs"] + onednn_item["attrs"]
+                if onednn_item["attrs"] is not None:
+                    op["attrs"] = op["attrs"] + onednn_item["attrs"]
+                else:
+                    op["attrs"] = op["attrs"]
             elif op['name'] in ops_onednn_extra_map:
                 onednn_item = ops_onednn_extra_map[op['name']]
                 op["is_onednn_only"] = onednn_item["is_onednn_only"]
diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
index 0212d41523444..13c656207f1b8 100644
--- a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
+++ b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -106,6 +106,7 @@
     'share_data',
    'onednn_to_paddle_layout',
     'lrn',
+    'multi_gru',
 ]
 
 NO_NEED_GEN_STATIC_ONLY_APIS = [
diff --git a/paddle/fluid/pir/dialect/operator/ir/onednn.yaml b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
index 0ab6057cc48db..94a850f6fe294 100644
--- a/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
@@ -28,6 +28,15 @@
     data_type : input
   optional : bias, residual_param
 
+- op: multi_gru
+  args: (Tensor x, Tensor[] weight_x, Tensor[] weight_h, Tensor[] bias, Tensor[] scale_weights, str activation="tanh", str gate_activation="sigmoid", int layers=1, bool origin_mode=false, str mkldnn_data_type="float32", float scale_data=1.0, float shift_data=1.0, bool force_fp32_output=false)
+  output: Tensor(hidden)
+  infer_meta:
+    func: MultiGruInferMeta
+  kernel:
+    func: multi_gru
+  optional: bias, scale_weights
+
 - op : quantize
   args : (Tensor input, bool is_negative_input=false, float scale=1.0, float shift=0.0, str output_format="NHWC", bool bfloat16=false)
   output : Tensor(output)
diff --git a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
index d9d8fe6999024..7957e0fafa098 100644
--- a/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/ops_onednn_extra.yaml
@@ -170,7 +170,7 @@
 
 - op : mish_grad
 
-# - op : multi_gru
+- op : multi_gru
 
 - op : multiply
 
diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.cc b/paddle/fluid/pir/dialect/operator/utils/utils.cc
index c0f88cc3dc4b9..bb90f606ee2dd 100644
--- a/paddle/fluid/pir/dialect/operator/utils/utils.cc
+++ b/paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -79,6 +79,7 @@ const std::unordered_set<std::string> LegacyOpList = {
     paddle::onednn::dialect::LrnGradOp::name(),
     paddle::onednn::dialect::QuantizeOp::name(),
     paddle::onednn::dialect::RequantizeOp::name(),
+    paddle::onednn::dialect::MultiGruOp::name(),
 #endif
     CReduceMinOp::name(),
     PushSparseV2Op::name()};
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 490c43ace3c2c..8f92856436aeb 100755
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -2203,6 +2203,14 @@
     out : Out
   drop_empty_grad : [x_grad]
 
+- op : multi_gru
+  inputs :
+    {x : X, weight_x : WeightX, weight_h : WeightH, bias : Bias, scale_weights : Scale_weights}
+  outputs :
+    hidden : Hidden
+  attrs :
+    {scale_data : Scale_data, shift_data : Shift_data}
+
 - op : multiclass_nms3
   inputs :
     {bboxes : BBoxes, scores : Scores, rois_num : RoisNum}
diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc
index e6e0082f626f0..6e85754335ce9 100644
--- a/paddle/phi/infermeta/fusion.cc
+++ b/paddle/phi/infermeta/fusion.cc
@@ -3712,4 +3712,121 @@ void SinePosXPUInferMeta(const MetaTensor& x,
   out->set_dtype(x.dtype());
 }
 
+void MultiGruInferMeta(
+    const MetaTensor& x,
+    const std::vector<const MetaTensor*>& weight_x,
+    const std::vector<const MetaTensor*>& weight_h,
+    const paddle::optional<std::vector<const MetaTensor*>>& bias,
+    const paddle::optional<std::vector<const MetaTensor*>>& scale_weights,
+    const std::string& activation,
+    const std::string& gate_activation,
+    int layers,
+    bool origin_mode,
+    const std::string& mkldnn_data_type,
+    float scale_data,
+    float shift_data,
+    bool force_fp32_output,
+    MetaTensor* hidden) {
+  auto x_dims = x.dims();
+  auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1)
+                        ? common::flatten_to_2d(x_dims, 1)
+                        : x_dims;
+  PADDLE_ENFORCE_EQ(
+      x_mat_dims.size(),
+      2,
+      phi::errors::InvalidArgument("The size of input X dims should be 2, "
+                                   "or 3 with second dimension equal to "
+                                   "1, but now Input X dim is:[%s] ",
+                                   x_dims));
+
+  for (int i : {0, 1}) {
+    PADDLE_ENFORCE_EQ(
+        weight_x[i]->dims()[0],
+        x_mat_dims[1],
+        phi::errors::InvalidArgument(
+            "The first dimension of flattened WeightX #%d "
+            "should equal to last dimension of flattened input X, but "
+            "received flattened WeightX dimension is:%d, flattened X "
+            "dimension is:%d",
+            i,
+            weight_x[i]->dims()[0],
+            x_mat_dims[1]));
+  }
+
+  for (int i = 0; i < 2 * layers; ++i) {
+    PADDLE_ENFORCE_EQ(weight_x[i]->dims().size(),
+                      2,
+                      phi::errors::InvalidArgument(
+                          "The rank of WeightX #%d should be 2, but received "
+                          "WeightX dim size is:%d, WeightX dim is:[%s] ",
+                          i,
+                          weight_x[i]->dims().size(),
+                          weight_x[i]->dims()));
+    PADDLE_ENFORCE_EQ(weight_h[i]->dims().size(),
+                      2,
+                      phi::errors::InvalidArgument(
+                          "The rank of WeightH #%d should be 2, but received "
+                          "WeightH dim size is:%d, WeightH dim is:[%s] ",
+                          i,
+                          weight_h[i]->dims().size(),
+                          weight_h[i]->dims()));
+    int frame_size = static_cast<int>(weight_h[i]->dims()[0]);
+    PADDLE_ENFORCE_EQ(
+        weight_h[i]->dims()[1],
+        3 * frame_size,
+        phi::errors::InvalidArgument(
+            "The second dimension of WeightH #%d "
+            "should equal to 3 * frame_size, but received WeightH's "
+            "second dimension is: %d, frame size is:%d",
+            i,
+            weight_h[i]->dims()[1],
+            frame_size));
+    PADDLE_ENFORCE_EQ(
+        weight_x[i]->dims()[1],
+        3 * frame_size,
+        phi::errors::InvalidArgument(
+            "The second dimension of WeightX #%d "
+            "should equal to 3 * frame_size, but received WeightX's "
+            "second dimension is: %d, frame size is:%d",
+            i,
+            weight_x[i]->dims()[1],
+            frame_size));
+  }
+
+  if (bias) {
+    for (int i = 0; i < 2 * layers; ++i) {
+      int frame_size = static_cast<int>(weight_h[i]->dims()[0]);
+      PADDLE_ENFORCE_EQ(bias.get()[i]->dims().size(),
+                        2,
+                        phi::errors::InvalidArgument(
+                            "The rank of Bias #%d should be 2, but received "
+                            "Bias rank is:%d, Bias dim is:[%s]",
+                            i,
+                            bias.get()[i]->dims().size(),
+                            bias.get()[i]->dims()));
+      PADDLE_ENFORCE_EQ(bias.get()[i]->dims()[0],
+                        1,
+                        phi::errors::InvalidArgument(
+                            "The first dimension of Bias #%d should be 1, but "
+                            "received Bias first dim is:%d, Bias dim is:[%s]",
+                            i,
+                            bias.get()[i]->dims()[0],
+                            bias.get()[i]->dims()));
+      PADDLE_ENFORCE_EQ(
+          bias.get()[i]->dims()[1],
+          frame_size * 3,
+          phi::errors::InvalidArgument(
+              "The shape of Bias #%d must be [1, frame_size * 3], but "
+              "received bias dim is:[%s], frame size is:%d",
+              i,
+              bias.get()[i]->dims(),
+              frame_size));
+    }
+  }
+
+  int last_frame_size = static_cast<int>(weight_h.back()->dims()[0]);
+  phi::DDim out_dims({x_mat_dims[0], 2 * last_frame_size});
+  hidden->set_dims(out_dims);
+  hidden->share_lod(x);
+}
 }  // namespace phi
diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h
index f8e4cb82f6809..767f22fd245f4 100644
--- a/paddle/phi/infermeta/fusion.h
+++ b/paddle/phi/infermeta/fusion.h
@@ -839,4 +839,19 @@ void SinePosXPUInferMeta(const MetaTensor& x,
                          const MetaTensor& y,
                          MetaTensor* out);
 
+void MultiGruInferMeta(
+    const MetaTensor& x,
+    const std::vector<const MetaTensor*>& weight_x,
+    const std::vector<const MetaTensor*>& weight_h,
+    const paddle::optional<std::vector<const MetaTensor*>>& bias,
+    const paddle::optional<std::vector<const MetaTensor*>>& scale_weights,
+    const std::string& activation,
+    const std::string& gate_activation,
+    int layers,
+    bool origin_mode,
+    const std::string& mkldnn_data_type,
+    float scale_data,
+    float shift_data,
+    bool force_fp32_output,
+    MetaTensor* hidden);
 }  // namespace phi
diff --git a/test/mkldnn/test_multi_gru_mkldnn_op.py b/test/mkldnn/test_multi_gru_mkldnn_op.py
index afabd03a3a0dc..f4d2b9cb9e60d 100644
--- a/test/mkldnn/test_multi_gru_mkldnn_op.py
+++ b/test/mkldnn/test_multi_gru_mkldnn_op.py
@@ -203,7 +203,9 @@ def setUp(self):
             self.attrs['Shift_data'] = shift_data
 
     def test_check_output(self):
-        self.check_output(check_dygraph=False, atol=self.error_margin)
+        self.check_output(
+            check_dygraph=False, atol=self.error_margin, check_pir_onednn=True
+        )
 
 
 class TestMultiGruMkldnnOpNoBias(TestMultiGruMkldnnOp):

From 88de71d7669a1d8e24a27aa6065a0c02e4ccdb95 Mon Sep 17 00:00:00 2001
From: Wang Huan
Date: Tue, 20 Feb 2024 11:24:39 +0000
Subject: [PATCH 2/2] refine

---
 paddle/fluid/framework/new_executor/feed_fetch_utils.cc | 1 +
 paddle/fluid/pir/dialect/operator/ir/onednn.yaml        | 1 +
 2 files changed, 2 insertions(+)

diff --git a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc
index 99829de387c32..f82350ec6d103 100644
--- a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc
+++ b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc
@@ -115,6 +115,7 @@ void FetchTensors(const std::vector<std::string>& job_fetch_names,
         &(PADDLE_GET(phi::DenseTensor, fetch_list->at(micro_batch_id)[col]));
     if (src.IsInitialized()) {
       TensorCopy(src, platform::CPUPlace(), dst);
+      dst->set_lod(src.lod());
     } else {
       VLOG(6) << "Found " << var_name
               << " is not initialized and skip TensorCopy.";
diff --git a/paddle/fluid/pir/dialect/operator/ir/onednn.yaml b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
index 94a850f6fe294..cfc52121febe5 100644
--- a/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
+++ b/paddle/fluid/pir/dialect/operator/ir/onednn.yaml
@@ -35,6 +35,7 @@
     func: MultiGruInferMeta
   kernel:
     func: multi_gru
+    data_type : x
   optional: bias, scale_weights
 
 - op : quantize
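
Note (illustrative, not part of the patch): the output-shape rule added in MultiGruInferMeta can be checked in isolation. The hidden output is [x_mat_dims[0], 2 * frame_size], where frame_size is the first dimension of the last layer's WeightH and the factor 2 comes from concatenating the forward and reverse GRU outputs. The standalone C++ sketch below assumes hypothetical dimension values; it is not Paddle code.

#include <array>
#include <cassert>
#include <cstdint>

// Mirrors the final shape computation in MultiGruInferMeta:
// hidden dims = {x_mat_dims[0], 2 * frame_size_of_last_layer}.
std::array<int64_t, 2> MultiGruHiddenDims(
    const std::array<int64_t, 2>& x_mat_dims,       // flattened input [T, IC]
    const std::array<int64_t, 2>& last_weight_h) {  // [frame_size, 3 * frame_size]
  const int64_t frame_size = last_weight_h[0];
  assert(last_weight_h[1] == 3 * frame_size);  // same constraint the InferMeta enforces
  return {x_mat_dims[0], 2 * frame_size};      // 2x: forward + reverse GRU outputs
}

int main() {
  // Hypothetical sizes: 12 time steps, 16 input channels, frame size 32.
  auto dims = MultiGruHiddenDims({12, 16}, {32, 96});
  assert(dims[0] == 12 && dims[1] == 64);
  return 0;
}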
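Note (illustrative, not part of the patch): the BuildRuntimeContext change follows the same pattern as the sketch below. When an input variable actually holds an array of variables, each element is appended to the legacy RuntimeContext individually, so fused kernels with Tensor[] inputs such as multi_gru's WeightX see one entry per tensor. The types here (Var, VarOrArray) are stand-ins for Paddle's Variable and VariableRefArray, not the real classes.

#include <iostream>
#include <string>
#include <variant>
#include <vector>

// Stand-ins for Paddle's Variable / VariableRefArray (illustrative only).
struct Var { std::string name; };
using VarOrArray = std::variant<Var, std::vector<const Var*>>;

// Append either the single variable or every element of the array,
// mirroring the branch added to BuildRuntimeContext.
void PushInputs(const VarOrArray& var, std::vector<const Var*>* inputs) {
  if (auto* arr = std::get_if<std::vector<const Var*>>(&var)) {
    for (const Var* single_var : *arr) {
      inputs->push_back(single_var);
    }
  } else {
    inputs->push_back(&std::get<Var>(var));
  }
}

int main() {
  Var w0{"weight_x_0"}, w1{"weight_x_1"};
  VarOrArray weights = std::vector<const Var*>{&w0, &w1};
  std::vector<const Var*> inputs;
  PushInputs(weights, &inputs);
  std::cout << inputs.size() << "\n";  // 2: one entry per tensor in the array
  return 0;
}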