From 20f6c147ed54e91fbbd52d230b63f06d4e56c079 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 23 Nov 2022 09:40:09 +0000 Subject: [PATCH 1/3] generate static graph code for some operators --- paddle/fluid/operators/gelu_op_npu.cc | 93 ------------ paddle/fluid/operators/gelu_op_npu_test.cc | 167 --------------------- paddle/fluid/operators/is_empty_op.cc | 66 -------- paddle/fluid/operators/isfinite_v2_op.cc | 152 ------------------- paddle/phi/api/yaml/legacy_ops.yaml | 35 ----- paddle/phi/api/yaml/op_compat.yaml | 24 +++ paddle/phi/api/yaml/ops.yaml | 35 +++++ paddle/phi/ops/compat/isfinite_sig.cc | 19 --- 8 files changed, 59 insertions(+), 532 deletions(-) delete mode 100644 paddle/fluid/operators/gelu_op_npu.cc delete mode 100644 paddle/fluid/operators/gelu_op_npu_test.cc delete mode 100644 paddle/fluid/operators/is_empty_op.cc delete mode 100644 paddle/fluid/operators/isfinite_v2_op.cc delete mode 100644 paddle/phi/ops/compat/isfinite_sig.cc diff --git a/paddle/fluid/operators/gelu_op_npu.cc b/paddle/fluid/operators/gelu_op_npu.cc deleted file mode 100644 index f462336b412a3..0000000000000 --- a/paddle/fluid/operators/gelu_op_npu.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/device/npu/npu_op_runner.h" - -namespace paddle { -namespace operators { - -using Tensor = phi::DenseTensor; - -template -class GeluNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - - auto* out = ctx.Output("Out"); - - auto place = ctx.GetPlace(); - - out->mutable_data(place); - - auto stream = - ctx.template device_context() - .stream(); - - const auto& runner = NpuOpRunner("Gelu", {*x}, {*out}, {}); - runner.Run(stream); - } -}; - -template -class GeluGradNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - - auto* dx = ctx.Output(framework::GradVarName("X")); - - auto place = ctx.GetPlace(); - - dx->mutable_data(place); - - auto stream = - ctx.template device_context() - .stream(); - - // NOTE(pangyoki): In the original implementation of GeluGrad op, the input - // is {*dout, *x, out}, where out = Gelu(x). However, we find that variable - // `out` was not actually used. In order to improve performance, the - // useless GELU operation was deleted. - // We directly use `*dout` as a placeholder to replace `out`, it will not - // be used in calculations. 
- const auto& runner_dx = - NpuOpRunner("GeluGrad", {*dout, *x, *dout}, {*dx}, {}); - runner_dx.Run(stream); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_NPU_KERNEL( - gelu, - ops::GeluNPUKernel, - ops::GeluNPUKernel); - -REGISTER_OP_NPU_KERNEL( - gelu_grad, - ops::GeluGradNPUKernel, - ops::GeluGradNPUKernel); diff --git a/paddle/fluid/operators/gelu_op_npu_test.cc b/paddle/fluid/operators/gelu_op_npu_test.cc deleted file mode 100644 index 9dca0bb8cba0f..0000000000000 --- a/paddle/fluid/operators/gelu_op_npu_test.cc +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include -#include // NOLINT -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP_ITSELF(gelu); -USE_OP_DEVICE_KERNEL(gelu, NPU); - -template -void Compare(f::Scope* scope, const p::DeviceContext& ctx) { - // init - auto x = scope->Var("X"); - auto tensor_x = x->GetMutable(); - - std::vector init_x; - for (int64_t i = 0; i < 10 * 10; ++i) { - init_x.push_back(static_cast(1.0)); - } - - paddle::framework::TensorFromVector(init_x, ctx, tensor_x); - tensor_x->Resize({10, 10}); - - auto out = scope->Var("Out"); - auto tensor_out = out->GetMutable(); - - f::AttributeMap attrs; - - ctx.Wait(); - - // run - auto place = ctx.GetPlace(); - - auto op = f::OpRegistry::CreateOp( - "gelu", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); - op->Run(*scope, place); - - ctx.Wait(); - - // eval time - struct timeval start, end; - gettimeofday(&start, NULL); - - for (int i = 0; i < 100; i++) { - op->Run(*scope, place); - } - - ctx.Wait(); - - gettimeofday(&end, NULL); - int micros = - (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec); - printf("used time: %d\n", micros / 100); - - // eval value - std::vector out_vec; - paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec); - - float expected = 0.841192; - for (uint32_t i = 0; i < out_vec.size(); i++) { - EXPECT_FLOAT_EQ(out_vec[i], static_cast(expected)); - } -} - -template -void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) { - auto dout = scope->Var("DOut"); - auto tensor_dout = dout->GetMutable(); - - auto x = scope->Var("X"); - auto tensor_x = x->GetMutable(); - - std::vector init_dout; - for (int64_t i = 0; i < 10 * 10; ++i) { - init_dout.push_back(static_cast(1.0)); - } - - std::vector init_x; - for (int64_t i = 0; i < 10 * 10; ++i) { - init_x.push_back(static_cast(1.0)); - } - - paddle::framework::TensorFromVector(init_dout, ctx, tensor_dout); - tensor_dout->Resize({10, 10}); - paddle::framework::TensorFromVector(init_x, ctx, tensor_x); - tensor_x->Resize({10, 10}); - - auto 
dx = scope->Var("DX"); - auto tensor_dx = dx->GetMutable(); - - f::AttributeMap attrs; - - ctx.Wait(); - - // run - auto place = ctx.GetPlace(); - - auto op = f::OpRegistry::CreateOp("gelu_grad", - {{"Out@GRAD", {"DOut"}}, {"X", {"X"}}}, - {{"X@GRAD", {"DX"}}}, - attrs); - op->Run(*scope, place); - - ctx.Wait(); - - // eval time - struct timeval start, end; - gettimeofday(&start, NULL); - - for (int i = 0; i < 100; i++) { - op->Run(*scope, place); - } - - ctx.Wait(); - - gettimeofday(&end, NULL); - int micros = - (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) - (start.tv_usec); - printf("used time: %d\n", micros / 100); - - // eval value - std::vector dx_vec; - paddle::framework::TensorToVector(*tensor_dx, ctx, &dx_vec); - - float expected = 1.082964; - for (uint32_t i = 0; i < dx_vec.size(); i++) { - EXPECT_FLOAT_EQ(dx_vec[i], static_cast(expected)); - } -} - -TEST(gelu, NPU_fp32) { - f::Scope scope; - auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); - Compare(&scope, *ctx); -} - -TEST(gelu_grad, NPU) { - f::Scope scope; - auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); - CompareGrad(&scope, *ctx); -} diff --git a/paddle/fluid/operators/is_empty_op.cc b/paddle/fluid/operators/is_empty_op.cc deleted file mode 100644 index b891e9c019ff9..0000000000000 --- a/paddle/fluid/operators/is_empty_op.cc +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/is_empty_op.h" - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class IsEmptyOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), x->place()); - } -}; - -class IsEmptyOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(LoDTensor) Tensor which is to be checked."); - AddOutput("Out", - "(LoDTensor) a boolean Tensor that indicate empty or not."); - AddComment(R"DOC( -IsEmpty Operator which checks whether a tensor is empty. 
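-For example, a tensor of shape [0, 4] contains no elements and is therefore
-empty, while a tensor of shape [1, 4] is not.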
- -It will just return product(tensor.ddims()) > 0; - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(is_empty, - IsEmptyInferShapeFunctor, - PD_INFER_META(phi::IsEmptyInferMeta)); -REGISTER_OPERATOR( - is_empty, - ops::IsEmptyOp, - ops::IsEmptyOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - IsEmptyInferShapeFunctor); diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc deleted file mode 100644 index 81b6617d344f2..0000000000000 --- a/paddle/fluid/operators/isfinite_v2_op.cc +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/common_infer_shape_functions.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -template -class EmptyGradOpMaker; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -namespace operators { -template -class OverflowKernel; -} // namespace operators -} // namespace paddle - -namespace plat = paddle::platform; - -namespace paddle { -namespace operators { - -class OverflowV2Op : public framework::OperatorWithKernel { - public: - OverflowV2Op(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - int dtype = -1; - auto *x_var = ctx.InputVar("X"); - if (x_var->IsType()) { - dtype = framework::TransToProtoVarType( - x_var->Get().dtype()); - } else if (x_var->IsType()) { - dtype = framework::TransToProtoVarType( - x_var->Get().value().dtype()); - } else { - PADDLE_THROW(plat::errors::InvalidArgument( - "Cannot find the input data type by all input data")); - } - return framework::OpKernelType(framework::proto::VarType::Type(dtype), - ctx.GetPlace()); - } -}; - -class OverflowV2OpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "(Tensor) The input tensors of overflowv2 operator."); - AddOutput("Out", - "(Tensor) The output tensor of overflowv2 operator. " - "Same size compare to input tensor"); - AddComment(string::Sprintf(R"DOC( -Overflow %s operator. - -$$Out = any(X)$$ - -Check whether each element of X is Inf or Nan, return the bool result of each -element of X as a tensor. 
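-
-For example, for input X = [1.0, inf, nan], isinf_v2 gives [false, true, false],
-isnan_v2 gives [false, false, true] and isfinite_v2 gives [true, false, false].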
- -%s -)DOC", - GetName(), - GetComments())); - } - - protected: - virtual std::string GetName() const = 0; - virtual std::string GetComments() const = 0; -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(isinf_v2, - IsinfInferShapeFunctor, - PD_INFER_META(phi::IsfiniteInferMeta)); - -DECLARE_INFER_SHAPE_FUNCTOR(isnan_v2, - IsnanInferShapeFunctor, - PD_INFER_META(phi::IsfiniteInferMeta)); - -DECLARE_INFER_SHAPE_FUNCTOR(isfinite_v2, - IsfiniteInferShapeFunctor, - PD_INFER_META(phi::IsfiniteInferMeta)); - -#define REGISTER_V2OP_MAKER(op_type, comment) \ - namespace paddle { \ - namespace operators { \ - class _##op_type##OverflowV2OpMaker \ - : public ::paddle::operators::OverflowV2OpMaker { \ - protected: \ - std::string GetName() const { return #op_type; } \ - std::string GetComments() const { return comment; } \ - }; \ - } \ - } - -REGISTER_V2OP_MAKER(isinf_v2, "isinfv2(X)") -REGISTER_V2OP_MAKER(isnan_v2, "isnanv2(X)") -REGISTER_V2OP_MAKER(isfinite_v2, "isfinitev2(X)"); - -REGISTER_OPERATOR( - isinf_v2, - ops::OverflowV2Op, - ops::_isinf_v2OverflowV2OpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - IsinfInferShapeFunctor); - -REGISTER_OPERATOR( - isnan_v2, - ops::OverflowV2Op, - ops::_isnan_v2OverflowV2OpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - IsnanInferShapeFunctor); - -REGISTER_OPERATOR( - isfinite_v2, - ops::OverflowV2Op, - ops::_isfinite_v2OverflowV2OpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - IsfiniteInferShapeFunctor); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index a1bc49a477ac3..be457ed65d80a 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -994,14 +994,6 @@ intermediate : saved_mean, saved_variance backward : instance_norm_grad -- op : is_empty - args : (Tensor x) - output : Tensor(out) - infer_meta : - func : IsEmptyInferMeta - kernel : - func : is_empty - - op : isclose args : (Tensor x, Tensor y, Scalar rtol, Scalar atol, bool equal_nan) output : Tensor(out) @@ -1011,33 +1003,6 @@ kernel : func : isclose -- op : isfinite - args : (Tensor x) - output : Tensor(out) - infer_meta : - func : IsfiniteInferMeta - kernel : - func : isfinite {dense -> dense}, - infinite_sr {selected_rows -> selected_rows} - -- op : isinf - args : (Tensor x) - output : Tensor(out) - infer_meta : - func : IsfiniteInferMeta - kernel : - func : isinf {dense -> dense}, - isinf_sr {selected_rows -> selected_rows} - -- op : isnan - args : (Tensor x) - output : Tensor(out) - infer_meta : - func : IsfiniteInferMeta - kernel : - func : isnan {dense -> dense}, - isnan_sr {selected_rows -> selected_rows} - - op : kldiv_loss args : (Tensor x, Tensor label, str reduction) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 8c1d7ac308576..777f58602aacd 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -628,6 +628,30 @@ outputs : out : Output +- op : is_empty + inputs : + x : X + outputs : + out : Out + +- op : isfinite (isfinite_v2) + inputs : + x : X + outputs : + out : Out + +- op : isinf (isinf_v2) + inputs : + x : X + outputs : + out : Out + +- op : isnan (isnan_v2) + inputs : + x : X + outputs : + out : Out + - op : layer_norm backward : layer_norm_grad extra : diff --git a/paddle/phi/api/yaml/ops.yaml 
b/paddle/phi/api/yaml/ops.yaml index 88ab2ee099ca6..35a55e71a8068 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -497,6 +497,41 @@ func : inverse backward : inverse_grad +- op : is_empty + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : IsEmptyInferMeta + kernel : + func : is_empty + +- op : isfinite + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : IsfiniteInferMeta + kernel : + func : isfinite {dense -> dense}, + infinite_sr {selected_rows -> selected_rows} + +- op : isinf + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : IsfiniteInferMeta + kernel : + func : isinf {dense -> dense}, + isinf_sr {selected_rows -> selected_rows} + +- op : isnan + args : (Tensor x) + output : Tensor(out) + infer_meta : + func : IsfiniteInferMeta + kernel : + func : isnan {dense -> dense}, + isnan_sr {selected_rows -> selected_rows} + - op : leaky_relu args : (Tensor x, float negative_slope = 0.02f) output : Tensor diff --git a/paddle/phi/ops/compat/isfinite_sig.cc b/paddle/phi/ops/compat/isfinite_sig.cc deleted file mode 100644 index 218b4c2f962c4..0000000000000 --- a/paddle/phi/ops/compat/isfinite_sig.cc +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
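-
-// These base-kernel-name registrations map the legacy *_v2 operator names
-// onto the phi kernels, e.g. isinf_v2 resolves to the isinf kernel.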
- -#include "paddle/phi/core/compat/op_utils.h" - -PD_REGISTER_BASE_KERNEL_NAME(isinf_v2, isinf); -PD_REGISTER_BASE_KERNEL_NAME(isnan_v2, isnan); -PD_REGISTER_BASE_KERNEL_NAME(isfinite_v2, isfinite); From 4266f2948438ed795bda52ed3ac6036aa9c6376a Mon Sep 17 00:00:00 2001 From: zyfncg Date: Sun, 27 Nov 2022 11:59:33 +0000 Subject: [PATCH 2/3] add some ops generate --- paddle/fluid/operators/kthvalue_op.cc | 128 ----------------- paddle/fluid/operators/label_smooth_op.cc | 157 --------------------- paddle/fluid/operators/masked_select_op.cc | 110 --------------- paddle/fluid/operators/matrix_power_op.cc | 118 ---------------- paddle/fluid/operators/maxout_op.cc | 105 -------------- paddle/phi/api/yaml/backward.yaml | 77 ++++++++-- paddle/phi/api/yaml/legacy_backward.yaml | 52 ------- paddle/phi/api/yaml/legacy_ops.yaml | 50 ------- paddle/phi/api/yaml/op_compat.yaml | 30 ++++ paddle/phi/api/yaml/ops.yaml | 49 +++++++ paddle/phi/ops/compat/kthvalue_sig.cc | 29 ---- paddle/phi/ops/compat/label_smooth_sig.cc | 35 ----- paddle/phi/ops/compat/masked_select_sig.cc | 34 ----- paddle/phi/ops/compat/matrix_power_sig.cc | 28 ---- paddle/phi/ops/compat/maxout_sig.cc | 31 ---- 15 files changed, 144 insertions(+), 889 deletions(-) delete mode 100644 paddle/fluid/operators/kthvalue_op.cc delete mode 100644 paddle/fluid/operators/label_smooth_op.cc delete mode 100644 paddle/fluid/operators/masked_select_op.cc delete mode 100644 paddle/fluid/operators/matrix_power_op.cc delete mode 100644 paddle/fluid/operators/maxout_op.cc delete mode 100644 paddle/phi/ops/compat/kthvalue_sig.cc delete mode 100644 paddle/phi/ops/compat/label_smooth_sig.cc delete mode 100644 paddle/phi/ops/compat/masked_select_sig.cc delete mode 100644 paddle/phi/ops/compat/matrix_power_sig.cc delete mode 100644 paddle/phi/ops/compat/maxout_sig.cc diff --git a/paddle/fluid/operators/kthvalue_op.cc b/paddle/fluid/operators/kthvalue_op.cc deleted file mode 100644 index 47ad520c87e76..0000000000000 --- a/paddle/fluid/operators/kthvalue_op.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/generator.h" -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class KthvalueOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context()); - } -}; - -class KthvalueOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddComment(R"DOC( - This operator find the k-th smallest elements in the specific axis of a Tensor. 
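-    For example, for X = [1, 4, 2, 3], k = 2 and the default axis, the 2nd
-    smallest element is 2, so Out = 2 and Indices = 2.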
- It will return the values and corresponding indices. - )DOC"); - AddInput("X", "(Tensor) The input of Kthvalue op"); - AddOutput("Out", "(Tensor) The values of k-th smallest elements of input"); - AddOutput("Indices", - "(Tensor) The indices of k-th smallest elements of input"); - AddAttr( - "k", - "(int, default 1) k for k-th smallest elements to look for along " - "the tensor).") - .SetDefault(1); - AddAttr("axis", - "the axis to sort and get the k indices, value." - "if not set, will get k-th value in last axis.") - .SetDefault(-1); - AddAttr("keepdim", "Keep the dim that to reduce.").SetDefault(false); - } -}; - -class KthvalueOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ( - ctx->HasInput("X"), - true, - platform::errors::InvalidArgument("Input(X) should be not null")); - PADDLE_ENFORCE_EQ( - ctx->HasInput("Indices"), - true, - platform::errors::InvalidArgument("Input(Indices) should be not null")); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), - true, - platform::errors::InvalidArgument( - "Grad Input(Out) should be not null")); - PADDLE_ENFORCE_EQ( - ctx->HasOutput(framework::GradVarName("X")), - true, - platform::errors::InvalidArgument("Grad Output(X) should be not null")); - - auto x_dims = ctx->GetInputDim("X"); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")); - return framework::OpKernelType(data_type, ctx.device_context()); - } -}; - -template -class KthvalueGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("kthvalue_grad"); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetInput("X", this->Input("X")); - op->SetInput("Indices", this->Output("Indices")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -DECLARE_INFER_SHAPE_FUNCTOR(kthvalue, - KthvalueInferShapeFunctor, - PD_INFER_META(phi::KthvalueInferMeta)); - -namespace ops = paddle::operators; -REGISTER_OPERATOR(kthvalue, - ops::KthvalueOp, - ops::KthvalueOpMaker, - ops::KthvalueGradOpMaker, - ops::KthvalueGradOpMaker, - KthvalueInferShapeFunctor); - -REGISTER_OPERATOR(kthvalue_grad, ops::KthvalueOpGrad); diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc deleted file mode 100644 index 72813e76c757e..0000000000000 --- a/paddle/fluid/operators/label_smooth_op.cc +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace framework { -class InferShapeContext; -class OpDesc; -} // namespace framework -namespace imperative { -class OpBase; -} // namespace imperative -} // namespace paddle - -namespace paddle { -namespace operators { - -class LabelSmoothOp : public framework::OperatorWithKernel { - public: - LabelSmoothOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), - true, - platform::errors::NotFound( - "The input 'X' of LabelSmoothOp is not found.")); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), - true, - platform::errors::NotFound( - "The output 'Out' of LabelSmoothOp is not found.")); - auto in_dims = ctx->GetInputDim("X"); - if (ctx->HasInput("PriorDist")) { - auto noise_dims = ctx->GetInputDim("PriorDist"); - auto noise_numel = phi::product(noise_dims); - PADDLE_ENFORCE_EQ( - in_dims[in_dims.size() - 1], - noise_numel, - platform::errors::InvalidArgument( - "The number of elements in input 'PriorDist' must be equal to " - "the " - "dimension of each label. But received each label's " - "dimension=[%d], number of elements in input 'PriorDist' is [%d]", - in_dims[in_dims.size() - 1], - noise_numel)); - } - ctx->ShareLoD("X", /*->*/ "Out"); - ctx->SetOutputDim("Out", in_dims); - } -}; - -class LabelSmoothOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(LoDTensor) The input labels of LabelSmooth operator. This " - "input can be batched labels in one-hot encoding or output from " - "softmax, with shape [N x K], where N is the batch size and K is " - "the number of classes"); - AddInput("PriorDist", - "(Tensor, optional)" - "The prior distribution to be added to the smoothed label. It is " - "fixed during training and the number of elements should be equal " - "to the dimension K of each label. Default is uniform " - "distribution and each element will be set to 1/K if not provided " - "in input.") - .AsDispensable(); - AddOutput("Out", - "(loDTensor) The smoothed label of LabelSmooth operator. It has" - "the same shape and LoD with the Input(LoDTensor)."); - AddAttr("epsilon", - "(float, default 0.0f)" - "The smoothing parameter of LabelSmooth operator.") - .SetDefault(0.0f); - AddComment(R"DOC( -LabelSmooth Operator. - -Label smoothing is a mechanism to regularize the classifier layer. In machine -learning, optimizing the log-likelihood of the correct label directly may -cause two problems. First, it may result in overfitting: if the model learns -to assign full probability to the ground-truth label for each training example, -it is not guaranteed to generalize. Second, it encourages the differences -between the largest logit and all others to become large, reducing the ability -of the model to adapt. Label smoothing is proposed to encourage the model to -be less confident, which replaces the ground-truth label $y$ with the weighted -sum of itself and some fixed distribution $\mu$, i.e. - -$$ - \tilde{y} = (1 - \epsilon) * y + \epsilon * \mu, -$$ - -where $(1 - \epsilon)$ and $\epsilon$ are the weights respectively, and -$\tilde{y}$ is the smoothed label. Usually uniform distribution is used for -$\mu$. 
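-For example, with $\epsilon = 0.1$, $K = 4$ classes, a one-hot label
-$y = [0, 1, 0, 0]$ and uniform $\mu = [0.25, 0.25, 0.25, 0.25]$, the smoothed
-label is $\tilde{y} = 0.9 * y + 0.1 * \mu = [0.025, 0.925, 0.025, 0.025]$.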
This change in the ground-truth label is called label-smoothing -regularization or LSR. - -See more details about label smoothing in https://arxiv.org/abs/1512.00567. - -)DOC"); - } -}; - -class LabelSmoothGradOp : public framework::OperatorWithKernel { - public: - LabelSmoothGradOp(const std::string &type, - const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputDim(framework::GradVarName("X"), - ctx->GetInputDim(framework::GradVarName("Out"))); - } -}; - -template -class LabelSmoothGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("label_smooth_grad"); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR(label_smooth, - ops::LabelSmoothOp, - ops::LabelSmoothOpMaker, - ops::LabelSmoothGradMaker, - ops::LabelSmoothGradMaker); -REGISTER_OPERATOR(label_smooth_grad, ops::LabelSmoothGradOp); diff --git a/paddle/fluid/operators/masked_select_op.cc b/paddle/fluid/operators/masked_select_op.cc deleted file mode 100644 index 7f53afb1ac3b8..0000000000000 --- a/paddle/fluid/operators/masked_select_op.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/infermeta/binary.h" - -namespace paddle { -namespace operators { - -class MaskedSelectOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); - return framework::OpKernelType(data_type, ctx.device_context()); - } -}; - -class MaskedSelectOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "The input tensor."); - AddInput("Mask", - "The mask of Input Tensor to be selected which is a bool Tensor."); - AddOutput( - "Y", - "The returned tensor, the data type " - "is same as input, will be on the same device with the input Tensor."); - AddComment(R"DOC( -Size Operator. - -Return a new 0-D tensor which indexes the indexed tensor according -the mask which is a tensor withe data type bool. 
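-
-For example, for X = [[1, 2], [3, 4]] and Mask = [[true, false], [false, true]],
-the output is Y = [1, 4].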
-)DOC"); - } -}; - -class MaskedSelectOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Input", - "Input", - "MaskedSelect"); - OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "MaskedSelect"); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*-->*/ framework::GradVarName("X")); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Y")), - ctx.device_context()); - } -}; - -template -class MaskedSelectGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType("masked_select_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Mask", this->Input("Mask")); - op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - } -}; - -DECLARE_NO_NEED_BUFFER_VARS_INFERER(MaskedSelectedGradNoNeedBufferVarsInferer, - "X"); -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(masked_select, - MaksedSelectInferShapeFunctor, - PD_INFER_META(phi::MaskedSelectInferMeta)); - -REGISTER_OPERATOR(masked_select, - ops::MaskedSelectOp, - ops::MaskedSelectOpMaker, - ops::MaskedSelectGradOpMaker, - ops::MaskedSelectGradOpMaker, - MaksedSelectInferShapeFunctor); -REGISTER_OPERATOR(masked_select_grad, - ops::MaskedSelectOpGrad, - ops::MaskedSelectedGradNoNeedBufferVarsInferer); diff --git a/paddle/fluid/operators/matrix_power_op.cc b/paddle/fluid/operators/matrix_power_op.cc deleted file mode 100644 index 01898d828b07d..0000000000000 --- a/paddle/fluid/operators/matrix_power_op.cc +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class MatrixPowerOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class MatrixPowerOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput( - "X", - "(Tensor), The input tensor of matrix_power op. 
Its shape should be " - "[*, M, M] where * is zero or more batch dimensions, and matrices " - "on the inner-most 2 dimensions all should be square matrices."); - AddOutput("Out", - "(Tensor), The output tensor of matrix_power op. It has the same " - "shape as the input."); - AddAttr("n", "(int), The exponent used to calculate the power of X."); - AddComment(R"DOC( -Matrix Power Operator. - -Computes the n-th power of a square matrix or a batch of square matrices. - -)DOC"); - } -}; - -class MatrixPowerOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map& GetInputOutputWithSameType() - const override { - static std::unordered_map u_map{ - {"X", /*->*/ "Out"}}; - return u_map; - } -}; - -class MatrixPowerGradOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* context) const override { - OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "matrix_power_grad"); - OP_INOUT_CHECK( - context->HasInput("Out"), "Input", "Out", "matrix_power_grad"); - OP_INOUT_CHECK(context->HasInput(framework::GradVarName("Out")), - "Input", - "Out@GRAD", - "matrix_power_grad"); - auto x_dims = context->GetInputDim("X"); - auto x_grad_name = framework::GradVarName("X"); - if (context->HasOutput(x_grad_name)) { - context->SetOutputDim(x_grad_name, x_dims); - } - } -}; - -template -class MatrixPowerGradOpMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr op) const override { - op->SetType(this->ForwardOpType() + "_grad"); - op->SetInput("X", this->Input("X")); - op->SetInput("Out", this->Output("Out")); - op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); - op->SetAttrMap(this->Attrs()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -DECLARE_INFER_SHAPE_FUNCTOR(matrix_power, - MatrixPowerInferShapeFunctor, - PD_INFER_META(phi::MatrixPowerInferMeta)); - -REGISTER_OPERATOR(matrix_power, - ops::MatrixPowerOp, - ops::MatrixPowerOpMaker, - ops::MatrixPowerOpInferVarType, - ops::MatrixPowerGradOpMaker, - ops::MatrixPowerGradOpMaker, - MatrixPowerInferShapeFunctor); - -REGISTER_OPERATOR(matrix_power_grad, ops::MatrixPowerGradOp); diff --git a/paddle/fluid/operators/maxout_op.cc b/paddle/fluid/operators/maxout_op.cc deleted file mode 100644 index d05535ae1fbf9..0000000000000 --- a/paddle/fluid/operators/maxout_op.cc +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
*/ - -#include - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/infermeta/unary.h" - -namespace paddle { -namespace operators { - -class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "A 4-D Tensor with data type of float32 or float64. " - "The data format is NCHW or NHWC. Where N is " - "batch size, C is the number of channels, " - "H and W is the height and width of " - "feature. "); - AddOutput("Out", - "A 4-D Tensor with same data type and data format " - "with input Tensor. "); - AddAttr( - "groups", - "Specifies how many groups the input tensor will be split into " - "at the channel dimension. And the number of output channel is " - "the number of channels divided by groups. "); - AddAttr( - "axis", - "Specifies the index of channel dimension where maxout will " - "be performed. It should be 1 when data format is NCHW, -1 or 3 " - "when data format is NHWC. " - "Default: 1. ") - .SetDefault(1); - AddComment(R"DOC( -MaxOut Operator. - -Assumed the input shape is (N, Ci, H, W). -The output shape is (N, Co, H, W). -Then $Co = Ci / groups$ and the operator formula is as follows: - -$$ y_{si+j} = \max_{k} x_{gsi + sk + j} $$ -$$ g = groups $$ -$$ s = \\frac{input.size}{num\\_channels} $$ -$$ 0 \\le i < \\frac{num\\_channels}{groups} $$ -$$ 0 \\le j < s $$ -$$ 0 \\le k < groups $$ - -Please refer to Paper: - - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf - - Multi-digit Number Recognition from Street View \ - Imagery using Deep Convolutional Neural Networks: \ - https://arxiv.org/pdf/1312.6082v4.pdf - -)DOC"); - } -}; - -class MaxOutOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; -}; - -class MaxOutOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "maxout_grad"); - OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), - "Output", - "X@Grad", - "maxout_grad"); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(maxout, - MaxOutInferShapeFunctor, - PD_INFER_META(phi::MaxOutInferMeta)); -REGISTER_OPERATOR( - maxout, - ops::MaxOutOp, - ops::MaxOutOpMaker, - paddle::framework::DefaultGradOpMaker, - paddle::framework::DefaultGradOpMaker, - MaxOutInferShapeFunctor); -REGISTER_OPERATOR(maxout_grad, ops::MaxOutOpGrad); diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 8ab3589a3e970..a67e791c4d9ec 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -447,6 +447,18 @@ func : floor_grad inplace : (out_grad -> x_grad) +- backward_op : fold_grad + forward: fold (Tensor x, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) -> Tensor(out) + args: (Tensor x, Tensor out_grad, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) + output: Tensor(x_grad) + infer_meta: + func: UnchangedInferMeta + param : [x] + kernel: + func: fold_grad + data_type : out_grad + no_need_buffer : x + - backward_op 
: gelu_grad forward : gelu(Tensor x, bool approximate) -> Tensor(out) args : (Tensor x, Tensor out_grad, bool approximate) @@ -532,6 +544,27 @@ kernel : func : inverse_grad +- backward_op : kthvalue_grad + forward : kthvalue(Tensor x, int k, int axis, bool keepdim) -> Tensor(out), Tensor(indices) + args : (Tensor x, Tensor indices, Tensor out_grad, int k, int axis, bool keepdim) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : kthvalue_grad + data_type : out_grad + +- backward_op : label_smooth_grad + forward : label_smooth (Tensor label, Tensor prior_dist, float epsilon) -> Tensor(out) + args : (Tensor out_grad, float epsilon) + output : Tensor(label_grad) + infer_meta : + func : UnchangedInferMeta + param : [out_grad] + kernel : + func : label_smooth_grad + - backward_op : leaky_relu_double_grad forward : leaky_relu_grad (Tensor x, Tensor grad_out, float negative_slope) -> Tensor(grad_x) args : (Tensor x, Tensor grad_x_grad, float negative_slope) @@ -642,6 +675,38 @@ func : logsigmoid_grad inplace : (out_grad -> x_grad) +- backward_op : masked_select_grad + forward : masked_select (Tensor x, Tensor mask) -> Tensor(out) + args : (Tensor x, Tensor mask, Tensor out_grad) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : masked_select_grad + data_type : x + no_need_buffer : x + +- backward_op : matrix_power_grad + forward : matrix_power (Tensor x, int n) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int n) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : matrix_power_grad + +- backward_op : maxout_grad + forward : maxout(Tensor x, int groups, int axis) -> Tensor(out) + args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis) + output : Tensor(x_grad) + infer_meta : + func : GeneralUnaryGradInferMeta + param: [x] + kernel : + func : maxout_grad + - backward_op : mv_grad forward : mv (Tensor x, Tensor vec) -> Tensor(out) args : (Tensor x, Tensor vec, Tensor out_grad) @@ -1024,15 +1089,3 @@ func : unfold_grad data_type : out_grad no_need_buffer : x - -- backward_op: fold_grad - forward: fold (Tensor x, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) -> Tensor(out) - args: (Tensor x, Tensor out_grad, int[] output_sizes, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) - output: Tensor(x_grad) - infer_meta: - func: UnchangedInferMeta - param : [x] - kernel: - func: fold_grad - data_type : out_grad - no_need_buffer : x diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index f920bbb8b23a7..6bc308df91253 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -743,26 +743,6 @@ func : kron_grad data_type : out_grad -- backward_op : kthvalue_grad - forward : kthvalue(Tensor x, int k, int axis, bool keepdim) -> Tensor(out), Tensor(indices) - args : (Tensor x, Tensor indices, Tensor out_grad, int k, int axis, bool keepdim) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param: [x] - kernel : - func : kthvalue_grad - -- backward_op : label_smooth_grad - forward : label_smooth (Tensor label, Tensor prior_dist, float epsilon) -> Tensor(out) - args : (Tensor out_grad, float epsilon) - output : Tensor(label_grad) - infer_meta : - func : UnchangedInferMeta - param : [out_grad] - kernel : - func : label_smooth_grad - - backward_op : layer_norm_grad 
forward : layer_norm (Tensor x, Tensor scale, Tensor bias, float epsilon, int begin_norm_axis, bool is_test) -> Tensor(out), Tensor(mean), Tensor(variance) args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, Tensor out_grad, float epsilon, int begin_norm_axis, bool is_test) @@ -867,18 +847,6 @@ data_type : softmax inplace : (softmax -> logits_grad) -- backward_op : masked_select_grad - forward : masked_select (Tensor x, Tensor mask) -> Tensor(out) - args : (Tensor x, Tensor mask, Tensor out_grad) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : masked_select_grad - data_type : x - no_need_buffer : x - - backward_op : matmul_double_grad forward : matmul_grad (Tensor x, Tensor y, Tensor grad_out, bool transpose_x=false, bool transpose_y=false) -> Tensor(grad_x), Tensor(grad_y) args : (Tensor x, Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, bool transpose_x=false, bool transpose_y=false) @@ -913,16 +881,6 @@ func : matmul_triple_grad optional : grad_x_grad, grad_y_grad, grad_grad_out_grad -- backward_op : matrix_power_grad - forward : matrix_power (Tensor x, int n) -> Tensor(out) - args : (Tensor x, Tensor out, Tensor out_grad, int n) - output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : matrix_power_grad - - backward_op : max_grad forward: max (Tensor x, IntArray axis={}, bool keepdim=false) -> Tensor(out) args : (Tensor x, Tensor out, Tensor out_grad, IntArray axis={}, bool keepdim=false, bool reduce_all=false) @@ -961,16 +919,6 @@ kernel : func : maximum_grad -- backward_op : maxout_grad - forward : maxout(Tensor x, int groups, int axis) -> Tensor(out) - args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis) - output : Tensor(x_grad) - infer_meta : - func : GeneralUnaryGradInferMeta - param: [x] - kernel : - func : maxout_grad - - backward_op : mean_all_grad forward : mean_all(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index be457ed65d80a..f5090eb6fad17 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1022,27 +1022,6 @@ func : kron backward : kron_grad -- op : kthvalue - args : (Tensor x, int k, int axis, bool keepdim) - output : Tensor(out), Tensor(indices) - infer_meta : - func : KthvalueInferMeta - kernel : - func : kthvalue - backward : kthvalue_grad - -- op : label_smooth - args : (Tensor label, Tensor prior_dist, float epsilon) - output : Tensor - infer_meta : - func : UnchangedInferMeta - param : [label] - kernel : - func : label_smooth - data_type : label - optional : prior_dist - backward : label_smooth_grad - - op : lamb_ args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, float weight_decay, float beta1, float beta2, float epsilon, bool multi_precision) output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs) @@ -1221,16 +1200,6 @@ data_type : logits backward : margin_cross_entropy_grad -- op : masked_select - args : (Tensor x, Tensor mask) - output : Tensor - infer_meta : - func : MaskedSelectInferMeta - kernel : - func : masked_select - data_type : x - backward : masked_select_grad - - op : matmul args : (Tensor x, Tensor y, bool transpose_x = false, bool transpose_y = false) 
output : Tensor @@ -1248,16 +1217,6 @@ kernel : func : matrix_nms -- op : matrix_power - args : (Tensor x, int n) - output : Tensor - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : matrix_power - backward : matrix_power_grad - - op : matrix_rank args : (Tensor x, float tol, bool use_default_tol=true, bool hermitian=false) output : Tensor(out) @@ -1311,15 +1270,6 @@ func : maximum backward : maximum_grad -- op : maxout - args : (Tensor x, int groups, int axis) - output : Tensor(out) - infer_meta : - func : MaxOutInferMeta - kernel : - func : maxout - backward : maxout_grad - - op : mean args : (Tensor x, IntArray axis={}, bool keepdim=false) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 777f58602aacd..0c59acbc98839 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -652,6 +652,18 @@ outputs : out : Out +- op : kthvalue + inputs : + x : X + outputs : + {out : Out, indices : Indices} + +- op : label_smooth + inputs : + {label : X, prior_dist : PriorDist} + outputs : + out : Out + - op : layer_norm backward : layer_norm_grad extra : @@ -742,6 +754,12 @@ extra : attrs : [bool use_mkldnn = false, bool is_test = false] +- op : masked_select + inputs : + {x : X, mask : Mask} + outputs : + out : Y + - op : matmul (matmul_v2) backward : matmul_grad (matmul_v2_grad) extra : @@ -755,6 +773,12 @@ attrs : [bool use_mkldnn = false, float scale_x = 1.0f, 'float[] scale_y = {1.0f}', float scale_out = 1.0f, bool force_fp32_output = false] +- op : matrix_power + inputs : + x : X + outputs : + out : Out + - op : maximum (elementwise_max) backward : maximum_grad (elementwise_max_grad) extra : @@ -767,6 +791,12 @@ attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] +- op : maxout + inputs : + x : X + outputs : + out : Out + - op : mish backward : mish_grad extra : diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 35a55e71a8068..effcb06dc4596 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -532,6 +532,27 @@ func : isnan {dense -> dense}, isnan_sr {selected_rows -> selected_rows} +- op : kthvalue + args : (Tensor x, int k = 1, int axis = -1, bool keepdim = false) + output : Tensor(out), Tensor(indices) + infer_meta : + func : KthvalueInferMeta + kernel : + func : kthvalue + backward : kthvalue_grad + +- op : label_smooth + args : (Tensor label, Tensor prior_dist, float epsilon = 0.0f) + output : Tensor (out) + infer_meta : + func : UnchangedInferMeta + param : [label] + kernel : + func : label_smooth + data_type : label + optional : prior_dist + backward : label_smooth_grad + - op : leaky_relu args : (Tensor x, float negative_slope = 0.02f) output : Tensor @@ -606,6 +627,34 @@ func : logsigmoid backward : logsigmoid_grad +- op : masked_select + args : (Tensor x, Tensor mask) + output : Tensor (out) + infer_meta : + func : MaskedSelectInferMeta + kernel : + func : masked_select + data_type : x + backward : masked_select_grad + +- op : matrix_power + args : (Tensor x, int n) + output : Tensor + infer_meta : + func : MatrixPowerInferMeta + kernel : + func : matrix_power + backward : matrix_power_grad + +- op : maxout + args : (Tensor x, int groups, int axis = 1) + output : Tensor(out) + infer_meta : + func : MaxOutInferMeta + kernel : + func : maxout + backward : 
maxout_grad + - op : mv args : (Tensor x, Tensor vec) output : Tensor diff --git a/paddle/phi/ops/compat/kthvalue_sig.cc b/paddle/phi/ops/compat/kthvalue_sig.cc deleted file mode 100644 index b04726ec3b3a1..0000000000000 --- a/paddle/phi/ops/compat/kthvalue_sig.cc +++ /dev/null @@ -1,29 +0,0 @@ - -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature KthvalueGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("kthvalue_grad", - {"X", "Indices", "Out@GRAD"}, - {"k", "axis", "keepdim"}, - {"X@GRAD"}); -} - -} // namespace phi -PD_REGISTER_ARG_MAPPING_FN(kthvalue_grad, phi::KthvalueGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/label_smooth_sig.cc b/paddle/phi/ops/compat/label_smooth_sig.cc deleted file mode 100644 index 7607af2b61b7c..0000000000000 --- a/paddle/phi/ops/compat/label_smooth_sig.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature LabelSmoothOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "label_smooth", {"X", "PriorDist"}, {"epsilon"}, {"Out"}); -} - -KernelSignature LabelSmoothGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "label_smooth_grad", {"Out@GRAD"}, {"epsilon"}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(label_smooth, phi::LabelSmoothOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(label_smooth_grad, - phi::LabelSmoothGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/masked_select_sig.cc b/paddle/phi/ops/compat/masked_select_sig.cc deleted file mode 100644 index 47b4f2fac3155..0000000000000 --- a/paddle/phi/ops/compat/masked_select_sig.cc +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature MaskedSelectOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature("masked_select", {"X", "Mask"}, {}, {"Y"}); -} - -KernelSignature MaskedSelectGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "masked_select_grad", {"X", "Mask", "Y@GRAD"}, {}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(masked_select, phi::MaskedSelectOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(masked_select_grad, - phi::MaskedSelectGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/matrix_power_sig.cc b/paddle/phi/ops/compat/matrix_power_sig.cc deleted file mode 100644 index 00cb1f82b8047..0000000000000 --- a/paddle/phi/ops/compat/matrix_power_sig.cc +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature MatrixPowerGradOpArgumentMapping( - const ArgumentMappingContext& ctx) { - return KernelSignature( - "matrix_power_grad", {"X", "Out", "Out@GRAD"}, {"n"}, {"X@GRAD"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(matrix_power_grad, - phi::MatrixPowerGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/maxout_sig.cc b/paddle/phi/ops/compat/maxout_sig.cc deleted file mode 100644 index 9e028bc81fbc3..0000000000000 --- a/paddle/phi/ops/compat/maxout_sig.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
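-
-// These mappings translate the fluid maxout op's variable and attribute names
-// (X, Out, groups, axis) into the phi maxout/maxout_grad kernel signatures.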
From 4915deb3c80b941821104a67550e83120144e52b Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Sun, 27 Nov 2022 12:07:41 +0000
Subject: [PATCH 3/3] revert npu gelu

---
 paddle/fluid/operators/gelu_op_npu.cc      |  93 ++++++++++++
 paddle/fluid/operators/gelu_op_npu_test.cc | 167 +++++++++++++++++++++
 2 files changed, 260 insertions(+)
 create mode 100644 paddle/fluid/operators/gelu_op_npu.cc
 create mode 100644 paddle/fluid/operators/gelu_op_npu_test.cc

diff --git a/paddle/fluid/operators/gelu_op_npu.cc b/paddle/fluid/operators/gelu_op_npu.cc
new file mode 100644
index 0000000000000..f462336b412a3
--- /dev/null
+++ b/paddle/fluid/operators/gelu_op_npu.cc
@@ -0,0 +1,93 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = phi::DenseTensor;
+
+template <typename DeviceContext, typename T>
+class GeluNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+
+    auto* out = ctx.Output<Tensor>("Out");
+
+    auto place = ctx.GetPlace();
+
+    out->mutable_data<T>(place);
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    const auto& runner = NpuOpRunner("Gelu", {*x}, {*out}, {});
+    runner.Run(stream);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class GeluGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+
+    auto place = ctx.GetPlace();
+
+    dx->mutable_data<T>(place);
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    // NOTE(pangyoki): In the original implementation of the GeluGrad op, the
+    // inputs are {*dout, *x, out}, where out = Gelu(x). However, `out` is
+    // never actually read, so the redundant forward GELU computation was
+    // removed to improve performance. `*dout` is passed here purely as a
+    // placeholder for `out`; it is not used in the GeluGrad calculation
+    // itself.
+    const auto& runner_dx =
+        NpuOpRunner("GeluGrad", {*dout, *x, *dout}, {*dx}, {});
+    runner_dx.Run(stream);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(
+    gelu,
+    ops::GeluNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::GeluNPUKernel<paddle::platform::NPUDeviceContext,
+                       paddle::platform::float16>);
+
+REGISTER_OP_NPU_KERNEL(
+    gelu_grad,
+    ops::GeluGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::GeluGradNPUKernel<paddle::platform::NPUDeviceContext,
+                           paddle::platform::float16>);
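The test below pushes an all-ones 10x10 tensor through the forward and backward kernels and compares every element against the hard-coded constants 0.841192 and 1.082964. Those values sit close to the erf-based GELU and its derivative at x = 1, where gelu(x) = x * Phi(x) and gelu'(x) = Phi(x) + x * phi(x); the small offsets are presumably fp32 rounding inside the Ascend kernels. A host-side reference check, not part of the patch:

#include <cmath>
#include <cstdio>

int main() {
  const double x = 1.0;
  const double pi = std::acos(-1.0);
  // Standard normal CDF and PDF at x.
  const double cdf = 0.5 * (1.0 + std::erf(x / std::sqrt(2.0)));
  const double pdf = std::exp(-0.5 * x * x) / std::sqrt(2.0 * pi);
  std::printf("gelu(1)  = %.6f\n", x * cdf);        // ~0.841345
  std::printf("gelu'(1) = %.6f\n", cdf + x * pdf);  // ~1.083316
  return 0;
}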
diff --git a/paddle/fluid/operators/gelu_op_npu_test.cc b/paddle/fluid/operators/gelu_op_npu_test.cc
new file mode 100644
index 0000000000000..9dca0bb8cba0f
--- /dev/null
+++ b/paddle/fluid/operators/gelu_op_npu_test.cc
@@ -0,0 +1,167 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
+#include <string>
+#include <thread>  // NOLINT
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/string/printf.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace f = paddle::framework;
+namespace p = paddle::platform;
+
+USE_OP_ITSELF(gelu);
+USE_OP_DEVICE_KERNEL(gelu, NPU);
+
+template <typename T>
+void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
+  // init
+  auto x = scope->Var("X");
+  auto tensor_x = x->GetMutable<phi::DenseTensor>();
+
+  std::vector<T> init_x;
+  for (int64_t i = 0; i < 10 * 10; ++i) {
+    init_x.push_back(static_cast<T>(1.0));
+  }
+
+  paddle::framework::TensorFromVector(init_x, ctx, tensor_x);
+  tensor_x->Resize({10, 10});
+
+  auto out = scope->Var("Out");
+  auto tensor_out = out->GetMutable<phi::DenseTensor>();
+
+  f::AttributeMap attrs;
+
+  ctx.Wait();
+
+  // run
+  auto place = ctx.GetPlace();
+
+  auto op = f::OpRegistry::CreateOp(
+      "gelu", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs);
+  op->Run(*scope, place);
+
+  ctx.Wait();
+
+  // eval time
+  struct timeval start, end;
+  gettimeofday(&start, NULL);
+
+  for (int i = 0; i < 100; i++) {
+    op->Run(*scope, place);
+  }
+
+  ctx.Wait();
+
+  gettimeofday(&end, NULL);
+  int micros =
+      (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) -
+      (start.tv_usec);
+  printf("used time: %d\n", micros / 100);
+
+  // eval value
+  std::vector<T> out_vec;
+  paddle::framework::TensorToVector(*tensor_out, ctx, &out_vec);
+
+  float expected = 0.841192;
+  for (uint32_t i = 0; i < out_vec.size(); i++) {
+    EXPECT_FLOAT_EQ(out_vec[i], static_cast<T>(expected));
+  }
+}
+
+template <typename T>
+void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx) {
+  auto dout = scope->Var("DOut");
+  auto tensor_dout = dout->GetMutable<phi::DenseTensor>();
+
+  auto x = scope->Var("X");
+  auto tensor_x = x->GetMutable<phi::DenseTensor>();
+
+  std::vector<T> init_dout;
+  for (int64_t i = 0; i < 10 * 10; ++i) {
+    init_dout.push_back(static_cast<T>(1.0));
+  }
+
+  std::vector<T> init_x;
+  for (int64_t i = 0; i < 10 * 10; ++i) {
+    init_x.push_back(static_cast<T>(1.0));
+  }
+
+  paddle::framework::TensorFromVector(init_dout, ctx, tensor_dout);
+  tensor_dout->Resize({10, 10});
+  paddle::framework::TensorFromVector(init_x, ctx, tensor_x);
+  tensor_x->Resize({10, 10});
+
+  auto dx = scope->Var("DX");
+  auto tensor_dx = dx->GetMutable<phi::DenseTensor>();
+
+  f::AttributeMap attrs;
+
+  ctx.Wait();
+
+  // run
+  auto place = ctx.GetPlace();
+
+  auto op = f::OpRegistry::CreateOp("gelu_grad",
+                                    {{"Out@GRAD", {"DOut"}}, {"X", {"X"}}},
+                                    {{"X@GRAD", {"DX"}}},
+                                    attrs);
+  op->Run(*scope, place);
+
+  ctx.Wait();
+
+  // eval time
+  struct timeval start, end;
+  gettimeofday(&start, NULL);
+
+  for (int i = 0; i < 100; i++) {
+    op->Run(*scope, place);
+  }
+
+  ctx.Wait();
+
+  gettimeofday(&end, NULL);
+  int micros =
+      (((end.tv_sec - start.tv_sec) * 1000000) + end.tv_usec) -
+      (start.tv_usec);
+  printf("used time: %d\n", micros / 100);
+
+  // eval value
+  std::vector<T> dx_vec;
+  paddle::framework::TensorToVector(*tensor_dx, ctx, &dx_vec);
+
+  float expected = 1.082964;
+  for (uint32_t i = 0; i < dx_vec.size(); i++) {
+    EXPECT_FLOAT_EQ(dx_vec[i], static_cast<T>(expected));
+  }
+}
+
+TEST(gelu, NPU_fp32) {
+  f::Scope scope;
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  Compare<float>(&scope, *ctx);
+}
+
+TEST(gelu_grad, NPU) {
+  f::Scope scope;
+  auto* ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0));
+  CompareGrad<float>(&scope, *ctx);
+}
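One last observation on the restored test: Compare and CompareGrad duplicate the same timing harness (one warm-up run, 100 timed iterations, mean latency printed in microseconds). If this file is ever touched again, the loop could be factored into a small helper along these lines (TimeOpMicros is a hypothetical name, not an existing Paddle utility; it assumes a POSIX environment, matching the #ifndef _WIN32 guard above):

#include <sys/time.h>

#include <functional>

// Times `run` over `iters` iterations and returns the mean latency in
// microseconds; the caller remains responsible for device synchronization.
int TimeOpMicros(const std::function<void()>& run, int iters = 100) {
  struct timeval start, end;
  gettimeofday(&start, nullptr);
  for (int i = 0; i < iters; ++i) {
    run();
  }
  gettimeofday(&end, nullptr);
  auto micros = (end.tv_sec - start.tv_sec) * 1000000LL +
                (end.tv_usec - start.tv_usec);
  return static_cast<int>(micros / iters);
}

Each "used time" printf in the test would then collapse to a single call wrapping op->Run.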